Unverified commit 7d343fd2 authored by Kai Chen, committed by GitHub

Merge pull request #8 from hellock/dev

API cleaning and code refactoring (WIP)
parents 0e0b9246 630687f4
@@ -104,4 +104,6 @@ venv.bak/
.mypy_cache/
# cython generated cpp
-mmdet/ops/nms/*.cpp
\ No newline at end of file
+mmdet/ops/nms/*.cpp
+mmdet/version.py
+data
### MMCV
- [ ] Implement the 'get' attribute of 'Config'
- [ ] Config bugs: None type to '{}' with addict
- [ ] Default logger should be enabled only on gpu0
- [ ] Unit Test: mmcv and mmcv.torchpack
### MMDetection
#### Basic
- [ ] Implement the training function for the non-distributed case
- [ ] Verify nccl/nccl2/gloo
- [ ] Replace UGLY code: plugging params into 'args' to reach a global flow
- [ ] Replace 'print' with 'logger'
#### Testing
- [ ] Implement distributed testing
- [ ] Implement single gpu testing
#### Refactor
- [ ] Reconsider parameter names
- [ ] Refactor functions in 'core'
- [ ] Merge single test & aug test into one function, and likewise for other redundancies
#### New features
- [ ] Plug loss params into Config
- [ ] Multi-head communication
# model settings
model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
    type='FasterRCNN',
+    pretrained='modelzoo://resnet50',
backbone=dict(
type='resnet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
@@ -18,15 +18,14 @@ model = dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
-        coarsest_stride=32,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
-    roi_block=dict(
-        type='SingleLevelRoI',
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
@@ -40,28 +39,23 @@ model = dict(
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False))
-meta_params = dict(
-    rpn_train_cfg = dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
pos_fraction=0.5,
pos_balance_sampling=False,
neg_pos_ub=256,
allowed_border=0,
crowd_thr=1.1,
anchor_batch_size=256,
pos_iou_thr=0.7,
neg_iou_thr=0.3,
neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
-    rpn_test_cfg = dict(
-        nms_across_levels=False,
-        nms_pre=2000,
-        nms_post=2000,
-        max_num=2000,
-        nms_thr=0.7,
-        min_bbox_size=0),
-    rcnn_train_cfg = dict(
+    rcnn=dict(
pos_iou_thr=0.5,
neg_iou_thr=0.5,
crowd_thr=1.1,
@@ -71,55 +65,84 @@ meta_params = dict(
pos_balance_sampling=False,
neg_pos_ub=512,
neg_balance_thr=0,
min_pos_iou=1.1,
pos_weight=-1,
-        debug=False),
-    rcnn_test_cfg = dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5)
-)
+        debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5))
# dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53],
-    std=[58.395, 57.12, 57.375],
-    to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
-    ann_file=data_root + 'annotations/instances_train2017.json',
-    img_prefix=data_root + 'train2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32)
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
-lr_policy = dict(
+lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
step=[8, 11])
-max_epoch = 12
checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
])
# yapf:enable
-work_dir = './model/r50_fpn_frcnn_1x'
# runtime settings
total_epochs = 12
device_ids = range(8)
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
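As a quick orientation for readers unfamiliar with this config style: these files are plain Python modules that mmcv's `Config` turns into attribute-accessible dicts. A minimal sketch of consuming the config above (`Config.fromfile` is mmcv's real entry point; the path below is an assumed location):

```python
# Minimal sketch: loading this config with mmcv's Config. The file path
# is hypothetical; adjust it to wherever the config actually lives.
from mmcv import Config

cfg = Config.fromfile('configs/faster_rcnn_r50_fpn_1x.py')
print(cfg.model.type)         # 'FasterRCNN'
print(cfg.data.imgs_per_gpu)  # 2
print(cfg.optimizer.lr)       # 0.02
```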
# model settings
model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
    type='MaskRCNN',
+    pretrained='modelzoo://resnet50',
backbone=dict(
type='resnet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
@@ -18,15 +18,14 @@ model = dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
-        coarsest_stride=32,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True),
-    roi_block=dict(
-        type='SingleLevelRoI',
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
@@ -40,8 +39,8 @@ model = dict(
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=False),
-    mask_block=dict(
-        type='SingleLevelRoI',
+    mask_roi_extractor=dict(
+        type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
@@ -51,28 +50,23 @@ model = dict(
in_channels=256,
conv_out_channels=256,
num_classes=81))
-meta_params = dict(
-    rpn_train_cfg=dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
pos_fraction=0.5,
pos_balance_sampling=False,
neg_pos_ub=256,
allowed_border=0,
crowd_thr=1.1,
anchor_batch_size=256,
pos_iou_thr=0.7,
neg_iou_thr=0.3,
neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
debug=False),
-    rpn_test_cfg=dict(
-        nms_across_levels=False,
-        nms_pre=2000,
-        nms_post=2000,
-        max_num=2000,
-        nms_thr=0.7,
-        min_bbox_size=0),
-    rcnn_train_cfg=dict(
+    rcnn=dict(
mask_size=28,
pos_iou_thr=0.5,
neg_iou_thr=0.5,
@@ -83,54 +77,85 @@ meta_params = dict(
pos_balance_sampling=False,
neg_pos_ub=512,
neg_balance_thr=0,
min_pos_iou=1.1,
pos_weight=-1,
-        debug=False),
-    rcnn_test_cfg=dict(
-        score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
+        debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
# dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
-    with_mask=True,
-    ann_file=data_root + 'annotations/instances_train2017.json',
-    img_prefix=data_root + 'train2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=True,
with_crowd=True,
with_label=True),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
-lr_policy = dict(
+lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
step=[8, 11])
-max_epoch = 12
checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
])
# yapf:enable
-work_dir = './model/r50_fpn_mask_rcnn_1x'
# runtime settings
total_epochs = 12
device_ids = range(8)
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/mask_rcnn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
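A shape walk-through of the mask branch implied by the fields above: RoIAlign pools each RoI to `out_size=14` with `out_channels=256`, and the mask head upsamples by 2x before predicting, matching the `mask_size=28` used for targets. A standalone sketch (the real head uses a learned deconv; `F.interpolate` merely stands in for it here):

```python
# Standalone shape check for the mask-branch sizes in this config. The
# actual mask head upsamples with a learned deconv; bilinear interpolation
# is a stand-in for the shape arithmetic only.
import torch
import torch.nn.functional as F

roi_feats = torch.randn(8, 256, 14, 14)  # out_channels=256, out_size=14
upsampled = F.interpolate(roi_feats, scale_factor=2, mode='bilinear',
                          align_corners=False)
print(upsampled.shape)  # torch.Size([8, 256, 28, 28]) -> matches mask_size=28
```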
# model settings
model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
    type='RPN',
+    pretrained='modelzoo://resnet50',
backbone=dict(
type='resnet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
@@ -18,28 +18,30 @@ model = dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
-        coarsest_stride=32,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
use_sigmoid_cls=True))
-meta_params = dict(
-    rpn_train_cfg=dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
pos_fraction=0.5,
pos_balance_sampling=False,
neg_pos_ub=256,
allowed_border=0,
crowd_thr=1.1,
anchor_batch_size=256,
pos_iou_thr=0.7,
neg_iou_thr=0.3,
neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
pos_weight=-1,
smoothl1_beta=1 / 9.0,
-        debug=False),
-    rpn_test_cfg=dict(
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
@@ -47,49 +49,70 @@ meta_params = dict(
nms_thr=0.7,
min_bbox_size=0))
# dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
-    ann_file=data_root + 'annotations/instances_train2017.json',
-    img_prefix=data_root + 'train2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    test_mode=True)
data = dict(
imgs_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_train2017.json',
img_prefix=data_root + 'train2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=False,
with_crowd=False,
with_label=False),
val=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_crowd=False,
with_label=False),
test=dict(
type=dataset_type,
ann_file=data_root + 'annotations/instances_val2017.json',
img_prefix=data_root + 'val2017/',
img_scale=(1333, 800),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
-# learning policy
-lr_policy = dict(
# runner configs
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
step=[8, 11])
-max_epoch = 12
checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
])
# yapf:enable
-work_dir = './model/r50_fpn_1x'
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/rpn_r50_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]
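Back-of-envelope arithmetic for this RPN config: 1 scale x 3 ratios = 3 anchors per location over 5 FPN levels. A sketch assuming an input resized to (1333, 800) and padded to `size_divisor=32` (so 1344 x 800):

```python
# Rough anchor count implied by anchor_scales/anchor_ratios/anchor_strides
# above, assuming an 800 x 1344 padded input (1333 rounded up to a
# multiple of size_divisor=32).
import math

strides = [4, 8, 16, 32, 64]
anchors_per_loc = 1 * 3  # len(anchor_scales) * len(anchor_ratios)
h, w = 800, 1344
total = sum(math.ceil(h / s) * math.ceil(w / s) * anchors_per_loc
            for s in strides)
print(total)  # 268569 anchors before validity filtering and NMS
```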
-from .version import __version__
+from .version import __version__, short_version
__all__ = ['__version__', 'short_version']
-from .train_engine import *
-from .test_engine import *
-from .rpn_ops import *
-from .bbox_ops import *
-from .mask_ops import *
-from .losses import *
-from .eval import *
-from .post_processing import *
-from .utils import *
from .anchor import * # noqa: F401, F403
from .bbox import * # noqa: F401, F403
from .mask import * # noqa: F401, F403
from .loss import * # noqa: F401, F403
from .evaluation import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .anchor_generator import AnchorGenerator
from .anchor_target import anchor_target
__all__ = ['AnchorGenerator', 'anchor_target']
@@ -50,15 +50,18 @@ class AnchorGenerator(object):
return yy, xx
def grid_anchors(self, featmap_size, stride=16, device='cuda'):
+        base_anchors = self.base_anchors.to(device)
feat_h, feat_w = featmap_size
shift_x = torch.arange(0, feat_w, device=device) * stride
shift_y = torch.arange(0, feat_h, device=device) * stride
shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
shifts = shifts.type_as(base_anchors)
# first feat_w elements correspond to the first row of shifts
# add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
# shifted anchors (K, A, 4), reshape to (K*A, 4)
-        base_anchors = self.base_anchors.to(device)
all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
all_anchors = all_anchors.view(-1, 4)
# first A rows correspond to A anchors of (0, 0) in feature map,
......
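A standalone check of the shift-and-broadcast trick used in `grid_anchors` above, assuming 2 base anchors on a 2 x 3 feature map with stride 16:

```python
# Standalone check of the shift-and-broadcast trick above, assuming
# 2 base anchors (A=2) on a 2x3 feature map (K=6) with stride 16.
import torch

base_anchors = torch.tensor([[-8., -8., 8., 8.],
                             [-16., -16., 16., 16.]])        # (A, 4)
feat_h, feat_w, stride = 2, 3, 16
shift_x = torch.arange(0, feat_w) * stride                   # (3,)
shift_y = torch.arange(0, feat_h) * stride                   # (2,)
shift_xx = shift_x.repeat(feat_h)                            # row-major, (K,)
shift_yy = shift_y.view(-1, 1).repeat(1, feat_w).view(-1)    # (K,)
shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy],
                     dim=-1).float()                         # (K, 4)
all_anchors = (base_anchors[None, :, :] + shifts[:, None, :]).view(-1, 4)
print(all_anchors.shape)  # torch.Size([12, 4]) == (K * A, 4)
```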
import torch
-import numpy as np
-from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling)
+from ..bbox import bbox_assign, bbox2delta, bbox_sampling
from ..utils import multi_apply
-def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list,
-                  img_shapes, target_means, target_stds, cfg):
-    """Compute anchor regression and classification targets
+def anchor_target(anchor_list, valid_flag_list, gt_bboxes_list, img_metas,
+                  target_means, target_stds, cfg):
+    """Compute regression and classification targets for anchors.
Args:
-        anchor_list(list): anchors of each feature map level
-        featuremap_sizes(list): feature map sizes
-        gt_bboxes_list(list): ground truth bbox of images in a mini-batch
-        img_shapes(list): shape of each image in a mini-batch
-        cfg(dict): configs
+        anchor_list (list[list]): Multi level anchors of each image.
+        valid_flag_list (list[list]): Multi level valid flags of each image.
+        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+        img_metas (list[dict]): Meta info of each image.
+        target_means (Iterable): Mean value of regression targets.
+        target_stds (Iterable): Std value of regression targets.
+        cfg (dict): RPN train configs.
Returns:
tuple
"""
-    if len(featmap_sizes) == len(anchor_list):
-        all_anchors = torch.cat(anchor_list, 0)
-        anchor_nums = [anchors.size(0) for anchors in anchor_list]
-        use_isomerism_anchors = False
-    elif len(img_shapes) == len(anchor_list):
-        # using different anchors for different images
-        all_anchors_list = [
-            torch.cat(anchor_list[img_id], 0)
-            for img_id in range(len(img_shapes))
-        ]
-        anchor_nums = [anchors.size(0) for anchors in anchor_list[0]]
-        use_isomerism_anchors = True
-    else:
-        raise ValueError('length of anchor_list should be equal to number of '
-                         'feature lvls or number of images in a batch')
-    all_labels = []
-    all_label_weights = []
-    all_bbox_targets = []
-    all_bbox_weights = []
-    num_total_sampled = 0
-    for img_id in range(len(img_shapes)):
-        if isinstance(valid_flag_list[img_id], list):
-            valid_flags = torch.cat(valid_flag_list[img_id], 0)
-        else:
-            valid_flags = valid_flag_list[img_id]
-        if use_isomerism_anchors:
-            all_anchors = all_anchors_list[img_id]
-        inside_flags = anchor_inside_flags(all_anchors, valid_flags,
-                                           img_shapes[img_id][:2],
-                                           cfg.allowed_border)
-        if not inside_flags.any():
-            return None
-        gt_bboxes = gt_bboxes_list[img_id]
-        anchor_targets = anchor_target_single(all_anchors, inside_flags,
-                                              gt_bboxes, target_means,
-                                              target_stds, cfg)
-        (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
-         neg_inds) = anchor_targets
-        all_labels.append(labels)
-        all_label_weights.append(label_weights)
-        all_bbox_targets.append(bbox_targets)
-        all_bbox_weights.append(bbox_weights)
-        num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1)
-    all_labels = torch.stack(all_labels, 0)
-    all_label_weights = torch.stack(all_label_weights, 0)
-    all_bbox_targets = torch.stack(all_bbox_targets, 0)
-    all_bbox_weights = torch.stack(all_bbox_weights, 0)
-    # split into different feature levels
-    labels_list = []
-    label_weights_list = []
-    bbox_targets_list = []
-    bbox_weights_list = []
num_imgs = len(img_metas)
assert len(anchor_list) == len(valid_flag_list) == num_imgs
# anchor number of multi levels
num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
# concat all level anchors and flags to a single tensor
for i in range(num_imgs):
assert len(anchor_list[i]) == len(valid_flag_list[i])
anchor_list[i] = torch.cat(anchor_list[i])
valid_flag_list[i] = torch.cat(valid_flag_list[i])
# compute targets for each image
means_replicas = [target_means for _ in range(num_imgs)]
stds_replicas = [target_stds for _ in range(num_imgs)]
cfg_replicas = [cfg for _ in range(num_imgs)]
(all_labels, all_label_weights, all_bbox_targets,
all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
anchor_target_single, anchor_list, valid_flag_list, gt_bboxes_list,
img_metas, means_replicas, stds_replicas, cfg_replicas)
# no valid anchors
if any([labels is None for labels in all_labels]):
return None
# sampled anchors of all images
num_total_samples = sum([
max(pos_inds.numel() + neg_inds.numel(), 1)
for pos_inds, neg_inds in zip(pos_inds_list, neg_inds_list)
])
# split targets to a list w.r.t. multiple levels
labels_list = images_to_levels(all_labels, num_level_anchors)
label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
return (labels_list, label_weights_list, bbox_targets_list,
bbox_weights_list, num_total_samples)
def images_to_levels(target, num_level_anchors):
"""Convert targets by image to targets by feature level.
[target_img0, target_img1] -> [target_level0, target_level1, ...]
"""
target = torch.stack(target, 0)
level_targets = []
start = 0
-    for anchor_num in anchor_nums:
-        end = start + anchor_num
-        labels_list.append(all_labels[:, start:end].squeeze(0))
-        label_weights_list.append(all_label_weights[:, start:end].squeeze(0))
-        bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0))
-        bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0))
+    for n in num_level_anchors:
+        end = start + n
+        level_targets.append(target[:, start:end].squeeze(0))
start = end
-    return (labels_list, label_weights_list, bbox_targets_list,
-            bbox_weights_list, num_total_sampled)
+    return level_targets
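A tiny sanity check of `images_to_levels` above, assuming two images and two levels holding 3 and 2 anchors respectively:

```python
# Quick check of images_to_levels (defined above), assuming 2 images and
# num_level_anchors = [3, 2].
import torch

target_img0 = torch.arange(5)       # per-image targets, levels concatenated
target_img1 = torch.arange(5) + 10
level_targets = images_to_levels([target_img0, target_img1], [3, 2])
print(level_targets[0])  # tensor([[ 0,  1,  2], [10, 11, 12]]) -> level 0
print(level_targets[1])  # tensor([[ 3,  4], [13, 14]])         -> level 1
```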
-def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
-                         target_stds, cfg):
-    num_total_anchors = all_anchors.size(0)
-    anchors = all_anchors[inside_flags, :]
+def anchor_target_single(flat_anchors, valid_flags, gt_bboxes, img_meta,
+                         target_means, target_stds, cfg):
inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
img_meta['img_shape'][:2],
cfg.allowed_border)
if not inside_flags.any():
return (None, ) * 6
# assign gt and sample anchors
anchors = flat_anchors[inside_flags, :]
assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign(
anchors,
gt_bboxes,
@@ -99,14 +94,13 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
bbox_targets = torch.zeros_like(anchors)
bbox_weights = torch.zeros_like(anchors)
labels = torch.zeros_like(assigned_gt_inds)
-    label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float)
+    label_weights = torch.zeros_like(assigned_gt_inds, dtype=anchors.dtype)
if len(pos_inds) > 0:
-        pos_inds = unique(pos_inds)
pos_anchors = anchors[pos_inds, :]
pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :]
-        pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox,
-                                          target_means, target_stds)
+        pos_bbox_targets = bbox2delta(pos_anchors, pos_gt_bbox, target_means,
+                                      target_stds)
bbox_targets[pos_inds, :] = pos_bbox_targets
bbox_weights[pos_inds, :] = 1.0
labels[pos_inds] = 1
@@ -115,10 +109,10 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
else:
label_weights[pos_inds] = cfg.pos_weight
if len(neg_inds) > 0:
-        neg_inds = unique(neg_inds)
label_weights[neg_inds] = 1.0
# map up to original set of anchors
num_total_anchors = flat_anchors.size(0)
labels = unmap(labels, num_total_anchors, inside_flags)
label_weights = unmap(label_weights, num_total_anchors, inside_flags)
bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
@@ -127,24 +121,20 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
neg_inds)
-def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0):
-    img_h, img_w = img_shape.float()
+def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
+                        allowed_border=0):
+    img_h, img_w = img_shape[:2]
if allowed_border >= 0:
inside_flags = valid_flags & \
-            (all_anchors[:, 0] >= -allowed_border) & \
-            (all_anchors[:, 1] >= -allowed_border) & \
-            (all_anchors[:, 2] < img_w + allowed_border) & \
-            (all_anchors[:, 3] < img_h + allowed_border)
+            (flat_anchors[:, 0] >= -allowed_border) & \
+            (flat_anchors[:, 1] >= -allowed_border) & \
+            (flat_anchors[:, 2] < img_w + allowed_border) & \
+            (flat_anchors[:, 3] < img_h + allowed_border)
else:
inside_flags = valid_flags
return inside_flags
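A standalone illustration of the border test above, assuming a (600, 800) image and `allowed_border=0`; anchors crossing the image boundary are flagged out:

```python
# Standalone illustration of the inside-flag test, assuming an image of
# shape (600, 800) and allowed_border=0.
import torch

flat_anchors = torch.tensor([[ -4.,  10.,  20.,  30.],   # crosses left edge
                             [ 10.,  10.,  50.,  50.],   # fully inside
                             [700., 500., 810., 590.]])  # crosses right edge
valid_flags = torch.ones(3, dtype=torch.bool)
img_h, img_w = 600, 800
inside = valid_flags & \
    (flat_anchors[:, 0] >= 0) & (flat_anchors[:, 1] >= 0) & \
    (flat_anchors[:, 2] < img_w) & (flat_anchors[:, 3] < img_h)
print(inside)  # tensor([False,  True, False])
```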
-def unique(tensor):
-    if tensor.is_cuda:
-        u_tensor = np.unique(tensor.cpu().numpy())
-        return tensor.new_tensor(u_tensor)
-    else:
-        return torch.unique(tensor)
def unmap(data, count, inds, fill=0):
""" Unmap a subset of item (data) back to the original set of items (of
......
from .geometry import bbox_overlaps
from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps,
bbox_sampling, bbox_sampling_pos, bbox_sampling_neg,
sample_bboxes)
from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
from .bbox_target import bbox_target
__all__ = [
'bbox_overlaps', 'random_choice', 'bbox_assign',
'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos',
'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox',
'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox',
'bbox2result', 'bbox_target'
]
-import mmcv
import torch
-from .geometry import bbox_overlaps
-from .transforms import bbox_transform, bbox_transform_inv
+from .transforms import bbox2delta
+from ..utils import multi_apply
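`multi_apply`, used here and in `anchor_target`, maps a function over per-image argument lists and transposes the per-image result tuples into per-output lists. A minimal re-implementation for reference; its behavior is assumed from how it is called in this diff (the real helper lives in `mmdet.core.utils`):

```python
# Minimal re-implementation of multi_apply for reference; behavior is
# assumed from the call sites in this diff, not copied from the source.
from functools import partial

def multi_apply(func, *args, **kwargs):
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)             # one call per image
    return tuple(map(list, zip(*map_results)))  # transpose to per-output lists

# multi_apply(lambda a, b: (a + b, a * b), [1, 2], [3, 4])
# -> ([4, 6], [3, 8])
```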
def bbox_target(pos_proposals_list,
@@ -13,33 +12,23 @@ def bbox_target(pos_proposals_list,
reg_num_classes=1,
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
-                return_list=False):
-    img_per_gpu = len(pos_proposals_list)
-    all_labels = []
-    all_label_weights = []
-    all_bbox_targets = []
-    all_bbox_weights = []
-    for img_id in range(img_per_gpu):
-        pos_proposals = pos_proposals_list[img_id]
-        neg_proposals = neg_proposals_list[img_id]
-        pos_gt_bboxes = pos_gt_bboxes_list[img_id]
-        pos_gt_labels = pos_gt_labels_list[img_id]
-        debug_img = debug_imgs[img_id] if cfg.debug else None
-        labels, label_weights, bbox_targets, bbox_weights = proposal_target_single(
-            pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
-            reg_num_classes, cfg, target_means, target_stds)
-        all_labels.append(labels)
-        all_label_weights.append(label_weights)
-        all_bbox_targets.append(bbox_targets)
-        all_bbox_weights.append(bbox_weights)
+                concat=True):
labels, label_weights, bbox_targets, bbox_weights = multi_apply(
proposal_target_single,
pos_proposals_list,
neg_proposals_list,
pos_gt_bboxes_list,
pos_gt_labels_list,
cfg=cfg,
reg_num_classes=reg_num_classes,
target_means=target_means,
target_stds=target_stds)
-    if return_list:
-        return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights
-    labels = torch.cat(all_labels, 0)
-    label_weights = torch.cat(all_label_weights, 0)
-    bbox_targets = torch.cat(all_bbox_targets, 0)
-    bbox_weights = torch.cat(all_bbox_weights, 0)
+    if concat:
+        labels = torch.cat(labels, 0)
+        label_weights = torch.cat(label_weights, 0)
+        bbox_targets = torch.cat(bbox_targets, 0)
+        bbox_weights = torch.cat(bbox_weights, 0)
return labels, label_weights, bbox_targets, bbox_weights
@@ -47,8 +36,8 @@ def proposal_target_single(pos_proposals,
neg_proposals,
pos_gt_bboxes,
pos_gt_labels,
-                           reg_num_classes,
                            cfg,
+                           reg_num_classes=1,
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]):
num_pos = pos_proposals.size(0)
@@ -62,8 +51,8 @@ def proposal_target_single(pos_proposals,
labels[:num_pos] = pos_gt_labels
pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
label_weights[:num_pos] = pos_weight
-        pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes,
-                                          target_means, target_stds)
+        pos_bbox_targets = bbox2delta(pos_proposals, pos_gt_bboxes,
+                                      target_means, target_stds)
bbox_targets[:num_pos, :] = pos_bbox_targets
bbox_weights[:num_pos, :] = 1
if num_neg > 0:
......
@@ -5,6 +5,11 @@ from .geometry import bbox_overlaps
def random_choice(gallery, num):
"""Random select some elements from the gallery.
It seems that Pytorch's implementation is slower than numpy so we use numpy
to randperm the indices.
"""
assert len(gallery) >= num
if isinstance(gallery, list):
gallery = np.array(gallery)
@@ -12,38 +17,42 @@ def random_choice(gallery, num):
np.random.shuffle(cands)
rand_inds = cands[:num]
if not isinstance(gallery, np.ndarray):
-        rand_inds = torch.from_numpy(rand_inds).long()
-        if gallery.is_cuda:
-            rand_inds = rand_inds.cuda(gallery.get_device())
+        rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
return gallery[rand_inds]
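Usage sketch for `random_choice` above: it accepts a list, ndarray, or tensor gallery and returns `num` distinct elements of the same container type:

```python
# Usage sketch of random_choice (defined above) with a tensor gallery.
import torch

gallery = torch.arange(10)
picked = random_choice(gallery, 3)
print(picked.shape)  # torch.Size([3]); 3 distinct elements of gallery
```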
def bbox_assign(proposals,
gt_bboxes,
-                gt_crowd_bboxes=None,
+                gt_bboxes_ignore=None,
gt_labels=None,
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=.0,
crowd_thr=-1):
"""Assign a corresponding gt bbox or background to each proposal/anchor
This function assign a gt bbox to every proposal, each proposals will be
assigned with -1, 0, or a positive number. -1 means don't care, 0 means
negative sample, positive number is the index (1-based) of assigned gt.
If gt_crowd_bboxes is not None, proposals which have iof(intersection over foreground)
with crowd bboxes over crowd_thr will be ignored
"""Assign a corresponding gt bbox or background to each proposal/anchor.
Each proposals will be assigned with `-1`, `0`, or a positive integer.
- -1: don't care
- 0: negative sample, no assigned gt
- positive integer: positive sample, index (1-based) of assigned gt
If `gt_bboxes_ignore` is specified, bboxes which have iof (intersection
over foreground) with `gt_bboxes_ignore` above `crowd_thr` will be ignored.
Args:
-        proposals(Tensor): proposals or RPN anchors, shape (n, 4)
-        gt_bboxes(Tensor): shape (k, 4)
-        gt_crowd_bboxes(Tensor): shape(m, 4)
-        gt_labels(Tensor, optional): shape (k, )
-        pos_iou_thr(float): iou threshold for positive bboxes
-        neg_iou_thr(float or tuple): iou threshold for negative bboxes
-        min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox,
-                            for RPN, it is usually set as 0, for Fast R-CNN,
-                            it is usually set as pos_iou_thr
-        crowd_thr: ignore proposals which have iof(intersection over foreground) with
-                   crowd bboxes over crowd_thr
+        proposals (Tensor): Proposals or RPN anchors, shape (n, 4).
+        gt_bboxes (Tensor): Ground truth bboxes, shape (k, 4).
+        gt_bboxes_ignore (Tensor, optional): shape (m, 4).
+        gt_labels (Tensor, optional): shape (k, ).
+        pos_iou_thr (float): IoU threshold for positive bboxes.
+        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+        min_pos_iou (float): Minimum IoU for a bbox to be considered a
+            positive bbox. For RPN it is usually set to 0.3; for Fast R-CNN
+            it is usually set to pos_iou_thr.
+        crowd_thr (float): IoF threshold for ignoring bboxes. A negative
+            value means no bboxes are ignored.
Returns:
tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
"""
@@ -54,45 +63,50 @@ def bbox_assign(proposals,
raise ValueError('No gt bbox or proposals')
# ignore proposals according to crowd bboxes
-    if (crowd_thr > 0) and (gt_crowd_bboxes is
-                            not None) and (gt_crowd_bboxes.numel() > 0):
-        crowd_overlaps = bbox_overlaps(proposals, gt_crowd_bboxes, mode='iof')
+    if (crowd_thr > 0) and (gt_bboxes_ignore is
+                            not None) and (gt_bboxes_ignore.numel() > 0):
+        crowd_overlaps = bbox_overlaps(proposals, gt_bboxes_ignore, mode='iof')
crowd_max_overlaps, _ = crowd_overlaps.max(dim=1)
crowd_bboxes_inds = torch.nonzero(
crowd_max_overlaps > crowd_thr).long()
if crowd_bboxes_inds.numel() > 0:
overlaps[crowd_bboxes_inds, :] = -1
-    return bbox_assign_via_overlaps(overlaps, gt_labels, pos_iou_thr,
+    return bbox_assign_wrt_overlaps(overlaps, gt_labels, pos_iou_thr,
neg_iou_thr, min_pos_iou)
-def bbox_assign_via_overlaps(overlaps,
+def bbox_assign_wrt_overlaps(overlaps,
gt_labels=None,
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=.0):
"""Assign a corresponding gt bbox or background to each proposal/anchor
This function assign a gt bbox to every proposal, each proposals will be
"""Assign a corresponding gt bbox or background to each proposal/anchor.
This method assign a gt bbox to every proposal, each proposals will be
assigned with -1, 0, or a positive number. -1 means don't care, 0 means
negative sample, positive number is the index (1-based) of assigned gt.
    The assignment is done in the following steps (the order matters):
    1. assign every anchor to -1
    2. assign proposals whose iou with all gts < neg_iou_thr to 0
    3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
    assign it to that bbox
    4. for each gt bbox, assign its nearest proposals (may be more than one)
    to itself
Args:
-        overlaps(Tensor): overlaps between n proposals and k gt_bboxes, shape(n, k)
-        gt_labels(Tensor, optional): shape (k, )
-        pos_iou_thr(float): iou threshold for positive bboxes
-        neg_iou_thr(float or tuple): iou threshold for negative bboxes
-        min_pos_iou(float): minimum iou for a bbox to be considered as a positive bbox,
-                            for RPN, it is usually set as 0, for Fast R-CNN,
-                            it is usually set as pos_iou_thr
+        overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
+            shape (n, k).
+        gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
+        pos_iou_thr (float): IoU threshold for positive bboxes.
+        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+        min_pos_iou (float): Minimum IoU for a bbox to be considered a
+            positive bbox. This argument only affects the 4th step.
Returns:
-        tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
+        tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
+            max_overlaps), shape (n, )
"""
num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
# 1. assign -1 by default
@@ -138,8 +152,9 @@ def bbox_assign_via_overlaps(overlaps,
return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps
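A worked example of the four assignment steps, calling `bbox_assign_wrt_overlaps` above with `pos_iou_thr=0.7`, `neg_iou_thr=0.3`, `min_pos_iou=0.3`; the expected values in the comments are hand-computed:

```python
# Worked example of the 4-step assignment (function defined above);
# expected outputs are hand-computed, not taken from the source.
import torch

overlaps = torch.tensor([[0.10, 0.05],   # best IoU 0.10 < 0.3   -> 0 (negative)
                         [0.80, 0.20],   # best IoU 0.80 >= 0.7  -> gt #1
                         [0.45, 0.35]])  # 0.3 <= 0.45 < 0.7     -> -1 after step 3
result = bbox_assign_wrt_overlaps(
    overlaps, pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3)
# Step 4 rescues row 2: it is gt #2's nearest proposal with IoU
# 0.35 >= min_pos_iou, so it is assigned to gt 2 (1-based).
print(result[0])  # expected: tensor([0, 1, 2])
```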
-def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
-    """Balance sampling for positive bboxes/anchors
+def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
+    """Balance sampling for positive bboxes/anchors.
1. calculate average positive num for each gt: num_per_gt
2. sample at most num_per_gt positives for each gt
3. random sampling from rest anchors if not enough fg
@@ -180,15 +195,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
return sampled_inds
-def sample_negatives(assigned_gt_inds,
-                     num_expected,
-                     max_overlaps=None,
-                     balance_thr=0,
-                     hard_fraction=0.5):
-    """Balance sampling for negative bboxes/anchors
-    negative samples are split into 2 set: hard(balance_thr <= iou < neg_iou_thr)
-    and easy(iou < balance_thr), around equal number of bg are sampled
-    from each set.
+def bbox_sampling_neg(assigned_gt_inds,
+                      num_expected,
+                      max_overlaps=None,
+                      balance_thr=0,
+                      hard_fraction=0.5):
+    """Balance sampling for negative bboxes/anchors.
+    Negative samples are split into 2 sets: hard (balance_thr <= iou <
+    neg_iou_thr) and easy (iou < balance_thr). The sampling ratio is
+    controlled by `hard_fraction`.
"""
neg_inds = torch.nonzero(assigned_gt_inds == 0)
if neg_inds.numel() != 0:
@@ -241,55 +257,87 @@ def bbox_sampling(assigned_gt_inds,
max_overlaps=None,
neg_balance_thr=0,
neg_hard_fraction=0.5):
"""Sample positive and negative bboxes given assigned results.
Args:
assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
num_expected (int): Expected total samples (pos and neg).
pos_fraction (float): Positive sample fraction.
neg_pos_ub (float): Negative/Positive upper bound.
        pos_balance_sampling (bool): Whether to sample positive samples around
each gt bbox evenly.
max_overlaps (Tensor, optional): For each bbox, the max IoU of all gts.
Used for negative balance sampling only.
neg_balance_thr (float, optional): IoU threshold for simple/hard
negative balance sampling.
neg_hard_fraction (float, optional): Fraction of hard negative samples
for negative balance sampling.
Returns:
tuple[Tensor]: positive bbox indices, negative bbox indices.
"""
num_expected_pos = int(num_expected * pos_fraction)
-    pos_inds = sample_positives(assigned_gt_inds, num_expected_pos,
-                                pos_balance_sampling)
+    pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos,
+                                 pos_balance_sampling)
    # We found that sampled indices occasionally contain duplicated items
    # (may be a bug of PyTorch).
pos_inds = pos_inds.unique()
num_sampled_pos = pos_inds.numel()
num_neg_max = int(
neg_pos_ub *
num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub)
num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)
-    neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg,
-                                max_overlaps, neg_balance_thr,
-                                neg_hard_fraction)
+    neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg,
+                                 max_overlaps, neg_balance_thr,
+                                 neg_hard_fraction)
neg_inds = neg_inds.unique()
return pos_inds, neg_inds
-def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list,
-                     gt_labels_list, cfg):
-    cfg_list = [cfg for _ in range(len(proposals_list))]
-    results = map(sample_proposals_single, proposals_list, gt_bboxes_list,
-                  gt_crowds_list, gt_labels_list, cfg_list)
-    # list of tuple to tuple of list
-    return tuple(map(list, zip(*results)))
-def sample_proposals_single(proposals,
-                            gt_bboxes,
-                            gt_crowds,
-                            gt_labels,
-                            cfg):
-    proposals = proposals[:, :4]
+def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
+    """Sample positive and negative bboxes.
+    This is a simple implementation of bbox sampling given candidates and
+    ground truth bboxes, which includes 3 steps.
+    1. Assign gt to each bbox.
+    2. Add gt bboxes to the sampling pool (optional).
+    3. Perform positive and negative sampling.
+    Args:
+        bboxes (Tensor): Boxes to be sampled from.
+        gt_bboxes (Tensor): Ground truth bboxes.
+        gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO,
+            `crowd` bboxes are considered as ignored.
+        gt_labels (Tensor): Class labels of ground truth bboxes.
+        cfg (dict): Sampling configs.
+    Returns:
+        tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds,
+            pos_gt_bboxes, pos_gt_labels
+    """
+    bboxes = bboxes[:, :4]
assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
-        bbox_assign(
-            proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr,
-            cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr)
+        bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels,
+                    cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
+                    cfg.crowd_thr)
if cfg.add_gt_as_proposals:
-        proposals = torch.cat([gt_bboxes, proposals], dim=0)
+        bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
gt_assign_self = torch.arange(
-            1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device)
+            1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device)
assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
assigned_labels = torch.cat([gt_labels, assigned_labels])
pos_inds, neg_inds = bbox_sampling(
assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
-    pos_proposals = proposals[pos_inds]
-    neg_proposals = proposals[neg_inds]
+    pos_bboxes = bboxes[pos_inds]
+    neg_bboxes = bboxes[neg_inds]
pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
pos_gt_labels = assigned_labels[pos_inds]
-    return (pos_inds, neg_inds, pos_proposals, neg_proposals,
-            pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels)
+    return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes,
+            pos_gt_labels)
@@ -3,7 +3,7 @@ import numpy as np
import torch
-def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
+def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
assert proposals.size() == gt.size()
proposals = proposals.float()
@@ -31,12 +31,12 @@ def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
return deltas
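For intuition, `bbox2delta` encodes a gt box relative to a proposal as normalized center offsets and log scale ratios, which `delta2bbox` below inverts. A hand-checked example, assuming the inclusive `x2 - x1 + 1` width convention used elsewhere in this codebase (compare `xyxy2xywh` further down):

```python
# Hand-checked (dx, dy, dw, dh) encoding for one proposal/gt pair,
# assuming the x2 - x1 + 1 width convention used elsewhere in this repo.
import math

proposal = [0., 0., 100., 100.]    # center (50, 50), w = h = 101
gt       = [10., 10., 110., 110.]  # center (60, 60), w = h = 101
dx = (60 - 50) / 101               # ~0.099: x offset over proposal width
dy = (60 - 50) / 101               # ~0.099
dw = math.log(101 / 101)           # 0.0: same width
dh = math.log(101 / 101)           # 0.0: same height
print(dx, dy, dw, dh)              # then normalized as (delta - mean) / std
```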
-def bbox_transform_inv(rois,
-                       deltas,
-                       means=[0, 0, 0, 0],
-                       stds=[1, 1, 1, 1],
-                       max_shape=None,
-                       wh_ratio_clip=16 / 1000):
+def delta2bbox(rois,
+               deltas,
+               means=[0, 0, 0, 0],
+               stds=[1, 1, 1, 1],
+               max_shape=None,
+               wh_ratio_clip=16 / 1000):
means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
denorm_deltas = deltas * stds + means
@@ -69,10 +69,14 @@ def bbox_transform_inv(rois,
def bbox_flip(bboxes, img_shape):
"""Flip bboxes horizontally
"""Flip bboxes horizontally.
Args:
-        bboxes(Tensor): shape (..., 4*k)
-        img_shape(Tensor): image shape
+        bboxes (Tensor or ndarray): Shape (..., 4*k).
+        img_shape (tuple): Image shape.
Returns:
Same type as `bboxes`: Flipped bboxes.
"""
if isinstance(bboxes, torch.Tensor):
assert bboxes.shape[-1] % 4 == 0
@@ -84,25 +88,28 @@ def bbox_flip(bboxes, img_shape):
return mmcv.bbox_flip(bboxes, img_shape)
-def bbox_mapping(bboxes, img_shape, flip):
+def bbox_mapping(bboxes, img_shape, scale_factor, flip):
"""Map bboxes from the original image scale to testing scale"""
-    new_bboxes = bboxes * img_shape[-1]
+    new_bboxes = bboxes * scale_factor
if flip:
new_bboxes = bbox_flip(new_bboxes, img_shape)
return new_bboxes
-def bbox_mapping_back(bboxes, img_shape, flip):
+def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
"""Map bboxes from testing scale to original image scale"""
new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
-    new_bboxes = new_bboxes / img_shape[-1]
+    new_bboxes = new_bboxes / scale_factor
return new_bboxes
def bbox2roi(bbox_list):
"""Convert a list of bboxes to roi format.
Args:
-        bbox_list (Tensor): a list of bboxes corresponding to a list of images
+        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
+            of images.
Returns:
Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
"""
@@ -129,11 +136,13 @@ def roi2bbox(rois):
def bbox2result(bboxes, labels, num_classes):
"""Convert detection results to a list of numpy arrays
"""Convert detection results to a list of numpy arrays.
Args:
bboxes (Tensor): shape (n, 5)
labels (Tensor): shape (n, )
num_classes (int): class number, including background class
Returns:
list(ndarray): bbox results of each class
"""
......
-from .geometry import bbox_overlaps
-from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps,
-                       bbox_sampling, sample_positives, sample_negatives,
-                       sample_proposals)
-from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip,
-                         bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox,
-                         bbox2result)
-from .bbox_target import bbox_target
-__all__ = [
-    'bbox_overlaps', 'random_choice', 'bbox_assign',
-    'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives',
-    'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip',
-    'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
-    'bbox_target', 'sample_proposals'
-]
from .class_names import (voc_classes, imagenet_det_classes,
imagenet_vid_classes, coco_classes, dataset_aliases,
get_classes)
from .coco_utils import coco_eval, fast_eval_recall, results2json
from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook,
CocoDistEvalmAPHook)
from .mean_ap import average_precision, eval_map, print_map_summary
from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
plot_iou_recall)
__all__ = [
'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
-    'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision',
+    'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
'fast_eval_recall', 'results2json', 'DistEvalHook',
'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
'plot_num_recall', 'plot_iou_recall'
]
@@ -95,7 +95,7 @@ def get_classes(dataset):
if mmcv.is_str(dataset):
if dataset in alias2name:
-            labels = eval(alias2name[dataset] + '_labels()')
+            labels = eval(alias2name[dataset] + '_classes()')
else:
raise ValueError('Unrecognized dataset: {}'.format(dataset))
else:
......
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .recall import eval_recalls
def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
for res_type in result_types:
assert res_type in [
'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
]
if mmcv.is_str(coco):
coco = COCO(coco)
assert isinstance(coco, COCO)
if res_type == 'proposal_fast':
ar = fast_eval_recall(result_file, coco, max_dets)
for i, num in enumerate(max_dets):
print('AR@{}\t= {:.4f}'.format(num, ar[i]))
return
assert result_file.endswith('.json')
coco_dets = coco.loadRes(result_file)
img_ids = coco.getImgIds()
for res_type in result_types:
iou_type = 'bbox' if res_type == 'proposal' else res_type
cocoEval = COCOeval(coco, coco_dets, iou_type)
cocoEval.params.imgIds = img_ids
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = list(max_dets)
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
def fast_eval_recall(results,
coco,
max_dets,
iou_thrs=np.arange(0.5, 0.96, 0.05)):
if mmcv.is_str(results):
assert results.endswith('.pkl')
results = mmcv.load(results)
elif not isinstance(results, list):
raise TypeError(
'results must be a list of numpy arrays or a filename, not {}'.
format(type(results)))
gt_bboxes = []
img_ids = coco.getImgIds()
for i in range(len(img_ids)):
ann_ids = coco.getAnnIds(imgIds=img_ids[i])
ann_info = coco.loadAnns(ann_ids)
if len(ann_info) == 0:
gt_bboxes.append(np.zeros((0, 4)))
continue
bboxes = []
for ann in ann_info:
if ann.get('ignore', False) or ann['iscrowd']:
continue
x1, y1, w, h = ann['bbox']
bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
bboxes = np.array(bboxes, dtype=np.float32)
if bboxes.shape[0] == 0:
bboxes = np.zeros((0, 4))
gt_bboxes.append(bboxes)
recalls = eval_recalls(
gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
ar = recalls.mean(axis=1)
return ar
def xyxy2xywh(bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0] + 1,
_bbox[3] - _bbox[1] + 1,
]
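Quick check of `xyxy2xywh` above; note the inclusive `+ 1` pixel convention when converting corners to COCO's `[x, y, w, h]`:

```python
# Quick check of xyxy2xywh (defined above): corners to COCO xywh with the
# inclusive +1 pixel convention.
import torch

print(xyxy2xywh(torch.tensor([10., 20., 30., 50.])))
# [10.0, 20.0, 21.0, 31.0]
```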
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results
def det2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
json_results.append(data)
return json_results
def segm2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
def results2json(dataset, results, out_file):
if isinstance(results[0], list):
json_results = det2json(dataset, results)
elif isinstance(results[0], tuple):
json_results = segm2json(dataset, results)
elif isinstance(results[0], np.ndarray):
json_results = proposal2json(dataset, results)
else:
raise TypeError('invalid type of results')
mmcv.dump(json_results, out_file)
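For reference, each entry that `det2json`/`results2json` above emit has the shape below (the values are made up); this is the format `coco.loadRes` consumes in `coco_eval`:

```python
# Shape of one json entry produced by det2json above (values are made up).
example_entry = {
    'image_id': 42,
    'bbox': [10.0, 20.0, 21.0, 31.0],  # xywh from xyxy2xywh
    'score': 0.97,
    'category_id': 1,
}
```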