"examples/vscode:/vscode.git/clone" did not exist on "37aa99c51081c1cfddd9c91dcc1ca1c79196d55a"
Unverified commit 7d343fd2, authored by Kai Chen, committed by GitHub

Merge pull request #8 from hellock/dev

API cleaning and code refactoring (WIP)
parents 0e0b9246 630687f4
.gitignore:
@@ -104,4 +104,6 @@ venv.bak/
 .mypy_cache/
 # cython generated cpp
-mmdet/ops/nms/*.cpp
\ No newline at end of file
+mmdet/ops/nms/*.cpp
+mmdet/version.py
+data
TODO.md (new file):
### MMCV
- [ ] Implement the 'get' attribute of 'Config'
- [ ] Config bug: None values are converted to '{}' by addict
- [ ] The default logger should only log on GPU 0
- [ ] Unit tests: mmcv and mmcv.torchpack
### MMDetection
#### Basic
- [ ] Implement a training function without distributed support
- [ ] Verify nccl/nccl2/gloo
- [ ] Replace the ugly pattern of stuffing params into 'args' to pass them through the global flow
- [ ] Replace 'print' with 'logger'
#### Testing
- [ ] Implement distributed testing
- [ ] Implement single-GPU testing
#### Refactor
- [ ] Reconsider parameter names
- [ ] Refactor functions in 'core'
- [ ] Merge single-scale test & aug test into one function, and remove similar redundancy elsewhere
#### New features
- [ ] Plug loss params into Config
- [ ] Multi-head communication
Faster R-CNN R-50-FPN 1x config:
 # model settings
 model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    type='FasterRCNN',
+    pretrained='modelzoo://resnet50',
     backbone=dict(
         type='resnet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
@@ -18,15 +18,14 @@ model = dict(
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
-        coarsest_stride=32,
         anchor_scales=[8],
         anchor_ratios=[0.5, 1.0, 2.0],
         anchor_strides=[4, 8, 16, 32, 64],
         target_means=[.0, .0, .0, .0],
         target_stds=[1.0, 1.0, 1.0, 1.0],
         use_sigmoid_cls=True),
-    roi_block=dict(
-        type='SingleLevelRoI',
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
         out_channels=256,
         featmap_strides=[4, 8, 16, 32]),
@@ -40,28 +39,23 @@ model = dict(
         target_means=[0., 0., 0., 0.],
         target_stds=[0.1, 0.1, 0.2, 0.2],
         reg_class_agnostic=False))
-meta_params = dict(
-    rpn_train_cfg = dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
         pos_fraction=0.5,
         pos_balance_sampling=False,
         neg_pos_ub=256,
         allowed_border=0,
-        crowd_thr=1.1,
         anchor_batch_size=256,
         pos_iou_thr=0.7,
         neg_iou_thr=0.3,
         neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
         pos_weight=-1,
         smoothl1_beta=1 / 9.0,
         debug=False),
-    rpn_test_cfg = dict(
-        nms_across_levels=False,
-        nms_pre=2000,
-        nms_post=2000,
-        max_num=2000,
-        nms_thr=0.7,
-        min_bbox_size=0),
-    rcnn_train_cfg = dict(
+    rcnn=dict(
         pos_iou_thr=0.5,
         neg_iou_thr=0.5,
         crowd_thr=1.1,
@@ -71,55 +65,84 @@ meta_params = dict(
         pos_balance_sampling=False,
         neg_pos_ub=512,
         neg_balance_thr=0,
-        min_pos_iou=1.1,
         pos_weight=-1,
-        debug=False),
-    rcnn_test_cfg = dict(score_thr=1e-3, max_per_img=100, nms_thr=0.5)
-)
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(score_thr=0.05, max_per_img=100, nms_thr=0.5))
 # dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
 img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53],
-    std=[58.395, 57.12, 57.375],
-    to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
+    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
         ann_file=data_root + 'annotations/instances_train2017.json',
         img_prefix=data_root + 'train2017/',
         img_scale=(1333, 800),
         img_norm_cfg=img_norm_cfg,
         size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32)
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
 # optimizer
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 # learning policy
-lr_policy = dict(
+lr_config = dict(
     policy='step',
     warmup='linear',
     warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
     step=[8, 11])
-max_epoch = 12
 checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
 # yapf:disable
 log_config = dict(
     interval=50,
     hooks=[
         dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-work_dir = './model/r50_fpn_frcnn_1x'
+# runtime settings
+total_epochs = 12
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
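These configs are plain Python modules. A minimal sketch of how one is typically loaded and inspected, assuming mmcv's Config utility and an illustrative file path:

    from mmcv import Config

    cfg = Config.fromfile('faster_rcnn_r50_fpn_1x.py')  # path is illustrative
    print(cfg.model.backbone.depth)  # 50
    print(cfg.data.imgs_per_gpu)     # 2
    print(cfg.optimizer.lr)          # 0.02

Attribute-style access like cfg.model.backbone.depth is what the addict-related TODO item above refers to.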
Mask R-CNN R-50-FPN 1x config:
 # model settings
 model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    type='MaskRCNN',
+    pretrained='modelzoo://resnet50',
     backbone=dict(
         type='resnet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
@@ -18,15 +18,14 @@ model = dict(
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
-        coarsest_stride=32,
         anchor_scales=[8],
         anchor_ratios=[0.5, 1.0, 2.0],
         anchor_strides=[4, 8, 16, 32, 64],
         target_means=[.0, .0, .0, .0],
         target_stds=[1.0, 1.0, 1.0, 1.0],
         use_sigmoid_cls=True),
-    roi_block=dict(
-        type='SingleLevelRoI',
+    bbox_roi_extractor=dict(
+        type='SingleRoIExtractor',
         roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
         out_channels=256,
         featmap_strides=[4, 8, 16, 32]),
@@ -40,8 +39,8 @@ model = dict(
         target_means=[0., 0., 0., 0.],
         target_stds=[0.1, 0.1, 0.2, 0.2],
         reg_class_agnostic=False),
-    mask_block=dict(
-        type='SingleLevelRoI',
+    mask_roi_extractor=dict(
+        type='SingleRoIExtractor',
         roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
         out_channels=256,
         featmap_strides=[4, 8, 16, 32]),
@@ -51,28 +50,23 @@ model = dict(
         in_channels=256,
         conv_out_channels=256,
         num_classes=81))
-meta_params = dict(
-    rpn_train_cfg=dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
         pos_fraction=0.5,
         pos_balance_sampling=False,
         neg_pos_ub=256,
         allowed_border=0,
-        crowd_thr=1.1,
         anchor_batch_size=256,
         pos_iou_thr=0.7,
         neg_iou_thr=0.3,
         neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
         pos_weight=-1,
         smoothl1_beta=1 / 9.0,
         debug=False),
-    rpn_test_cfg=dict(
-        nms_across_levels=False,
-        nms_pre=2000,
-        nms_post=2000,
-        max_num=2000,
-        nms_thr=0.7,
-        min_bbox_size=0),
-    rcnn_train_cfg=dict(
+    rcnn=dict(
         mask_size=28,
         pos_iou_thr=0.5,
         neg_iou_thr=0.5,
@@ -83,54 +77,85 @@ meta_params = dict(
         pos_balance_sampling=False,
         neg_pos_ub=512,
         neg_balance_thr=0,
-        min_pos_iou=1.1,
         pos_weight=-1,
-        debug=False),
-    rcnn_test_cfg=dict(
-        score_thr=1e-3, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
+        nms_across_levels=False,
+        nms_pre=2000,
+        nms_post=2000,
+        max_num=2000,
+        nms_thr=0.7,
+        min_bbox_size=0),
+    rcnn=dict(
+        score_thr=0.05, max_per_img=100, nms_thr=0.5, mask_thr_binary=0.5))
 # dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
-    with_mask=True,
-    ann_file=data_root + 'annotations/instances_train2017.json',
-    img_prefix=data_root + 'train2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=True,
+        with_crowd=True,
+        with_label=True),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
 # optimizer
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 # learning policy
-lr_policy = dict(
+lr_config = dict(
     policy='step',
     warmup='linear',
     warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
     step=[8, 11])
-max_epoch = 12
 checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
 # yapf:disable
 log_config = dict(
     interval=50,
     hooks=[
         dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-work_dir = './model/r50_fpn_mask_rcnn_1x'
+# runtime settings
+total_epochs = 12
+device_ids = range(8)
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
work_dir = './work_dirs/mask_rcnn_r50_fpn_1x'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
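The optimizer_config introduced here nests the clipping parameters under grad_clip instead of a grad_clip=True flag. A hedged sketch of how such a dict plausibly maps onto PyTorch's clipping utility inside the training hook (the hook wiring itself is an assumption):

    import torch
    from torch import nn

    model = nn.Linear(4, 2)
    model(torch.randn(8, 4)).sum().backward()
    grad_clip = dict(max_norm=35, norm_type=2)   # values from the config above
    nn.utils.clip_grad_norm_(model.parameters(), **grad_clip)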
RPN R-50-FPN 1x config:
 # model settings
 model = dict(
-    pretrained=
-    '/mnt/lustre/pangjiangmiao/initmodel/pytorch/resnet50-19c8e357.pth',
+    type='RPN',
+    pretrained='modelzoo://resnet50',
     backbone=dict(
         type='resnet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
         frozen_stages=1,
-        style='fb'),
+        style='pytorch'),
     neck=dict(
         type='FPN',
         in_channels=[256, 512, 1024, 2048],
@@ -18,28 +18,30 @@ model = dict(
         type='RPNHead',
         in_channels=256,
         feat_channels=256,
-        coarsest_stride=32,
         anchor_scales=[8],
         anchor_ratios=[0.5, 1.0, 2.0],
         anchor_strides=[4, 8, 16, 32, 64],
         target_means=[.0, .0, .0, .0],
         target_stds=[1.0, 1.0, 1.0, 1.0],
         use_sigmoid_cls=True))
-meta_params = dict(
-    rpn_train_cfg=dict(
+# model training and testing settings
+train_cfg = dict(
+    rpn=dict(
         pos_fraction=0.5,
         pos_balance_sampling=False,
         neg_pos_ub=256,
         allowed_border=0,
-        crowd_thr=1.1,
         anchor_batch_size=256,
         pos_iou_thr=0.7,
         neg_iou_thr=0.3,
         neg_balance_thr=0,
-        min_pos_iou=1e-3,
+        min_pos_iou=0.3,
         pos_weight=-1,
         smoothl1_beta=1 / 9.0,
-        debug=False),
-    rpn_test_cfg=dict(
+        debug=False))
+test_cfg = dict(
+    rpn=dict(
         nms_across_levels=False,
         nms_pre=2000,
         nms_post=2000,
@@ -47,49 +49,70 @@ meta_params = dict(
         nms_thr=0.7,
         min_bbox_size=0))
 # dataset settings
-data_root = '/mnt/lustre/pangjiangmiao/dataset/coco/'
+dataset_type = 'CocoDataset'
+data_root = 'data/coco/'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-img_per_gpu = 1
-data_workers = 2
-train_dataset = dict(
-    ann_file=data_root + 'annotations/instances_train2017.json',
-    img_prefix=data_root + 'train2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    flip_ratio=0.5)
-test_dataset = dict(
-    ann_file=data_root + 'annotations/instances_val2017.json',
-    img_prefix=data_root + 'val2017/',
-    img_scale=(1333, 800),
-    img_norm_cfg=img_norm_cfg,
-    size_divisor=32,
-    test_mode=True)
+data = dict(
+    imgs_per_gpu=2,
+    workers_per_gpu=2,
+    train=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_train2017.json',
+        img_prefix=data_root + 'train2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0.5,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False),
+    val=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_crowd=False,
+        with_label=False),
+    test=dict(
+        type=dataset_type,
+        ann_file=data_root + 'annotations/instances_val2017.json',
+        img_prefix=data_root + 'val2017/',
+        img_scale=(1333, 800),
+        img_norm_cfg=img_norm_cfg,
+        size_divisor=32,
+        flip_ratio=0,
+        with_mask=False,
+        with_label=False,
+        test_mode=True))
 # optimizer
 optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-grad_clip_config = dict(grad_clip=True, max_norm=35, norm_type=2)
-# learning policy
-lr_policy = dict(
+# runner configs
+optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
+lr_config = dict(
     policy='step',
     warmup='linear',
     warmup_iters=500,
-    warmup_ratio=0.333,
+    warmup_ratio=1.0 / 3,
     step=[8, 11])
-max_epoch = 12
 checkpoint_config = dict(interval=1)
-dist_params = dict(backend='nccl', port='29500', master_ip='127.0.0.1')
-# logging settings
-log_level = 'INFO'
 # yapf:disable
 log_config = dict(
     interval=50,
     hooks=[
         dict(type='TextLoggerHook'),
-        # ('TensorboardLoggerHook', dict(log_dir=work_dir + '/log')),
+        # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-work_dir = './model/r50_fpn_1x'
+# runtime settings
+total_epochs = 12
+dist_params = dict(backend='nccl')
+log_level = 'INFO'
+work_dir = './work_dirs/rpn_r50_fpn_1x'
 load_from = None
 resume_from = None
 workflow = [('train', 1)]
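warmup_ratio=1.0 / 3 with warmup='linear' means training starts at a third of the base LR and ramps up linearly over warmup_iters. A small sketch of the arithmetic (this mirrors the common linear-warmup convention; the exact in-library expression is an assumption):

    def warmup_lr(base_lr, cur_iter, warmup_iters=500, warmup_ratio=1.0 / 3):
        k = (1 - cur_iter / warmup_iters) * (1 - warmup_ratio)
        return base_lr * (1 - k)

    warmup_lr(0.02, 0)    # ~0.00667 (base_lr / 3) at the first iteration
    warmup_lr(0.02, 500)  # 0.02 once warmup finishes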
mmdet/__init__.py:
-from .version import __version__
+from .version import __version__, short_version
+
+__all__ = ['__version__', 'short_version']
mmdet/core/__init__.py:
-from .train_engine import *
-from .test_engine import *
-from .rpn_ops import *
-from .bbox_ops import *
-from .mask_ops import *
-from .losses import *
-from .eval import *
-from .post_processing import *
-from .utils import *
+from .anchor import *  # noqa: F401, F403
+from .bbox import *  # noqa: F401, F403
+from .mask import *  # noqa: F401, F403
+from .loss import *  # noqa: F401, F403
+from .evaluation import *  # noqa: F401, F403
+from .post_processing import *  # noqa: F401, F403
+from .utils import *  # noqa: F401, F403
mmdet/core/anchor/__init__.py (new file):
+from .anchor_generator import AnchorGenerator
+from .anchor_target import anchor_target
+
+__all__ = ['AnchorGenerator', 'anchor_target']

mmdet/core/anchor/anchor_generator.py:
@@ -50,15 +50,18 @@ class AnchorGenerator(object):
         return yy, xx

     def grid_anchors(self, featmap_size, stride=16, device='cuda'):
+        base_anchors = self.base_anchors.to(device)
+
         feat_h, feat_w = featmap_size
         shift_x = torch.arange(0, feat_w, device=device) * stride
         shift_y = torch.arange(0, feat_h, device=device) * stride
         shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)
         shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
+        shifts = shifts.type_as(base_anchors)
         # first feat_w elements correspond to the first row of shifts
         # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get
         # shifted anchors (K, A, 4), reshape to (K*A, 4)
-        base_anchors = self.base_anchors.to(device)
         all_anchors = base_anchors[None, :, :] + shifts[:, None, :]
         all_anchors = all_anchors.view(-1, 4)
         # first A rows correspond to A anchors of (0, 0) in feature map,
...
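The comments above describe a broadcasting trick: base anchors of shape (1, A, 4) plus shifts of shape (K, 1, 4) broadcast to (K, A, 4), then flatten to (K*A, 4). A self-contained numeric check with toy values (the meshgrid is inlined here since the class uses a private helper):

    import torch

    base_anchors = torch.tensor([[-8., -8., 8., 8.],
                                 [-16., -16., 16., 16.]])  # A = 2 anchors
    stride, feat_h, feat_w = 16, 2, 3                      # K = 6 locations
    shift_x = torch.arange(0, feat_w).float() * stride
    shift_y = torch.arange(0, feat_h).float() * stride
    shift_xx = shift_x.repeat(feat_h)                             # row-major
    shift_yy = shift_y.view(-1, 1).repeat(1, feat_w).view(-1)
    shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)
    all_anchors = (base_anchors[None, :, :] + shifts[:, None, :]).view(-1, 4)
    assert all_anchors.shape == (12, 4)  # K * A = 6 * 2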
mmdet/core/anchor/anchor_target.py:
 import torch
-import numpy as np

-from ..bbox_ops import (bbox_assign, bbox_transform, bbox_sampling)
+from ..bbox import bbox_assign, bbox2delta, bbox_sampling
+from ..utils import multi_apply


-def anchor_target(anchor_list, valid_flag_list, featmap_sizes, gt_bboxes_list,
-                  img_shapes, target_means, target_stds, cfg):
-    """Compute anchor regression and classification targets
+def anchor_target(anchor_list, valid_flag_list, gt_bboxes_list, img_metas,
+                  target_means, target_stds, cfg):
+    """Compute regression and classification targets for anchors.

     Args:
-        anchor_list(list): anchors of each feature map level
-        featuremap_sizes(list): feature map sizes
-        gt_bboxes_list(list): ground truth bbox of images in a mini-batch
-        img_shapes(list): shape of each image in a mini-batch
-        cfg(dict): configs
+        anchor_list (list[list]): Multi level anchors of each image.
+        valid_flag_list (list[list]): Multi level valid flags of each image.
+        gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
+        img_metas (list[dict]): Meta info of each image.
+        target_means (Iterable): Mean value of regression targets.
+        target_stds (Iterable): Std value of regression targets.
+        cfg (dict): RPN train configs.

     Returns:
         tuple
     """
-    if len(featmap_sizes) == len(anchor_list):
-        all_anchors = torch.cat(anchor_list, 0)
-        anchor_nums = [anchors.size(0) for anchors in anchor_list]
-        use_isomerism_anchors = False
-    elif len(img_shapes) == len(anchor_list):
-        # using different anchors for different images
-        all_anchors_list = [
-            torch.cat(anchor_list[img_id], 0)
-            for img_id in range(len(img_shapes))
-        ]
-        anchor_nums = [anchors.size(0) for anchors in anchor_list[0]]
-        use_isomerism_anchors = True
-    else:
-        raise ValueError('length of anchor_list should be equal to number of '
-                         'feature lvls or number of images in a batch')
-    all_labels = []
-    all_label_weights = []
-    all_bbox_targets = []
-    all_bbox_weights = []
-    num_total_sampled = 0
-    for img_id in range(len(img_shapes)):
-        if isinstance(valid_flag_list[img_id], list):
-            valid_flags = torch.cat(valid_flag_list[img_id], 0)
-        else:
-            valid_flags = valid_flag_list[img_id]
-        if use_isomerism_anchors:
-            all_anchors = all_anchors_list[img_id]
-        inside_flags = anchor_inside_flags(all_anchors, valid_flags,
-                                           img_shapes[img_id][:2],
-                                           cfg.allowed_border)
-        if not inside_flags.any():
-            return None
-        gt_bboxes = gt_bboxes_list[img_id]
-        anchor_targets = anchor_target_single(all_anchors, inside_flags,
-                                              gt_bboxes, target_means,
-                                              target_stds, cfg)
-        (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
-         neg_inds) = anchor_targets
-        all_labels.append(labels)
-        all_label_weights.append(label_weights)
-        all_bbox_targets.append(bbox_targets)
-        all_bbox_weights.append(bbox_weights)
-        num_total_sampled += max(pos_inds.numel() + neg_inds.numel(), 1)
-    all_labels = torch.stack(all_labels, 0)
-    all_label_weights = torch.stack(all_label_weights, 0)
-    all_bbox_targets = torch.stack(all_bbox_targets, 0)
-    all_bbox_weights = torch.stack(all_bbox_weights, 0)
-    # split into different feature levels
-    labels_list = []
-    label_weights_list = []
-    bbox_targets_list = []
-    bbox_weights_list = []
-    start = 0
-    for anchor_num in anchor_nums:
-        end = start + anchor_num
-        labels_list.append(all_labels[:, start:end].squeeze(0))
-        label_weights_list.append(all_label_weights[:, start:end].squeeze(0))
-        bbox_targets_list.append(all_bbox_targets[:, start:end].squeeze(0))
-        bbox_weights_list.append(all_bbox_weights[:, start:end].squeeze(0))
-        start = end
-    return (labels_list, label_weights_list, bbox_targets_list,
-            bbox_weights_list, num_total_sampled)
+    num_imgs = len(img_metas)
+    assert len(anchor_list) == len(valid_flag_list) == num_imgs
+
+    # anchor number of multi levels
+    num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
+    # concat all level anchors and flags to a single tensor
+    for i in range(num_imgs):
+        assert len(anchor_list[i]) == len(valid_flag_list[i])
+        anchor_list[i] = torch.cat(anchor_list[i])
+        valid_flag_list[i] = torch.cat(valid_flag_list[i])
+
+    # compute targets for each image
+    means_replicas = [target_means for _ in range(num_imgs)]
+    stds_replicas = [target_stds for _ in range(num_imgs)]
+    cfg_replicas = [cfg for _ in range(num_imgs)]
+    (all_labels, all_label_weights, all_bbox_targets,
+     all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(
+         anchor_target_single, anchor_list, valid_flag_list, gt_bboxes_list,
+         img_metas, means_replicas, stds_replicas, cfg_replicas)
+    # no valid anchors
+    if any([labels is None for labels in all_labels]):
+        return None
+    # sampled anchors of all images
+    num_total_samples = sum([
+        max(pos_inds.numel() + neg_inds.numel(), 1)
+        for pos_inds, neg_inds in zip(pos_inds_list, neg_inds_list)
+    ])
+    # split targets to a list w.r.t. multiple levels
+    labels_list = images_to_levels(all_labels, num_level_anchors)
+    label_weights_list = images_to_levels(all_label_weights, num_level_anchors)
+    bbox_targets_list = images_to_levels(all_bbox_targets, num_level_anchors)
+    bbox_weights_list = images_to_levels(all_bbox_weights, num_level_anchors)
+    return (labels_list, label_weights_list, bbox_targets_list,
+            bbox_weights_list, num_total_samples)
+
+
+def images_to_levels(target, num_level_anchors):
+    """Convert targets by image to targets by feature level.
+
+    [target_img0, target_img1] -> [target_level0, target_level1, ...]
+    """
+    target = torch.stack(target, 0)
+    level_targets = []
     start = 0
+    for n in num_level_anchors:
+        end = start + n
+        level_targets.append(target[:, start:end].squeeze(0))
+        start = end
+    return level_targets


-def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
-                         target_stds, cfg):
-    num_total_anchors = all_anchors.size(0)
-    anchors = all_anchors[inside_flags, :]
+def anchor_target_single(flat_anchors, valid_flags, gt_bboxes, img_meta,
+                         target_means, target_stds, cfg):
+    inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
+                                       img_meta['img_shape'][:2],
+                                       cfg.allowed_border)
+    if not inside_flags.any():
+        return (None, ) * 6
+    # assign gt and sample anchors
+    anchors = flat_anchors[inside_flags, :]
     assigned_gt_inds, argmax_overlaps, max_overlaps = bbox_assign(
         anchors,
         gt_bboxes,
@@ -99,14 +94,13 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
     bbox_targets = torch.zeros_like(anchors)
     bbox_weights = torch.zeros_like(anchors)
     labels = torch.zeros_like(assigned_gt_inds)
-    label_weights = torch.zeros_like(assigned_gt_inds, dtype=torch.float)
+    label_weights = torch.zeros_like(assigned_gt_inds, dtype=anchors.dtype)

     if len(pos_inds) > 0:
-        pos_inds = unique(pos_inds)
         pos_anchors = anchors[pos_inds, :]
         pos_gt_bbox = gt_bboxes[assigned_gt_inds[pos_inds] - 1, :]
-        pos_bbox_targets = bbox_transform(pos_anchors, pos_gt_bbox,
-                                          target_means, target_stds)
+        pos_bbox_targets = bbox2delta(pos_anchors, pos_gt_bbox, target_means,
+                                      target_stds)
         bbox_targets[pos_inds, :] = pos_bbox_targets
         bbox_weights[pos_inds, :] = 1.0
         labels[pos_inds] = 1
@@ -115,10 +109,10 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
     else:
         label_weights[pos_inds] = cfg.pos_weight
     if len(neg_inds) > 0:
-        neg_inds = unique(neg_inds)
         label_weights[neg_inds] = 1.0
     # map up to original set of anchors
+    num_total_anchors = flat_anchors.size(0)
     labels = unmap(labels, num_total_anchors, inside_flags)
     label_weights = unmap(label_weights, num_total_anchors, inside_flags)
     bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
@@ -127,24 +121,20 @@ def anchor_target_single(all_anchors, inside_flags, gt_bboxes, target_means,
     return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
             neg_inds)


-def anchor_inside_flags(all_anchors, valid_flags, img_shape, allowed_border=0):
-    img_h, img_w = img_shape.float()
+def anchor_inside_flags(flat_anchors, valid_flags, img_shape,
+                        allowed_border=0):
+    img_h, img_w = img_shape[:2]
     if allowed_border >= 0:
         inside_flags = valid_flags & \
-            (all_anchors[:, 0] >= -allowed_border) & \
-            (all_anchors[:, 1] >= -allowed_border) & \
-            (all_anchors[:, 2] < img_w + allowed_border) & \
-            (all_anchors[:, 3] < img_h + allowed_border)
+            (flat_anchors[:, 0] >= -allowed_border) & \
+            (flat_anchors[:, 1] >= -allowed_border) & \
+            (flat_anchors[:, 2] < img_w + allowed_border) & \
+            (flat_anchors[:, 3] < img_h + allowed_border)
     else:
         inside_flags = valid_flags
     return inside_flags


-def unique(tensor):
-    if tensor.is_cuda:
-        u_tensor = np.unique(tensor.cpu().numpy())
-        return tensor.new_tensor(u_tensor)
-    else:
-        return torch.unique(tensor)
-
-
 def unmap(data, count, inds, fill=0):
     """ Unmap a subset of item (data) back to the original set of items (of
...
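images_to_levels inverts the per-image concatenation: per-image flat target vectors are stacked along a batch dimension and re-split by level, so each FPN level can compute its loss. A toy illustration using the same slicing logic:

    import torch

    num_level_anchors = [4, 2]  # two levels with 4 and 2 anchors per image
    all_labels = [torch.tensor([1, 0, 0, 1, 0, 1]),   # image 0
                  torch.tensor([0, 0, 1, 0, 1, 1])]   # image 1
    target = torch.stack(all_labels, 0)               # shape (num_imgs, 6)
    level_targets, start = [], 0
    for n in num_level_anchors:
        level_targets.append(target[:, start:start + n])
        start += n
    # level_targets[0].shape == (2, 4), level_targets[1].shape == (2, 2)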
mmdet/core/bbox/__init__.py (new file):
+from .geometry import bbox_overlaps
+from .sampling import (random_choice, bbox_assign, bbox_assign_wrt_overlaps,
+                       bbox_sampling, bbox_sampling_pos, bbox_sampling_neg,
+                       sample_bboxes)
+from .transforms import (bbox2delta, delta2bbox, bbox_flip, bbox_mapping,
+                         bbox_mapping_back, bbox2roi, roi2bbox, bbox2result)
+from .bbox_target import bbox_target
+
+__all__ = [
+    'bbox_overlaps', 'random_choice', 'bbox_assign',
+    'bbox_assign_wrt_overlaps', 'bbox_sampling', 'bbox_sampling_pos',
+    'bbox_sampling_neg', 'sample_bboxes', 'bbox2delta', 'delta2bbox',
+    'bbox_flip', 'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox',
+    'bbox2result', 'bbox_target'
+]
mmdet/core/bbox/bbox_target.py:
-import mmcv
 import torch

-from .geometry import bbox_overlaps
-from .transforms import bbox_transform, bbox_transform_inv
+from .transforms import bbox2delta
+from ..utils import multi_apply


 def bbox_target(pos_proposals_list,
@@ -13,33 +12,23 @@ def bbox_target(pos_proposals_list,
                 reg_num_classes=1,
                 target_means=[.0, .0, .0, .0],
                 target_stds=[1.0, 1.0, 1.0, 1.0],
-                return_list=False):
-    img_per_gpu = len(pos_proposals_list)
-    all_labels = []
-    all_label_weights = []
-    all_bbox_targets = []
-    all_bbox_weights = []
-    for img_id in range(img_per_gpu):
-        pos_proposals = pos_proposals_list[img_id]
-        neg_proposals = neg_proposals_list[img_id]
-        pos_gt_bboxes = pos_gt_bboxes_list[img_id]
-        pos_gt_labels = pos_gt_labels_list[img_id]
-        debug_img = debug_imgs[img_id] if cfg.debug else None
-        labels, label_weights, bbox_targets, bbox_weights = proposal_target_single(
-            pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
-            reg_num_classes, cfg, target_means, target_stds)
-        all_labels.append(labels)
-        all_label_weights.append(label_weights)
-        all_bbox_targets.append(bbox_targets)
-        all_bbox_weights.append(bbox_weights)
+                concat=True):
+    labels, label_weights, bbox_targets, bbox_weights = multi_apply(
+        proposal_target_single,
+        pos_proposals_list,
+        neg_proposals_list,
+        pos_gt_bboxes_list,
+        pos_gt_labels_list,
+        cfg=cfg,
+        reg_num_classes=reg_num_classes,
+        target_means=target_means,
+        target_stds=target_stds)

-    if return_list:
-        return all_labels, all_label_weights, all_bbox_targets, all_bbox_weights
-
-    labels = torch.cat(all_labels, 0)
-    label_weights = torch.cat(all_label_weights, 0)
-    bbox_targets = torch.cat(all_bbox_targets, 0)
-    bbox_weights = torch.cat(all_bbox_weights, 0)
+    if concat:
+        labels = torch.cat(labels, 0)
+        label_weights = torch.cat(label_weights, 0)
+        bbox_targets = torch.cat(bbox_targets, 0)
+        bbox_weights = torch.cat(bbox_weights, 0)
     return labels, label_weights, bbox_targets, bbox_weights

@@ -47,8 +36,8 @@ def proposal_target_single(pos_proposals,
                            neg_proposals,
                            pos_gt_bboxes,
                            pos_gt_labels,
-                           reg_num_classes,
                            cfg,
+                           reg_num_classes=1,
                            target_means=[.0, .0, .0, .0],
                            target_stds=[1.0, 1.0, 1.0, 1.0]):
     num_pos = pos_proposals.size(0)
@@ -62,8 +51,8 @@ def proposal_target_single(pos_proposals,
         labels[:num_pos] = pos_gt_labels
         pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight
         label_weights[:num_pos] = pos_weight
-        pos_bbox_targets = bbox_transform(pos_proposals, pos_gt_bboxes,
-                                          target_means, target_stds)
+        pos_bbox_targets = bbox2delta(pos_proposals, pos_gt_bboxes,
+                                      target_means, target_stds)
         bbox_targets[:num_pos, :] = pos_bbox_targets
         bbox_weights[:num_pos, :] = 1
     if num_neg > 0:
...
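Both refactored target functions lean on a multi_apply helper from ..utils to replace the hand-rolled per-image loops. A minimal sketch of what such a helper might look like, assuming it is essentially a map plus a transpose of the result tuples:

    from functools import partial

    def multi_apply(func, *args, **kwargs):
        # apply func across per-image argument lists, then turn the list of
        # per-image result tuples into a tuple of per-field lists
        pfunc = partial(func, **kwargs) if kwargs else func
        map_results = map(pfunc, *args)
        return tuple(map(list, zip(*map_results)))

    def add_and_mul(a, b, scale=1):
        return a + b, a * b * scale

    sums, prods = multi_apply(add_and_mul, [1, 2], [3, 4], scale=10)
    # sums == [4, 6], prods == [30, 80]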
mmdet/core/bbox/sampling.py:
@@ -5,6 +5,11 @@ from .geometry import bbox_overlaps


 def random_choice(gallery, num):
+    """Random select some elements from the gallery.
+
+    It seems that Pytorch's implementation is slower than numpy so we use
+    numpy to randperm the indices.
+    """
     assert len(gallery) >= num
     if isinstance(gallery, list):
         gallery = np.array(gallery)
@@ -12,38 +17,42 @@ def random_choice(gallery, num):
     np.random.shuffle(cands)
     rand_inds = cands[:num]
     if not isinstance(gallery, np.ndarray):
-        rand_inds = torch.from_numpy(rand_inds).long()
-        if gallery.is_cuda:
-            rand_inds = rand_inds.cuda(gallery.get_device())
+        rand_inds = torch.from_numpy(rand_inds).long().to(gallery.device)
     return gallery[rand_inds]


 def bbox_assign(proposals,
                 gt_bboxes,
-                gt_crowd_bboxes=None,
+                gt_bboxes_ignore=None,
                 gt_labels=None,
                 pos_iou_thr=0.5,
                 neg_iou_thr=0.5,
                 min_pos_iou=.0,
                 crowd_thr=-1):
-    """Assign a corresponding gt bbox or background to each proposal/anchor
-    This function assign a gt bbox to every proposal, each proposals will be
-    assigned with -1, 0, or a positive number. -1 means don't care, 0 means
-    negative sample, positive number is the index (1-based) of assigned gt.
-    If gt_crowd_bboxes is not None, proposals which have iof(intersection
-    over foreground) with crowd bboxes over crowd_thr will be ignored
+    """Assign a corresponding gt bbox or background to each proposal/anchor.
+
+    Each proposal will be assigned with `-1`, `0`, or a positive integer.
+
+    - -1: don't care
+    - 0: negative sample, no assigned gt
+    - positive integer: positive sample, index (1-based) of assigned gt
+
+    If `gt_bboxes_ignore` is specified, bboxes which have iof (intersection
+    over foreground) with `gt_bboxes_ignore` above `crowd_thr` will be
+    ignored.

     Args:
-        proposals(Tensor): proposals or RPN anchors, shape (n, 4)
-        gt_bboxes(Tensor): shape (k, 4)
-        gt_crowd_bboxes(Tensor): shape(m, 4)
-        gt_labels(Tensor, optional): shape (k, )
-        pos_iou_thr(float): iou threshold for positive bboxes
-        neg_iou_thr(float or tuple): iou threshold for negative bboxes
-        min_pos_iou(float): minimum iou for a bbox to be considered as a
-            positive bbox, for RPN it is usually set as 0, for Fast R-CNN
-            it is usually set as pos_iou_thr
-        crowd_thr: ignore proposals which have iof with crowd bboxes over
-            crowd_thr
+        proposals (Tensor): Proposals or RPN anchors, shape (n, 4).
+        gt_bboxes (Tensor): Ground truth bboxes, shape (k, 4).
+        gt_bboxes_ignore (Tensor, optional): shape(m, 4).
+        gt_labels (Tensor, optional): shape (k, ).
+        pos_iou_thr (float): IoU threshold for positive bboxes.
+        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+        min_pos_iou (float): Minimum IoU for a bbox to be considered as a
+            positive bbox. For RPN, it is usually set as 0.3, for Fast R-CNN,
+            it is usually set as pos_iou_thr.
+        crowd_thr (float): IoF threshold for ignoring bboxes. Negative value
+            for not ignoring any bboxes.

     Returns:
         tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
     """
@@ -54,45 +63,50 @@ def bbox_assign(proposals,
         raise ValueError('No gt bbox or proposals')

     # ignore proposals according to crowd bboxes
-    if (crowd_thr > 0) and (gt_crowd_bboxes is
-                            not None) and (gt_crowd_bboxes.numel() > 0):
-        crowd_overlaps = bbox_overlaps(proposals, gt_crowd_bboxes, mode='iof')
+    if (crowd_thr > 0) and (gt_bboxes_ignore is
+                            not None) and (gt_bboxes_ignore.numel() > 0):
+        crowd_overlaps = bbox_overlaps(proposals, gt_bboxes_ignore, mode='iof')
         crowd_max_overlaps, _ = crowd_overlaps.max(dim=1)
         crowd_bboxes_inds = torch.nonzero(
             crowd_max_overlaps > crowd_thr).long()
         if crowd_bboxes_inds.numel() > 0:
             overlaps[crowd_bboxes_inds, :] = -1

-    return bbox_assign_via_overlaps(overlaps, gt_labels, pos_iou_thr,
-                                    neg_iou_thr, min_pos_iou)
+    return bbox_assign_wrt_overlaps(overlaps, gt_labels, pos_iou_thr,
+                                    neg_iou_thr, min_pos_iou)


-def bbox_assign_via_overlaps(overlaps,
+def bbox_assign_wrt_overlaps(overlaps,
                              gt_labels=None,
                              pos_iou_thr=0.5,
                              neg_iou_thr=0.5,
                              min_pos_iou=.0):
-    """Assign a corresponding gt bbox or background to each proposal/anchor
-    This function assign a gt bbox to every proposal, each proposals will be
+    """Assign a corresponding gt bbox or background to each proposal/anchor.
+
+    This method assigns a gt bbox to every proposal; each proposal will be
     assigned with -1, 0, or a positive number. -1 means don't care, 0 means
     negative sample, positive number is the index (1-based) of assigned gt.
     The assignment is done in following steps, the order matters:

     1. assign every anchor to -1
     2. assign proposals whose iou with all gts < neg_iou_thr to 0
     3. for each anchor, if the iou with its nearest gt >= pos_iou_thr,
        assign it to that bbox
     4. for each gt bbox, assign its nearest proposals (may be more than one)
        to itself

     Args:
-        overlaps(Tensor): overlaps between n proposals and k gt_bboxes,
-            shape(n, k)
-        gt_labels(Tensor, optional): shape (k, )
-        pos_iou_thr(float): iou threshold for positive bboxes
-        neg_iou_thr(float or tuple): iou threshold for negative bboxes
-        min_pos_iou(float): minimum iou for a bbox to be considered as a
-            positive bbox, for RPN it is usually set as 0, for Fast R-CNN
-            it is usually set as pos_iou_thr
+        overlaps (Tensor): Overlaps between n proposals and k gt_bboxes,
+            shape(n, k).
+        gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).
+        pos_iou_thr (float): IoU threshold for positive bboxes.
+        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.
+        min_pos_iou (float): Minimum IoU for a bbox to be considered as a
+            positive bbox. This argument only affects the 4th step.

     Returns:
-        tuple: (assigned_gt_inds, argmax_overlaps, max_overlaps), shape (n, )
+        tuple: (assigned_gt_inds, [assigned_labels], argmax_overlaps,
+            max_overlaps), shape (n, )
     """
     num_bboxes, num_gts = overlaps.size(0), overlaps.size(1)
     # 1. assign -1 by default
@@ -138,8 +152,9 @@ def bbox_assign_via_overlaps(overlaps,
     return assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps


-def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
-    """Balance sampling for positive bboxes/anchors
+def bbox_sampling_pos(assigned_gt_inds, num_expected, balance_sampling=True):
+    """Balance sampling for positive bboxes/anchors.
+
     1. calculate average positive num for each gt: num_per_gt
     2. sample at most num_per_gt positives for each gt
     3. random sampling from rest anchors if not enough fg
@@ -180,15 +195,16 @@ def sample_positives(assigned_gt_inds, num_expected, balance_sampling=True):
     return sampled_inds


-def sample_negatives(assigned_gt_inds,
-                     num_expected,
-                     max_overlaps=None,
-                     balance_thr=0,
-                     hard_fraction=0.5):
-    """Balance sampling for negative bboxes/anchors
-    negative samples are split into 2 sets: hard(balance_thr <= iou <
-    neg_iou_thr) and easy(iou < balance_thr), around equal number of bg are
-    sampled from each set.
+def bbox_sampling_neg(assigned_gt_inds,
+                      num_expected,
+                      max_overlaps=None,
+                      balance_thr=0,
+                      hard_fraction=0.5):
+    """Balance sampling for negative bboxes/anchors.
+
+    Negative samples are split into 2 sets: hard (balance_thr <= iou <
+    neg_iou_thr) and easy (iou < balance_thr). The sampling ratio is
+    controlled by `hard_fraction`.
     """
     neg_inds = torch.nonzero(assigned_gt_inds == 0)
     if neg_inds.numel() != 0:
@@ -241,55 +257,87 @@ def bbox_sampling(assigned_gt_inds,
                   max_overlaps=None,
                   neg_balance_thr=0,
                   neg_hard_fraction=0.5):
+    """Sample positive and negative bboxes given assigned results.
+
+    Args:
+        assigned_gt_inds (Tensor): Assigned gt indices for each bbox.
+        num_expected (int): Expected total samples (pos and neg).
+        pos_fraction (float): Positive sample fraction.
+        neg_pos_ub (float): Negative/Positive upper bound.
+        pos_balance_sampling (bool): Whether to sample positive samples
+            around each gt bbox evenly.
+        max_overlaps (Tensor, optional): For each bbox, the max IoU of all
+            gts. Used for negative balance sampling only.
+        neg_balance_thr (float, optional): IoU threshold for simple/hard
+            negative balance sampling.
+        neg_hard_fraction (float, optional): Fraction of hard negative
+            samples for negative balance sampling.
+
+    Returns:
+        tuple[Tensor]: positive bbox indices, negative bbox indices.
+    """
     num_expected_pos = int(num_expected * pos_fraction)
-    pos_inds = sample_positives(assigned_gt_inds, num_expected_pos,
-                                pos_balance_sampling)
+    pos_inds = bbox_sampling_pos(assigned_gt_inds, num_expected_pos,
+                                 pos_balance_sampling)
+    # We found that sampled indices have duplicated items occasionally.
+    # (may be a bug of PyTorch)
+    pos_inds = pos_inds.unique()
     num_sampled_pos = pos_inds.numel()
     num_neg_max = int(
         neg_pos_ub *
         num_sampled_pos) if num_sampled_pos > 0 else int(neg_pos_ub)
     num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)
-    neg_inds = sample_negatives(assigned_gt_inds, num_expected_neg,
-                                max_overlaps, neg_balance_thr,
-                                neg_hard_fraction)
+    neg_inds = bbox_sampling_neg(assigned_gt_inds, num_expected_neg,
+                                 max_overlaps, neg_balance_thr,
+                                 neg_hard_fraction)
+    neg_inds = neg_inds.unique()
     return pos_inds, neg_inds


-def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list,
-                     gt_labels_list, cfg):
-    cfg_list = [cfg for _ in range(len(proposals_list))]
-    results = map(sample_proposals_single, proposals_list, gt_bboxes_list,
-                  gt_crowds_list, gt_labels_list, cfg_list)
-    # list of tuple to tuple of list
-    return tuple(map(list, zip(*results)))
-
-
-def sample_proposals_single(proposals,
-                            gt_bboxes,
-                            gt_crowds,
-                            gt_labels,
-                            cfg):
-    proposals = proposals[:, :4]
+def sample_bboxes(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels, cfg):
+    """Sample positive and negative bboxes.
+
+    This is a simple implementation of bbox sampling given candidates and
+    ground truth bboxes, which includes 3 steps.
+
+    1. Assign gt to each bbox.
+    2. Add gt bboxes to the sampling pool (optional).
+    3. Perform positive and negative sampling.
+
+    Args:
+        bboxes (Tensor): Boxes to be sampled from.
+        gt_bboxes (Tensor): Ground truth bboxes.
+        gt_bboxes_ignore (Tensor): Ignored ground truth bboxes. In MS COCO,
+            `crowd` bboxes are considered as ignored.
+        gt_labels (Tensor): Class labels of ground truth bboxes.
+        cfg (dict): Sampling configs.
+
+    Returns:
+        tuple[Tensor]: pos_bboxes, neg_bboxes, pos_assigned_gt_inds,
+            pos_gt_bboxes, pos_gt_labels
+    """
+    bboxes = bboxes[:, :4]
     assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
-        bbox_assign(
-            proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr,
-            cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr)
+        bbox_assign(bboxes, gt_bboxes, gt_bboxes_ignore, gt_labels,
+                    cfg.pos_iou_thr, cfg.neg_iou_thr, cfg.min_pos_iou,
+                    cfg.crowd_thr)
     if cfg.add_gt_as_proposals:
-        proposals = torch.cat([gt_bboxes, proposals], dim=0)
+        bboxes = torch.cat([gt_bboxes, bboxes], dim=0)
         gt_assign_self = torch.arange(
-            1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device)
+            1, len(gt_labels) + 1, dtype=torch.long, device=bboxes.device)
         assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
         assigned_labels = torch.cat([gt_labels, assigned_labels])

     pos_inds, neg_inds = bbox_sampling(
         assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
         cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
-    pos_proposals = proposals[pos_inds]
-    neg_proposals = proposals[neg_inds]
+
+    pos_bboxes = bboxes[pos_inds]
+    neg_bboxes = bboxes[neg_inds]
     pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
     pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
     pos_gt_labels = assigned_labels[pos_inds]
-    return (pos_inds, neg_inds, pos_proposals, neg_proposals,
-            pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels)
+    return (pos_bboxes, neg_bboxes, pos_assigned_gt_inds, pos_gt_bboxes,
+            pos_gt_labels)
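Worked numbers for the sampling budget in bbox_sampling, using the RPN values from the configs above (anchor_batch_size=256, pos_fraction=0.5, neg_pos_ub=256); the number of positives actually found is an illustrative assumption:

    num_expected, pos_fraction, neg_pos_ub = 256, 0.5, 256
    num_expected_pos = int(num_expected * pos_fraction)   # at most 128 positives
    num_sampled_pos = 20                                  # suppose only 20 exist
    num_neg_max = (int(neg_pos_ub * num_sampled_pos)
                   if num_sampled_pos > 0 else int(neg_pos_ub))      # 5120
    num_expected_neg = min(num_neg_max, num_expected - num_sampled_pos)  # 236

So when positives are scarce, negatives fill the rest of the 256-sample batch, capped by the negative/positive upper bound.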
mmdet/core/bbox/transforms.py:
@@ -3,7 +3,7 @@ import numpy as np
 import torch


-def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
+def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
     assert proposals.size() == gt.size()

     proposals = proposals.float()
@@ -31,12 +31,12 @@ def bbox_transform(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
     return deltas


-def bbox_transform_inv(rois,
-                       deltas,
-                       means=[0, 0, 0, 0],
-                       stds=[1, 1, 1, 1],
-                       max_shape=None,
-                       wh_ratio_clip=16 / 1000):
+def delta2bbox(rois,
+               deltas,
+               means=[0, 0, 0, 0],
+               stds=[1, 1, 1, 1],
+               max_shape=None,
+               wh_ratio_clip=16 / 1000):
     means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
     stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
     denorm_deltas = deltas * stds + means
@@ -69,10 +69,14 @@ def bbox_transform_inv(rois,


 def bbox_flip(bboxes, img_shape):
-    """Flip bboxes horizontally
+    """Flip bboxes horizontally.
+
     Args:
-        bboxes(Tensor): shape (..., 4*k)
-        img_shape(Tensor): image shape
+        bboxes(Tensor or ndarray): Shape (..., 4*k)
+        img_shape(tuple): Image shape.
+
+    Returns:
+        Same type as `bboxes`: Flipped bboxes.
     """
     if isinstance(bboxes, torch.Tensor):
         assert bboxes.shape[-1] % 4 == 0
@@ -84,25 +88,28 @@ def bbox_flip(bboxes, img_shape):
     return mmcv.bbox_flip(bboxes, img_shape)


-def bbox_mapping(bboxes, img_shape, flip):
+def bbox_mapping(bboxes, img_shape, scale_factor, flip):
     """Map bboxes from the original image scale to testing scale"""
-    new_bboxes = bboxes * img_shape[-1]
+    new_bboxes = bboxes * scale_factor
     if flip:
         new_bboxes = bbox_flip(new_bboxes, img_shape)
     return new_bboxes


-def bbox_mapping_back(bboxes, img_shape, flip):
+def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
     """Map bboxes from testing scale to original image scale"""
     new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
-    new_bboxes = new_bboxes / img_shape[-1]
+    new_bboxes = new_bboxes / scale_factor
     return new_bboxes


 def bbox2roi(bbox_list):
     """Convert a list of bboxes to roi format.

     Args:
-        bbox_list (Tensor): a list of bboxes corresponding to a list of images
+        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
+            of images.

     Returns:
         Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
     """
@@ -129,11 +136,13 @@ def roi2bbox(rois):


 def bbox2result(bboxes, labels, num_classes):
-    """Convert detection results to a list of numpy arrays
+    """Convert detection results to a list of numpy arrays.
+
     Args:
         bboxes (Tensor): shape (n, 5)
         labels (Tensor): shape (n, )
         num_classes (int): class number, including background class

     Returns:
         list(ndarray): bbox results of each class
     """
...
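bbox2delta/delta2bbox implement the standard R-CNN box parameterization: offsets of the gt center relative to the proposal, normalized by the proposal size, plus log size ratios. A hedged worked example (the exact +1 pixel conventions inside the real functions may differ):

    import math

    def centers(box):                      # box is (x1, y1, x2, y2)
        x1, y1, x2, y2 = box
        return (x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1

    px, py, pw, ph = centers((0., 0., 100., 100.))    # proposal
    gx, gy, gw, gh = centers((10., 10., 110., 110.))  # ground truth
    dx, dy = (gx - px) / pw, (gy - py) / ph           # (0.1, 0.1)
    dw, dh = math.log(gw / pw), math.log(gh / ph)     # (0.0, 0.0)
    # delta2bbox inverts this: centers shift by dx*pw, dy*ph; sizes scale
    # by exp(dw), exp(dh); deltas are (de)normalized by target_means and
    # target_stds, and the decoded boxes are clipped to max_shape.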
mmdet/core/bbox_ops/__init__.py (removed; superseded by mmdet/core/bbox/__init__.py above):
-from .geometry import bbox_overlaps
-from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps,
-                       bbox_sampling, sample_positives, sample_negatives,
-                       sample_proposals)
-from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip,
-                         bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox,
-                         bbox2result)
-from .bbox_target import bbox_target
-
-__all__ = [
-    'bbox_overlaps', 'random_choice', 'bbox_assign',
-    'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives',
-    'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip',
-    'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
-    'bbox_target', 'sample_proposals'
-]
mmdet/core/evaluation/__init__.py:
 from .class_names import (voc_classes, imagenet_det_classes,
                           imagenet_vid_classes, coco_classes, dataset_aliases,
                           get_classes)
+from .coco_utils import coco_eval, fast_eval_recall, results2json
+from .eval_hooks import (DistEvalHook, CocoDistEvalRecallHook,
+                         CocoDistEvalmAPHook)
 from .mean_ap import average_precision, eval_map, print_map_summary
 from .recall import (eval_recalls, print_recall_summary, plot_num_recall,
                      plot_iou_recall)

 __all__ = [
     'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',
-    'coco_classes', 'dataset_aliases', 'get_classes', 'average_precision',
+    'coco_classes', 'dataset_aliases', 'get_classes', 'coco_eval',
+    'fast_eval_recall', 'results2json', 'DistEvalHook',
+    'CocoDistEvalRecallHook', 'CocoDistEvalmAPHook', 'average_precision',
     'eval_map', 'print_map_summary', 'eval_recalls', 'print_recall_summary',
     'plot_num_recall', 'plot_iou_recall'
 ]
mmdet/core/evaluation/class_names.py:
@@ -95,7 +95,7 @@ def get_classes(dataset):
     if mmcv.is_str(dataset):
         if dataset in alias2name:
-            labels = eval(alias2name[dataset] + '_labels()')
+            labels = eval(alias2name[dataset] + '_classes()')
         else:
             raise ValueError('Unrecognized dataset: {}'.format(dataset))
     else:
...
mmdet/core/evaluation/coco_utils.py (new file):
import mmcv
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from .recall import eval_recalls
def coco_eval(result_file, result_types, coco, max_dets=(100, 300, 1000)):
for res_type in result_types:
assert res_type in [
'proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'
]
if mmcv.is_str(coco):
coco = COCO(coco)
assert isinstance(coco, COCO)
if res_type == 'proposal_fast':
ar = fast_eval_recall(result_file, coco, max_dets)
for i, num in enumerate(max_dets):
print('AR@{}\t= {:.4f}'.format(num, ar[i]))
return
assert result_file.endswith('.json')
coco_dets = coco.loadRes(result_file)
img_ids = coco.getImgIds()
for res_type in result_types:
iou_type = 'bbox' if res_type == 'proposal' else res_type
cocoEval = COCOeval(coco, coco_dets, iou_type)
cocoEval.params.imgIds = img_ids
if res_type == 'proposal':
cocoEval.params.useCats = 0
cocoEval.params.maxDets = list(max_dets)
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()
def fast_eval_recall(results,
coco,
max_dets,
iou_thrs=np.arange(0.5, 0.96, 0.05)):
if mmcv.is_str(results):
assert results.endswith('.pkl')
results = mmcv.load(results)
elif not isinstance(results, list):
raise TypeError(
'results must be a list of numpy arrays or a filename, not {}'.
format(type(results)))
gt_bboxes = []
img_ids = coco.getImgIds()
for i in range(len(img_ids)):
ann_ids = coco.getAnnIds(imgIds=img_ids[i])
ann_info = coco.loadAnns(ann_ids)
if len(ann_info) == 0:
gt_bboxes.append(np.zeros((0, 4)))
continue
bboxes = []
for ann in ann_info:
if ann.get('ignore', False) or ann['iscrowd']:
continue
x1, y1, w, h = ann['bbox']
bboxes.append([x1, y1, x1 + w - 1, y1 + h - 1])
bboxes = np.array(bboxes, dtype=np.float32)
if bboxes.shape[0] == 0:
bboxes = np.zeros((0, 4))
gt_bboxes.append(bboxes)
recalls = eval_recalls(
gt_bboxes, results, max_dets, iou_thrs, print_summary=False)
ar = recalls.mean(axis=1)
return ar
def xyxy2xywh(bbox):
_bbox = bbox.tolist()
return [
_bbox[0],
_bbox[1],
_bbox[2] - _bbox[0] + 1,
_bbox[3] - _bbox[1] + 1,
]
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
json_results.append(data)
return json_results
def det2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
json_results.append(data)
return json_results
def segm2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
def results2json(dataset, results, out_file):
if isinstance(results[0], list):
json_results = det2json(dataset, results)
elif isinstance(results[0], tuple):
json_results = segm2json(dataset, results)
elif isinstance(results[0], np.ndarray):
json_results = proposal2json(dataset, results)
else:
raise TypeError('invalid type of results')
mmcv.dump(json_results, out_file)
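A hedged usage sketch tying these helpers together; the dataset object and the pickled results are assumed to come from a prior test run:

    import mmcv

    results = mmcv.load('results.pkl')        # list of per-image detections
    results2json(dataset, results, 'results.json')
    # coco may be a COCO instance or an annotation file path (see coco_eval)
    coco_eval('results.json', ['bbox'],
              'data/coco/annotations/instances_val2017.json')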