Commit c6fde230 authored by pangjm

Merge branch 'master' of github.com:open-mmlab/mmdetection

Conflicts:
	tools/train.py
parents e74519bb 826a5613
from .custom import CustomDataset
from .coco import CocoDataset
from .loader import GroupSampler, DistributedGroupSampler, build_dataloader
from .utils import to_tensor, random_scale, show_ann, get_dataset
from .concat_dataset import ConcatDataset
from .repeat_dataset import RepeatDataset
__all__ = [
'CustomDataset', 'CocoDataset', 'GroupSampler', 'DistributedGroupSampler',
'build_dataloader', 'to_tensor', 'random_scale', 'show_ann',
'get_dataset', 'ConcatDataset', 'RepeatDataset',
]
import numpy as np
from torch.utils.data.dataset import ConcatDataset as _ConcatDataset
class ConcatDataset(_ConcatDataset):
"""
Same as torch.utils.data.dataset.ConcatDataset, but also
concatenates the per-image aspect-ratio group flags.
"""
def __init__(self, datasets):
"""
flag: images with an aspect ratio greater than 1 are assigned to
group 1, otherwise to group 0.
"""
super(ConcatDataset, self).__init__(datasets)
if hasattr(datasets[0], 'flag'):
flags = []
for i in range(0, len(datasets)):
flags.append(datasets[i].flag)
self.flag = np.concatenate(flags)
import numpy as np
class RepeatDataset(object):
def __init__(self, dataset, times):
self.dataset = dataset
self.times = times
if hasattr(self.dataset, 'flag'):
self.flag = np.tile(self.dataset.flag, times)
self._original_length = len(self.dataset)
def __getitem__(self, idx):
return self.dataset[idx % self._original_length]
def __len__(self):
return self.times * self._original_length
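
Both wrappers preserve the aspect-ratio grouping contract: ConcatDataset concatenates the per-dataset `flag` arrays and RepeatDataset tiles them, so GroupSampler keeps batching images of similar aspect ratio. A minimal sketch of that behavior, assuming a hypothetical stand-in dataset with the same `flag` attribute:

import numpy as np

class DummyDataset(object):
    # stand-in exposing the same `flag` contract as CustomDataset
    def __init__(self, flags):
        self.flag = np.asarray(flags, dtype=np.uint8)

    def __getitem__(self, idx):
        return idx

    def __len__(self):
        return len(self.flag)

a = DummyDataset([0, 1])
b = DummyDataset([1, 1, 0])
print(ConcatDataset([a, b]).flag)  # [0 1 1 1 0], flags concatenated

r = RepeatDataset(a, times=3)
print(r.flag)                      # [0 1 0 1 0 1], flags tiled `times` times
print(len(r), r[4])                # 6, and index 4 wraps to a[0] -> 0
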
import copy
from collections import Sequence
import mmcv
from mmcv.runner import obj_from_dict
import torch
import matplotlib.pyplot as plt
import numpy as np
from .concat_dataset import ConcatDataset
from .repeat_dataset import RepeatDataset
from .. import datasets
def to_tensor(data):
@@ -67,3 +72,45 @@ def show_ann(coco, img, ann_info):
plt.axis('off')
coco.showAnns(ann_info)
plt.show()
def get_dataset(data_cfg):
if data_cfg['type'] == 'RepeatDataset':
return RepeatDataset(
get_dataset(data_cfg['dataset']), data_cfg['times'])
if isinstance(data_cfg['ann_file'], (list, tuple)):
ann_files = data_cfg['ann_file']
num_dset = len(ann_files)
else:
ann_files = [data_cfg['ann_file']]
num_dset = 1
if 'proposal_file' in data_cfg.keys():
if isinstance(data_cfg['proposal_file'], (list, tuple)):
proposal_files = data_cfg['proposal_file']
else:
proposal_files = [data_cfg['proposal_file']]
else:
proposal_files = [None] * num_dset
assert len(proposal_files) == num_dset
if isinstance(data_cfg['img_prefix'], (list, tuple)):
img_prefixes = data_cfg['img_prefix']
else:
img_prefixes = [data_cfg['img_prefix']] * num_dset
assert len(img_prefixes) == num_dset
dsets = []
for i in range(num_dset):
data_info = copy.deepcopy(data_cfg)
data_info['ann_file'] = ann_files[i]
data_info['proposal_file'] = proposal_files[i]
data_info['img_prefix'] = img_prefixes[i]
dset = obj_from_dict(data_info, datasets)
dsets.append(dset)
if len(dsets) > 1:
dset = ConcatDataset(dsets)
else:
dset = dsets[0]
return dset
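
get_dataset dispatches on the config: a 'RepeatDataset' type recurses into its inner dataset, a list-valued 'ann_file' fans out into one dataset per file (merged with ConcatDataset), and scalar 'img_prefix' / 'proposal_file' values are broadcast. A hedged example config; the paths are placeholders and the dataset kwargs follow this codebase's CustomDataset signature, so treat the exact keys as illustrative:

data_cfg = dict(
    type='RepeatDataset',
    times=2,
    dataset=dict(
        type='CocoDataset',
        # two annotation files -> two CocoDatasets wrapped in ConcatDataset
        ann_file=['anns/train_a.json', 'anns/train_b.json'],
        img_prefix='images/',  # broadcast to both datasets
        img_scale=(1333, 800),
        img_norm_cfg=dict(mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True),
        size_divisor=32,
        flip_ratio=0.5,
        with_mask=False,
        with_crowd=True,
        with_label=True))

dset = get_dataset(data_cfg)  # RepeatDataset(ConcatDataset([...]), times=2)
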
import torch
import torch.nn as nn
import torch.nn.functional as F
@@ -99,7 +100,7 @@ class BBoxHead(nn.Module):
img_shape,
scale_factor,
rescale=False,
cfg=None):
if isinstance(cls_score, list):
cls_score = sum(cls_score) / float(len(cls_score))
scores = F.softmax(cls_score, dim=1) if cls_score is not None else None
@@ -114,11 +115,80 @@
if rescale:
bboxes /= scale_factor
if cfg is None:
return bboxes, scores
else:
det_bboxes, det_labels = multiclass_nms(
bboxes, scores, cfg.score_thr, cfg.nms, cfg.max_per_img)
return det_bboxes, det_labels
def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):
"""Refine bboxes during training.
Args:
rois (Tensor): Shape (n*bs, 5), where n is the number of images
per GPU and bs is the number of sampled RoIs per image.
labels (Tensor): Shape (n*bs, ).
bbox_preds (Tensor): Shape (n*bs, 4) or (n*bs, 4*#class).
pos_is_gts (list[Tensor]): Flags indicating if each positive bbox
is a gt bbox.
img_metas (list[dict]): Meta info of each image.
Returns:
list[Tensor]: Refined bboxes of each image in a mini-batch.
"""
img_ids = rois[:, 0].long().unique(sorted=True)
assert img_ids.numel() == len(img_metas)
bboxes_list = []
for i in range(len(img_metas)):
inds = torch.nonzero(rois[:, 0] == i).squeeze()
num_rois = inds.numel()
bboxes_ = rois[inds, 1:]
label_ = labels[inds]
bbox_pred_ = bbox_preds[inds]
img_meta_ = img_metas[i]
pos_is_gts_ = pos_is_gts[i]
bboxes = self.regress_by_class(bboxes_, label_, bbox_pred_,
img_meta_)
# filter gt bboxes
pos_keep = 1 - pos_is_gts_
keep_inds = pos_is_gts_.new_ones(num_rois)
keep_inds[:len(pos_is_gts_)] = pos_keep
bboxes_list.append(bboxes[keep_inds])
return bboxes_list
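
The `keep_inds` bookkeeping drops sampled positives that are themselves ground-truth boxes, so the next cascade stage is not fed its training targets as proposals. A toy illustration of just that masking step (all values invented):

import torch

pos_is_gts_ = torch.tensor([1, 0, 1], dtype=torch.uint8)  # 3 positives, 2 are gt boxes
num_rois = 5                                              # 3 positives + 2 negatives
pos_keep = 1 - pos_is_gts_                                # keep only non-gt positives
keep_inds = pos_is_gts_.new_ones(num_rois)                # negatives kept by default
keep_inds[:len(pos_is_gts_)] = pos_keep
print(keep_inds)  # tensor([0, 1, 0, 1, 1], ...) -> rows 1, 3, 4 survive the mask
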
def regress_by_class(self, rois, label, bbox_pred, img_meta):
"""Regress the bbox for the predicted class. Used in Cascade R-CNN.
Args:
rois (Tensor): shape (n, 4) or (n, 5)
label (Tensor): shape (n, )
bbox_pred (Tensor): shape (n, 4*(#class+1)) or (n, 4)
img_meta (dict): Image meta info.
Returns:
Tensor: Regressed bboxes, the same shape as input rois.
"""
assert rois.size(1) == 4 or rois.size(1) == 5
if not self.reg_class_agnostic:
label = label * 4
inds = torch.stack((label, label + 1, label + 2, label + 3), 1)
bbox_pred = torch.gather(bbox_pred, 1, inds)
assert bbox_pred.size(1) == 4
if rois.size(1) == 4:
new_rois = delta2bbox(rois, bbox_pred, self.target_means,
self.target_stds, img_meta['img_shape'])
else:
bboxes = delta2bbox(rois[:, 1:], bbox_pred, self.target_means,
self.target_stds, img_meta['img_shape'])
new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)
return new_rois
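
For a class-aware head, `bbox_pred` holds four deltas per class and the gather selects the slice belonging to each RoI's predicted label. A small numeric sketch of the index construction (shapes invented):

import torch

label = torch.tensor([0, 2])  # predicted class per RoI
label4 = label * 4
inds = torch.stack((label4, label4 + 1, label4 + 2, label4 + 3), 1)
print(inds)                   # [[0, 1, 2, 3], [8, 9, 10, 11]]

bbox_pred = torch.arange(24, dtype=torch.float32).view(2, 12)  # (n, 4 * #class)
print(torch.gather(bbox_pred, 1, inds))
# [[ 0.,  1.,  2.,  3.],
#  [20., 21., 22., 23.]] -> per-RoI deltas for the predicted class only
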
@@ -2,11 +2,12 @@ from mmcv.runner import obj_from_dict
from torch import nn
from . import (backbones, necks, roi_extractors, rpn_heads, bbox_heads,
mask_heads, single_stage_heads)
__all__ = [
'build_backbone', 'build_neck', 'build_rpn_head', 'build_roi_extractor',
'build_bbox_head', 'build_mask_head', 'build_single_stage_head',
'build_detector'
]
@@ -47,6 +48,10 @@ def build_mask_head(cfg):
return build(cfg, mask_heads)
def build_single_stage_head(cfg):
return build(cfg, single_stage_heads)
def build_detector(cfg, train_cfg=None, test_cfg=None):
from . import detectors
return build(cfg, detectors, dict(train_cfg=train_cfg, test_cfg=test_cfg))
from .base import BaseDetector
from .single_stage import SingleStageDetector
from .two_stage import TwoStageDetector
from .rpn import RPN
from .fast_rcnn import FastRCNN
from .faster_rcnn import FasterRCNN
from .mask_rcnn import MaskRCNN
from .cascade_rcnn import CascadeRCNN
from .retinanet import RetinaNet
__all__ = [
'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',
'FastRCNN', 'FasterRCNN', 'MaskRCNN', 'CascadeRCNN', 'RetinaNet'
]
@@ -4,6 +4,7 @@ from abc import ABCMeta, abstractmethod
import mmcv
import numpy as np
import torch.nn as nn
import pycocotools.mask as maskUtils
from mmdet.core import tensor2imgs, get_classes
@@ -86,6 +87,11 @@ class BaseDetector(nn.Module):
img_norm_cfg,
dataset='coco',
score_thr=0.3):
if isinstance(result, tuple):
bbox_result, segm_result = result
else:
bbox_result, segm_result = result, None
img_tensor = data['img'][0]
img_metas = data['img_meta'][0].data[0]
imgs = tensor2imgs(img_tensor, **img_norm_cfg)
@@ -102,12 +108,23 @@
for img, img_meta in zip(imgs, img_metas):
h, w, _ = img_meta['img_shape']
img_show = img[:h, :w, :]
bboxes = np.vstack(bbox_result)
# draw segmentation masks
if segm_result is not None:
segms = mmcv.concat_list(segm_result)
inds = np.where(bboxes[:, -1] > score_thr)[0]
for i in inds:
color_mask = np.random.randint(
0, 256, (1, 3), dtype=np.uint8)
mask = maskUtils.decode(segms[i]).astype(np.bool)
img_show[mask] = img_show[mask] * 0.5 + color_mask * 0.5
# draw bounding boxes
labels = [
np.full(bbox.shape[0], i, dtype=np.int32)
for i, bbox in enumerate(bbox_result)
]
labels = np.concatenate(labels)
mmcv.imshow_det_bboxes(
img_show,
bboxes,
......
from __future__ import division
import torch
import torch.nn as nn
from .base import BaseDetector
from .test_mixins import RPNTestMixin
from .. import builder
from mmdet.core import (assign_and_sample, bbox2roi, bbox2result, multi_apply,
merge_aug_masks)
class CascadeRCNN(BaseDetector, RPNTestMixin):
def __init__(self,
num_stages,
backbone,
neck=None,
rpn_head=None,
bbox_roi_extractor=None,
bbox_head=None,
mask_roi_extractor=None,
mask_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
assert bbox_roi_extractor is not None
assert bbox_head is not None
super(CascadeRCNN, self).__init__()
self.num_stages = num_stages
self.backbone = builder.build_backbone(backbone)
if neck is not None:
self.neck = builder.build_neck(neck)
else:
raise NotImplementedError
if rpn_head is not None:
self.rpn_head = builder.build_rpn_head(rpn_head)
if bbox_head is not None:
self.bbox_roi_extractor = nn.ModuleList()
self.bbox_head = nn.ModuleList()
if not isinstance(bbox_roi_extractor, list):
bbox_roi_extractor = [
bbox_roi_extractor for _ in range(num_stages)
]
if not isinstance(bbox_head, list):
bbox_head = [bbox_head for _ in range(num_stages)]
assert len(bbox_roi_extractor) == len(bbox_head) == self.num_stages
for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):
self.bbox_roi_extractor.append(
builder.build_roi_extractor(roi_extractor))
self.bbox_head.append(builder.build_bbox_head(head))
if mask_head is not None:
self.mask_roi_extractor = nn.ModuleList()
self.mask_head = nn.ModuleList()
if not isinstance(mask_roi_extractor, list):
mask_roi_extractor = [
mask_roi_extractor for _ in range(num_stages)
]
if not isinstance(mask_head, list):
mask_head = [mask_head for _ in range(num_stages)]
assert len(mask_roi_extractor) == len(mask_head) == self.num_stages
for roi_extractor, head in zip(mask_roi_extractor, mask_head):
self.mask_roi_extractor.append(
builder.build_roi_extractor(roi_extractor))
self.mask_head.append(builder.build_mask_head(head))
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self.init_weights(pretrained=pretrained)
@property
def with_rpn(self):
return hasattr(self, 'rpn_head') and self.rpn_head is not None
def init_weights(self, pretrained=None):
super(CascadeRCNN, self).init_weights(pretrained)
self.backbone.init_weights(pretrained=pretrained)
if self.with_neck:
if isinstance(self.neck, nn.Sequential):
for m in self.neck:
m.init_weights()
else:
self.neck.init_weights()
if self.with_rpn:
self.rpn_head.init_weights()
for i in range(self.num_stages):
if self.with_bbox:
self.bbox_roi_extractor[i].init_weights()
self.bbox_head[i].init_weights()
if self.with_mask:
self.mask_roi_extractor[i].init_weights()
self.mask_head[i].init_weights()
def extract_feat(self, img):
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_train(self,
img,
img_meta,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
gt_masks=None,
proposals=None):
x = self.extract_feat(img)
losses = dict()
if self.with_rpn:
rpn_outs = self.rpn_head(x)
rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
self.train_cfg.rpn)
rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
losses.update(rpn_losses)
proposal_inputs = rpn_outs + (img_meta, self.test_cfg.rpn)
proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
else:
proposal_list = proposals
for i in range(self.num_stages):
rcnn_train_cfg = self.train_cfg.rcnn[i]
lw = self.train_cfg.stage_loss_weights[i]
# assign gts and sample proposals
assign_results, sampling_results = multi_apply(
assign_and_sample,
proposal_list,
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
cfg=rcnn_train_cfg)
# bbox head forward and loss
bbox_roi_extractor = self.bbox_roi_extractor[i]
bbox_head = self.bbox_head[i]
rois = bbox2roi([res.bboxes for res in sampling_results])
bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],
rois)
cls_score, bbox_pred = bbox_head(bbox_feats)
bbox_targets = bbox_head.get_target(sampling_results, gt_bboxes,
gt_labels, rcnn_train_cfg)
loss_bbox = bbox_head.loss(cls_score, bbox_pred, *bbox_targets)
for name, value in loss_bbox.items():
losses['s{}.{}'.format(i, name)] = (value * lw if
'loss' in name else value)
# mask head forward and loss
if self.with_mask:
mask_roi_extractor = self.mask_roi_extractor[i]
mask_head = self.mask_head[i]
pos_rois = bbox2roi(
[res.pos_bboxes for res in sampling_results])
mask_feats = mask_roi_extractor(
x[:mask_roi_extractor.num_inputs], pos_rois)
mask_pred = mask_head(mask_feats)
mask_targets = mask_head.get_target(sampling_results, gt_masks,
rcnn_train_cfg)
pos_labels = torch.cat(
[res.pos_gt_labels for res in sampling_results])
loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)
for name, value in loss_mask.items():
losses['s{}.{}'.format(i, name)] = (value * lw
if 'loss' in name else
value)
# refine bboxes
if i < self.num_stages - 1:
pos_is_gts = [res.pos_is_gt for res in sampling_results]
roi_labels = bbox_targets[0] # bbox_targets is a tuple
with torch.no_grad():
proposal_list = bbox_head.refine_bboxes(
rois, roi_labels, bbox_pred, pos_is_gts, img_meta)
return losses
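
Each stage's losses are prefixed with the stage index and scaled by `stage_loss_weights`; entries without 'loss' in the name (e.g. accuracy) pass through unweighted. A sketch of the naming convention with made-up values:

losses = {}
stage_loss_weights = [1.0, 0.5, 0.25]  # one weight per cascade stage
for i, lw in enumerate(stage_loss_weights):
    loss_bbox = {'loss_cls': 0.8, 'loss_reg': 0.4, 'acc': 0.9}  # dummy stage output
    for name, value in loss_bbox.items():
        losses['s{}.{}'.format(i, name)] = value * lw if 'loss' in name else value
print(sorted(losses))  # ['s0.acc', 's0.loss_cls', 's0.loss_reg', 's1.acc', ...]
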
def simple_test(self, img, img_meta, proposals=None, rescale=False):
x = self.extract_feat(img)
proposal_list = self.simple_test_rpn(
x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
img_shape = img_meta[0]['img_shape']
ori_shape = img_meta[0]['ori_shape']
scale_factor = img_meta[0]['scale_factor']
# "ms" in variable names means multi-stage
ms_bbox_result = {}
ms_segm_result = {}
ms_scores = []
rcnn_test_cfg = self.test_cfg.rcnn
rois = bbox2roi(proposal_list)
for i in range(self.num_stages):
bbox_roi_extractor = self.bbox_roi_extractor[i]
bbox_head = self.bbox_head[i]
bbox_feats = bbox_roi_extractor(
x[:len(bbox_roi_extractor.featmap_strides)], rois)
cls_score, bbox_pred = bbox_head(bbox_feats)
ms_scores.append(cls_score)
if self.test_cfg.keep_all_stages:
det_bboxes, det_labels = bbox_head.get_det_bboxes(
rois,
cls_score,
bbox_pred,
img_shape,
scale_factor,
rescale=rescale,
cfg=rcnn_test_cfg)
bbox_result = bbox2result(det_bboxes, det_labels,
bbox_head.num_classes)
ms_bbox_result['stage{}'.format(i)] = bbox_result
if self.with_mask:
mask_roi_extractor = self.mask_roi_extractor[i]
mask_head = self.mask_head[i]
if det_bboxes.shape[0] == 0:
segm_result = [
[] for _ in range(mask_head.num_classes - 1)
]
else:
_bboxes = (det_bboxes[:, :4] * scale_factor
if rescale else det_bboxes)
mask_rois = bbox2roi([_bboxes])
mask_feats = mask_roi_extractor(
x[:len(mask_roi_extractor.featmap_strides)],
mask_rois)
mask_pred = mask_head(mask_feats)
segm_result = mask_head.get_seg_masks(
mask_pred, _bboxes, det_labels, rcnn_test_cfg,
ori_shape, scale_factor, rescale)
ms_segm_result['stage{}'.format(i)] = segm_result
if i < self.num_stages - 1:
bbox_label = cls_score.argmax(dim=1)
rois = bbox_head.regress_by_class(rois, bbox_label, bbox_pred,
img_meta[0])
cls_score = sum(ms_scores) / self.num_stages
det_bboxes, det_labels = self.bbox_head[-1].get_det_bboxes(
rois,
cls_score,
bbox_pred,
img_shape,
scale_factor,
rescale=rescale,
cfg=rcnn_test_cfg)
bbox_result = bbox2result(det_bboxes, det_labels,
self.bbox_head[-1].num_classes)
ms_bbox_result['ensemble'] = bbox_result
if self.with_mask:
if det_bboxes.shape[0] == 0:
segm_result = [
[] for _ in range(self.mask_head[-1].num_classes - 1)
]
else:
_bboxes = (det_bboxes[:, :4] * scale_factor
if rescale else det_bboxes)
mask_rois = bbox2roi([_bboxes])
aug_masks = []
for i in range(self.num_stages):
mask_roi_extractor = self.mask_roi_extractor[i]
mask_feats = mask_roi_extractor(
x[:len(mask_roi_extractor.featmap_strides)], mask_rois)
mask_pred = self.mask_head[i](mask_feats)
aug_masks.append(mask_pred.sigmoid().cpu().numpy())
merged_masks = merge_aug_masks(aug_masks,
[img_meta] * self.num_stages,
self.test_cfg.rcnn)
segm_result = self.mask_head[-1].get_seg_masks(
merged_masks, _bboxes, det_labels, rcnn_test_cfg,
ori_shape, scale_factor, rescale)
ms_segm_result['ensemble'] = segm_result
if not self.test_cfg.keep_all_stages:
if self.with_mask:
results = (ms_bbox_result['ensemble'],
ms_segm_result['ensemble'])
else:
results = ms_bbox_result['ensemble']
else:
if self.with_mask:
results = {
stage: (ms_bbox_result[stage], ms_segm_result[stage])
for stage in ms_bbox_result
}
else:
results = ms_bbox_result
return results
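
At test time the same RoIs are refined stage by stage (regress_by_class) while the raw classification scores are accumulated, and the 'ensemble' result uses their mean. The averaging itself is elementwise (dummy tensors):

import torch

num_stages = 3
ms_scores = [torch.randn(4, 81) for _ in range(num_stages)]  # per-stage cls_score
cls_score = sum(ms_scores) / num_stages                      # mean logits, shape (4, 81)
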
def aug_test(self, img, img_meta, proposals=None, rescale=False):
raise NotImplementedError
def show_result(self, data, result, img_norm_cfg, **kwargs):
if self.with_mask:
ms_bbox_result, ms_segm_result = result
if isinstance(ms_bbox_result, dict):
result = (ms_bbox_result['ensemble'],
ms_segm_result['ensemble'])
else:
if isinstance(result, dict):
result = result['ensemble']
super(CascadeRCNN, self).show_result(data, result, img_norm_cfg,
**kwargs)
@@ -25,10 +25,3 @@ class MaskRCNN(TwoStageDetector):
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained)
from .single_stage import SingleStageDetector
class RetinaNet(SingleStageDetector):
def __init__(self,
backbone,
neck,
bbox_head,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,
test_cfg, pretrained)
import torch.nn as nn
from .base import BaseDetector
from .. import builder
from mmdet.core import bbox2result
class SingleStageDetector(BaseDetector):
def __init__(self,
backbone,
neck=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(SingleStageDetector, self).__init__()
self.backbone = builder.build_backbone(backbone)
if neck is not None:
self.neck = builder.build_neck(neck)
self.bbox_head = builder.build_single_stage_head(bbox_head)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self.init_weights(pretrained=pretrained)
def init_weights(self, pretrained=None):
super(SingleStageDetector, self).init_weights(pretrained)
self.backbone.init_weights(pretrained=pretrained)
if self.with_neck:
if isinstance(self.neck, nn.Sequential):
for m in self.neck:
m.init_weights()
else:
self.neck.init_weights()
self.bbox_head.init_weights()
def extract_feat(self, img):
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_train(self, img, img_metas, gt_bboxes, gt_labels):
x = self.extract_feat(img)
outs = self.bbox_head(x)
loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
losses = self.bbox_head.loss(*loss_inputs)
return losses
def simple_test(self, img, img_meta, rescale=False):
x = self.extract_feat(img)
outs = self.bbox_head(x)
bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
bbox_list = self.bbox_head.get_det_bboxes(*bbox_inputs)
bbox_results = [
bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)
for det_bboxes, det_labels in bbox_list
]
return bbox_results[0]
def aug_test(self, imgs, img_metas, rescale=False):
raise NotImplementedError
@@ -47,7 +47,7 @@ class BBoxTestMixin(object):
img_shape,
scale_factor,
rescale=rescale,
cfg=rcnn_test_cfg)
return det_bboxes, det_labels
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
@@ -73,15 +73,15 @@ class BBoxTestMixin(object):
img_shape,
scale_factor,
rescale=False,
cfg=None)
aug_bboxes.append(bboxes)
aug_scores.append(scores)
# after merging, bboxes will be rescaled to the original image size
merged_bboxes, merged_scores = merge_aug_bboxes(
aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
det_bboxes, det_labels = multiclass_nms(
merged_bboxes, merged_scores, rcnn_test_cfg.score_thr,
rcnn_test_cfg.nms, rcnn_test_cfg.max_per_img)
return det_bboxes, det_labels
......
@@ -160,7 +160,7 @@ class RPNHead(nn.Module):
if cls_reg_targets is None:
return None
(labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
num_total_pos, num_total_neg) = cls_reg_targets
losses_cls, losses_reg = multi_apply(
self.loss_single,
rpn_cls_scores,
@@ -169,7 +169,7 @@
label_weights_list,
bbox_targets_list,
bbox_weights_list,
num_total_samples=num_total_pos + num_total_neg,
cfg=cfg)
return dict(loss_rpn_cls=losses_cls, loss_rpn_reg=losses_reg)
@@ -234,13 +234,13 @@ class RPNHead(nn.Module):
proposals = proposals[valid_inds, :]
scores = scores[valid_inds]
proposals = torch.cat([proposals, scores.unsqueeze(-1)], dim=-1)
proposals, _ = nms(proposals, cfg.nms_thr)
proposals = proposals[:cfg.nms_post, :]
mlvl_proposals.append(proposals)
proposals = torch.cat(mlvl_proposals, 0)
if cfg.nms_across_levels:
proposals, _ = nms(proposals, cfg.nms_thr)
proposals = proposals[:cfg.max_num, :]
else:
scores = proposals[:, 4]
_, order = scores.sort(0, descending=True)
......
from .retina_head import RetinaHead
__all__ = ['RetinaHead']
from __future__ import division
import numpy as np
import torch
import torch.nn as nn
from mmdet.core import (AnchorGenerator, anchor_target, multi_apply,
delta2bbox, weighted_smoothl1,
weighted_sigmoid_focal_loss, multiclass_nms)
from ..utils import normal_init, bias_init_with_prob
class RetinaHead(nn.Module):
"""Head of RetinaNet.
/ cls_convs - retina_cls (3x3 conv)
input -
\ reg_convs - retina_reg (3x3 conv)
Args:
in_channels (int): Number of channels in the input feature map.
num_classes (int): Number of classes (including the background class).
stacked_convs (int): Number of convolutional layers added for cls and
reg branch.
feat_channels (int): Number of channels of the head's intermediate
feature maps.
scales_per_octave (int): Number of anchor scales per octave.
octave_base_scale (int): Base octave scale. Anchor scales are computed
as `s*2^(i/n)`, for i in [0, n-1], where s is `octave_base_scale`
and n is `scales_per_octave`.
anchor_ratios (Iterable): Anchor aspect ratios.
anchor_strides (Iterable): Anchor strides.
target_means (Iterable): Mean values of regression targets.
target_stds (Iterable): Std values of regression targets.
""" # noqa: W605
def __init__(self,
in_channels,
num_classes,
stacked_convs=4,
feat_channels=256,
octave_base_scale=4,
scales_per_octave=3,
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[8, 16, 32, 64, 128],
anchor_base_sizes=None,
target_means=(.0, .0, .0, .0),
target_stds=(1.0, 1.0, 1.0, 1.0)):
super(RetinaHead, self).__init__()
self.in_channels = in_channels
self.num_classes = num_classes
self.octave_base_scale = octave_base_scale
self.scales_per_octave = scales_per_octave
self.anchor_ratios = anchor_ratios
self.anchor_strides = anchor_strides
self.anchor_base_sizes = list(
anchor_strides) if anchor_base_sizes is None else anchor_base_sizes
self.target_means = target_means
self.target_stds = target_stds
self.anchor_generators = []
for anchor_base in self.anchor_base_sizes:
octave_scales = np.array(
[2**(i / scales_per_octave) for i in range(scales_per_octave)])
anchor_scales = octave_scales * octave_base_scale
self.anchor_generators.append(
AnchorGenerator(anchor_base, anchor_scales, anchor_ratios))
self.relu = nn.ReLU(inplace=True)
self.num_anchors = int(
len(self.anchor_ratios) * self.scales_per_octave)
self.cls_out_channels = self.num_classes - 1
self.bbox_pred_dim = 4
self.stacked_convs = stacked_convs
self.cls_convs = nn.ModuleList()
self.reg_convs = nn.ModuleList()
for i in range(self.stacked_convs):
chn = in_channels if i == 0 else feat_channels
self.cls_convs.append(
nn.Conv2d(chn, feat_channels, 3, stride=1, padding=1))
self.reg_convs.append(
nn.Conv2d(chn, feat_channels, 3, stride=1, padding=1))
self.retina_cls = nn.Conv2d(
feat_channels,
self.num_anchors * self.cls_out_channels,
3,
stride=1,
padding=1)
self.retina_reg = nn.Conv2d(
feat_channels,
self.num_anchors * self.bbox_pred_dim,
3,
stride=1,
padding=1)
self.debug_imgs = None
def init_weights(self):
for m in self.cls_convs:
normal_init(m, std=0.01)
for m in self.reg_convs:
normal_init(m, std=0.01)
bias_cls = bias_init_with_prob(0.01)
normal_init(self.retina_cls, std=0.01, bias=bias_cls)
normal_init(self.retina_reg, std=0.01)
def forward_single(self, x):
cls_feat = x
reg_feat = x
for cls_conv in self.cls_convs:
cls_feat = self.relu(cls_conv(cls_feat))
for reg_conv in self.reg_convs:
reg_feat = self.relu(reg_conv(reg_feat))
cls_score = self.retina_cls(cls_feat)
bbox_pred = self.retina_reg(reg_feat)
return cls_score, bbox_pred
def forward(self, feats):
return multi_apply(self.forward_single, feats)
def get_anchors(self, featmap_sizes, img_metas):
"""Get anchors according to feature map sizes.
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
img_metas (list[dict]): Image meta info.
Returns:
tuple: anchors of each image, valid flags of each image
"""
num_imgs = len(img_metas)
num_levels = len(featmap_sizes)
# since feature map sizes are identical across images in the batch,
# we compute the multi-level anchors only once
multi_level_anchors = []
for i in range(num_levels):
anchors = self.anchor_generators[i].grid_anchors(
featmap_sizes[i], self.anchor_strides[i])
multi_level_anchors.append(anchors)
anchor_list = [multi_level_anchors for _ in range(num_imgs)]
# for each image, we compute valid flags of multi level anchors
valid_flag_list = []
for img_id, img_meta in enumerate(img_metas):
multi_level_flags = []
for i in range(num_levels):
anchor_stride = self.anchor_strides[i]
feat_h, feat_w = featmap_sizes[i]
h, w, _ = img_meta['pad_shape']
valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)
valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)
flags = self.anchor_generators[i].valid_flags(
(feat_h, feat_w), (valid_feat_h, valid_feat_w))
multi_level_flags.append(flags)
valid_flag_list.append(multi_level_flags)
return anchor_list, valid_flag_list
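
The valid flags mark anchor centers that fall inside this image's padded extent rather than in the extra batch padding. A numeric example of the per-level arithmetic (sizes invented):

import numpy as np

feat_h, feat_w = 100, 168    # level feature map computed on the padded batch
anchor_stride = 8
h, w = 800, 1216             # this image's pad_shape (the batch may be wider)
valid_feat_h = min(int(np.ceil(h / anchor_stride)), feat_h)  # 100
valid_feat_w = min(int(np.ceil(w / anchor_stride)), feat_w)  # 152
# anchor columns at x >= 152 lie in padding for this image and get flag 0
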
def loss_single(self, cls_score, bbox_pred, labels, label_weights,
bbox_targets, bbox_weights, num_pos_samples, cfg):
# classification loss
labels = labels.contiguous().view(-1, self.cls_out_channels)
label_weights = label_weights.contiguous().view(
-1, self.cls_out_channels)
cls_score = cls_score.permute(0, 2, 3, 1).contiguous().view(
-1, self.cls_out_channels)
loss_cls = weighted_sigmoid_focal_loss(
cls_score,
labels,
label_weights,
cfg.gamma,
cfg.alpha,
avg_factor=num_pos_samples)
# regression loss
bbox_targets = bbox_targets.contiguous().view(-1, 4)
bbox_weights = bbox_weights.contiguous().view(-1, 4)
bbox_pred = bbox_pred.permute(0, 2, 3, 1).contiguous().view(-1, 4)
loss_reg = weighted_smoothl1(
bbox_pred,
bbox_targets,
bbox_weights,
beta=cfg.smoothl1_beta,
avg_factor=num_pos_samples)
return loss_cls, loss_reg
def loss(self, cls_scores, bbox_preds, gt_bboxes, gt_labels, img_metas,
cfg):
featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
assert len(featmap_sizes) == len(self.anchor_generators)
anchor_list, valid_flag_list = self.get_anchors(
featmap_sizes, img_metas)
cls_reg_targets = anchor_target(
anchor_list,
valid_flag_list,
gt_bboxes,
img_metas,
self.target_means,
self.target_stds,
cfg,
gt_labels_list=gt_labels,
cls_out_channels=self.cls_out_channels,
sampling=False)
if cls_reg_targets is None:
return None
(labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
num_total_pos, num_total_neg) = cls_reg_targets
losses_cls, losses_reg = multi_apply(
self.loss_single,
cls_scores,
bbox_preds,
labels_list,
label_weights_list,
bbox_targets_list,
bbox_weights_list,
num_pos_samples=num_total_pos,
cfg=cfg)
return dict(loss_cls=losses_cls, loss_reg=losses_reg)
def get_det_bboxes(self,
cls_scores,
bbox_preds,
img_metas,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds)
num_levels = len(cls_scores)
mlvl_anchors = [
self.anchor_generators[i].grid_anchors(cls_scores[i].size()[-2:],
self.anchor_strides[i])
for i in range(num_levels)
]
result_list = []
for img_id in range(len(img_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
img_shape = img_metas[img_id]['img_shape']
scale_factor = img_metas[img_id]['scale_factor']
results = self._get_det_bboxes_single(
cls_score_list, bbox_pred_list, mlvl_anchors, img_shape,
scale_factor, cfg, rescale)
result_list.append(results)
return result_list
def _get_det_bboxes_single(self,
cls_scores,
bbox_preds,
mlvl_anchors,
img_shape,
scale_factor,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_proposals = []
mlvl_scores = []
for cls_score, bbox_pred, anchors in zip(cls_scores, bbox_preds,
mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
cls_score = cls_score.permute(1, 2, 0).contiguous().view(
-1, self.cls_out_channels)
scores = cls_score.sigmoid()
bbox_pred = bbox_pred.permute(1, 2, 0).contiguous().view(-1, 4)
proposals = delta2bbox(anchors, bbox_pred, self.target_means,
self.target_stds, img_shape)
if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
maxscores, _ = scores.max(dim=1)
_, topk_inds = maxscores.topk(cfg.nms_pre)
proposals = proposals[topk_inds, :]
scores = scores[topk_inds, :]
mlvl_proposals.append(proposals)
mlvl_scores.append(scores)
mlvl_proposals = torch.cat(mlvl_proposals)
if rescale:
mlvl_proposals /= scale_factor
mlvl_scores = torch.cat(mlvl_scores)
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([padding, mlvl_scores], dim=1)
det_bboxes, det_labels = multiclass_nms(mlvl_proposals, mlvl_scores,
cfg.score_thr, cfg.nms,
cfg.max_per_img)
return det_bboxes, det_labels
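
The octave scheme from the class docstring, s * 2^(i/n), produces `scales_per_octave` geometrically spaced anchor scales per level; with the defaults that is 9 anchors per location (3 scales x 3 ratios). A quick check of the numbers:

import numpy as np

octave_base_scale, scales_per_octave = 4, 3
octave_scales = np.array([2**(i / scales_per_octave)
                          for i in range(scales_per_octave)])
print(octave_scales * octave_base_scale)  # [4.     5.0397 6.3496]
num_anchors = len([0.5, 1.0, 2.0]) * scales_per_octave
print(num_anchors)                        # 9
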
from .conv_module import ConvModule
from .norm import build_norm_layer
from .weight_init import (xavier_init, normal_init, uniform_init, kaiming_init,
bias_init_with_prob)
__all__ = [
'ConvModule', 'build_norm_layer', 'xavier_init', 'normal_init',
'uniform_init', 'kaiming_init', 'bias_init_with_prob'
]
import numpy as np
import torch.nn as nn
@@ -37,3 +38,9 @@ def kaiming_init(module,
module.weight, mode=mode, nonlinearity=nonlinearity)
if hasattr(module, 'bias'):
nn.init.constant_(module.bias, bias)
def bias_init_with_prob(prior_prob):
""" initialize conv/fc bias value according to giving probablity"""
bias_init = float(-np.log((1 - prior_prob) / prior_prob))
return bias_init
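
The returned bias is the logit of `prior_prob`, so a classification layer initialized this way starts out predicting roughly that foreground probability (the focal-loss initialization from the RetinaNet paper). Sanity check:

import numpy as np

b = bias_init_with_prob(0.01)
print(b)                     # ~ -4.595
print(1 / (1 + np.exp(-b)))  # sigmoid(b) ~ 0.01, the requested prior
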
@@ -3,6 +3,7 @@
# Copyright (c) University of Maryland, College Park
# Licensed under The MIT License [see LICENSE for details]
# Written by Navaneeth Bodla and Bharat Singh
# Modified by Kai Chen
# ----------------------------------------------------------
import numpy as np
@@ -15,12 +16,13 @@ cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
return a if a <= b else b
def cpu_soft_nms(
np.ndarray[float, ndim=2] boxes_in,
float iou_thr,
unsigned int method=1,
float sigma=0.5,
float min_score=0.001,
):
boxes = boxes_in.copy()
cdef unsigned int N = boxes.shape[0]
@@ -36,11 +38,11 @@ def cpu_soft_nms(
maxscore = boxes[i, 4]
maxpos = i
tx1 = boxes[i, 0]
ty1 = boxes[i, 1]
tx2 = boxes[i, 2]
ty2 = boxes[i, 3]
ts = boxes[i, 4]
ti = inds[i]
pos = i + 1
@@ -52,26 +54,26 @@
pos = pos + 1
# add max box as a detection
boxes[i, 0] = boxes[maxpos, 0]
boxes[i, 1] = boxes[maxpos, 1]
boxes[i, 2] = boxes[maxpos, 2]
boxes[i, 3] = boxes[maxpos, 3]
boxes[i, 4] = boxes[maxpos, 4]
inds[i] = inds[maxpos]
# swap ith box with position of max box
boxes[maxpos, 0] = tx1
boxes[maxpos, 1] = ty1
boxes[maxpos, 2] = tx2
boxes[maxpos, 3] = ty2
boxes[maxpos, 4] = ts
inds[maxpos] = ti
tx1 = boxes[i, 0]
ty1 = boxes[i, 1]
tx2 = boxes[i, 2]
ty2 = boxes[i, 3]
ts = boxes[i, 4]
pos = i + 1
# NMS iterations, note that N changes if detection boxes fall below
@@ -89,35 +91,35 @@
ih = (min(ty2, y2) - max(ty1, y1) + 1)
if ih > 0:
ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
ov = iw * ih / ua # iou between max box and detection box
if method == 1: # linear
if ov > iou_thr:
weight = 1 - ov
else:
weight = 1
elif method == 2: # gaussian
weight = np.exp(-(ov * ov) / sigma)
else: # original NMS
if ov > iou_thr:
weight = 0
else:
weight = 1
boxes[pos, 4] = weight * boxes[pos, 4]
# if the box score falls below min_score, discard the box by
# swapping it with the last box and decrementing N
if boxes[pos, 4] < min_score:
boxes[pos, 0] = boxes[N-1, 0]
boxes[pos, 1] = boxes[N-1, 1]
boxes[pos, 2] = boxes[N-1, 2]
boxes[pos, 3] = boxes[N-1, 3]
boxes[pos, 4] = boxes[N-1, 4]
inds[pos] = inds[N - 1]
N = N - 1
pos = pos - 1
pos = pos + 1
return boxes[:N], inds[:N]
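
For reference, a hedged NumPy sketch of the three score-decay schemes the loop above applies to each overlapping box (method 0 = hard NMS, 1 = linear, 2 = gaussian); decay_weight is an illustrative helper, not part of this module:

import numpy as np

def decay_weight(ov, method=1, iou_thr=0.3, sigma=0.5):
    # score multiplier for a box with IoU `ov` against the current max box
    if method == 1:    # linear
        return 1 - ov if ov > iou_thr else 1.0
    elif method == 2:  # gaussian
        return float(np.exp(-(ov * ov) / sigma))
    else:              # original hard NMS
        return 0.0 if ov > iou_thr else 1.0

for m in (0, 1, 2):
    print(m, decay_weight(0.6, method=m))  # 0 -> 0.0, 1 -> 0.4, 2 -> ~0.487
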
@@ -19,7 +19,7 @@ memory_pool = {}
def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
np.int32_t device_id=0):
cdef int boxes_num = dets.shape[0]
cdef int boxes_dim = 5
cdef int num_out
cdef size_t base
cdef np.ndarray[np.int32_t, ndim=1] \
@@ -29,7 +29,7 @@ def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
cdef np.ndarray[np.int_t, ndim=1] \
order = scores.argsort()[::-1]
cdef np.ndarray[np.float32_t, ndim=2] \
sorted_dets = dets[order, :5]
cdef float cthresh = thresh
if device_id not in memory_pool:
with nogil:
......