Unverified Commit afe5ce0a authored by Kai Chen's avatar Kai Chen Committed by GitHub
Browse files

Merge pull request #1 from OceanPang/dev

faster-rcnn & mask-rcnn train and test support 
parents 0401cccd 782ba019
### MMCV
- [ ] Implement the attr 'get' of 'Config'
- [ ] Config bugs: None type to '{}' with addict
- [ ] Default logger should be only with gpu0
- [ ] Unit Test: mmcv and mmcv.torchpack
### MMDetection
#### Basic
- [ ] Implement training function without distributed
- [ ] Verify nccl/nccl2/gloo
- [ ] Replace UGLY code: params plug in 'args' to reach a global flow
- [ ] Replace 'print' by 'logger'
#### Testing
- [ ] Implement distributed testing
- [ ] Implement single gpu testing
#### Refactor
- [ ] Re-consider params names
- [ ] Refactor functions in 'core'
- [ ] Merge single test & aug test as one function, so as other redundancy
#### New features
- [ ] Plug loss params into Config
- [ ] Multi-head communication
from .geometry import bbox_overlaps
from .sampling import (random_choice, bbox_assign, bbox_assign_via_overlaps,
bbox_sampling, sample_positives, sample_negatives,
sample_proposals)
bbox_sampling, sample_positives, sample_negatives)
from .transforms import (bbox_transform, bbox_transform_inv, bbox_flip,
bbox_mapping, bbox_mapping_back, bbox2roi, roi2bbox,
bbox2result)
......@@ -12,5 +11,5 @@ __all__ = [
'bbox_assign_via_overlaps', 'bbox_sampling', 'sample_positives',
'sample_negatives', 'bbox_transform', 'bbox_transform_inv', 'bbox_flip',
'bbox_mapping', 'bbox_mapping_back', 'bbox2roi', 'roi2bbox', 'bbox2result',
'bbox_target', 'sample_proposals'
'bbox_target'
]
......@@ -255,38 +255,3 @@ def bbox_sampling(assigned_gt_inds,
neg_hard_fraction)
neg_inds = neg_inds.unique()
return pos_inds, neg_inds
def sample_proposals(proposals_list, gt_bboxes_list, gt_crowds_list,
                     gt_labels_list, cfg):
    """Sample positive/negative proposals for every image in a batch.

    Delegates per-image work to ``sample_proposals_single`` and transposes
    the per-image result tuples into a tuple of per-field lists.
    """
    per_img_results = [
        sample_proposals_single(props, bboxes, crowds, labels, cfg)
        for props, bboxes, crowds, labels in zip(
            proposals_list, gt_bboxes_list, gt_crowds_list, gt_labels_list)
    ]
    # transpose: list of per-image tuples -> tuple of per-field lists
    return tuple(list(field) for field in zip(*per_img_results))
def sample_proposals_single(proposals, gt_bboxes, gt_crowds, gt_labels, cfg):
    """Assign GT boxes to proposals and sample pos/neg RoIs for one image.

    Args:
        proposals: proposal boxes; only the first 4 columns (coords) are used.
        gt_bboxes: ground-truth boxes for this image.
        gt_crowds: crowd GT boxes (passed through to ``bbox_assign``).
        gt_labels: labels of the ground-truth boxes.
        cfg: sampling config (pos_iou_thr, neg_iou_thr, crowd_thr,
            add_gt_as_proposals, roi_batch_size, pos_fraction, ...).

    Returns:
        Tuple of (pos_inds, neg_inds, pos_proposals, neg_proposals,
        pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels).
    """
    # drop score column (if any); keep only box coordinates
    proposals = proposals[:, :4]
    assigned_gt_inds, assigned_labels, argmax_overlaps, max_overlaps = \
        bbox_assign(
            proposals, gt_bboxes, gt_crowds, gt_labels, cfg.pos_iou_thr,
            # NOTE(review): cfg.pos_iou_thr is passed twice here — confirm
            # against the bbox_assign signature that the third threshold
            # (presumably min_pos_iou) is intended to equal pos_iou_thr.
            cfg.neg_iou_thr, cfg.pos_iou_thr, cfg.crowd_thr)
    if cfg.add_gt_as_proposals:
        # prepend GT boxes as proposals; they self-assign to GT 1..num_gts
        proposals = torch.cat([gt_bboxes, proposals], dim=0)
        gt_assign_self = torch.arange(
            1, len(gt_labels) + 1, dtype=torch.long, device=proposals.device)
        assigned_gt_inds = torch.cat([gt_assign_self, assigned_gt_inds])
        assigned_labels = torch.cat([gt_labels, assigned_labels])
    pos_inds, neg_inds = bbox_sampling(
        assigned_gt_inds, cfg.roi_batch_size, cfg.pos_fraction, cfg.neg_pos_ub,
        cfg.pos_balance_sampling, max_overlaps, cfg.neg_balance_thr)
    pos_proposals = proposals[pos_inds]
    neg_proposals = proposals[neg_inds]
    # assigned_gt_inds is 1-based (0 = unassigned); convert to 0-based index
    pos_assigned_gt_inds = assigned_gt_inds[pos_inds] - 1
    pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
    pos_gt_labels = assigned_labels[pos_inds]
    return (pos_inds, neg_inds, pos_proposals, neg_proposals,
            pos_assigned_gt_inds, pos_gt_bboxes, pos_gt_labels)
......@@ -58,7 +58,7 @@ def mask_cross_entropy(pred, target, label):
inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)
pred_slice = pred[inds, label].squeeze(1)
return F.binary_cross_entropy_with_logits(
pred_slice, target, reduction='sum')[None]
pred_slice, target, reduction='elementwise_mean')[None]
def weighted_mask_cross_entropy(pred, target, weight, label):
......
from .segms import (flip_segms, polys_to_mask, mask_to_bbox,
polys_to_mask_wrt_box, polys_to_boxes, rle_mask_voting,
rle_mask_nms, rle_masks_to_boxes)
from .utils import split_combined_gt_polys
from .utils import split_combined_polys
from .mask_target import mask_target
__all__ = [
'flip_segms', 'polys_to_mask', 'mask_to_bbox', 'polys_to_mask_wrt_box',
'polys_to_boxes', 'rle_mask_voting', 'rle_mask_nms', 'rle_masks_to_boxes',
'split_combined_gt_polys', 'mask_target'
'split_combined_polys', 'mask_target'
]
......@@ -4,27 +4,31 @@ import numpy as np
from .segms import polys_to_mask_wrt_box
def mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_polys_list,
img_meta, cfg):
def mask_target(pos_proposals_list,
pos_assigned_gt_inds_list,
gt_polys_list,
img_meta,
cfg):
cfg_list = [cfg for _ in range(len(pos_proposals_list))]
img_metas = [img_meta for _ in range(len(pos_proposals_list))]
mask_targets = map(mask_target_single, pos_proposals_list,
pos_assigned_gt_inds_list, gt_polys_list, img_metas,
pos_assigned_gt_inds_list, gt_polys_list, img_meta,
cfg_list)
mask_targets = torch.cat(tuple(mask_targets), dim=0)
return mask_targets
def mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_polys,
img_meta, cfg):
def mask_target_single(pos_proposals,
pos_assigned_gt_inds,
gt_polys,
img_meta,
cfg):
mask_size = cfg.mask_size
num_pos = pos_proposals.size(0)
mask_targets = pos_proposals.new_zeros((num_pos, mask_size, mask_size))
if num_pos > 0:
pos_proposals = pos_proposals.cpu().numpy()
pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()
scale_factor = img_meta['scale_factor'][0].cpu().numpy()
scale_factor = img_meta['scale_factor']
for i in range(num_pos):
bbox = pos_proposals[i, :] / scale_factor
polys = gt_polys[pos_assigned_gt_inds[i]]
......
import mmcv
def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
def split_combined_polys(polys, poly_lens, polys_per_mask):
"""Split the combined 1-D polys into masks.
A mask is represented as a list of polys, and a poly is represented as
......@@ -9,9 +9,9 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
tensor. Here we need to split the tensor into original representations.
Args:
gt_polys (list): a list (length = image num) of 1-D tensors
gt_poly_lens (list): a list (length = image num) of poly length
num_polys_per_mask (list): a list (length = image num) of poly number
polys (list): a list (length = image num) of 1-D tensors
poly_lens (list): a list (length = image num) of poly length
polys_per_mask (list): a list (length = image num) of poly number
of each mask
Returns:
......@@ -19,13 +19,12 @@ def split_combined_gt_polys(gt_polys, gt_poly_lens, num_polys_per_mask):
list (length = poly num) of numpy array
"""
mask_polys_list = []
for img_id in range(len(gt_polys)):
gt_polys_single = gt_polys[img_id].cpu().numpy()
gt_polys_lens_single = gt_poly_lens[img_id].cpu().numpy().tolist()
num_polys_per_mask_single = num_polys_per_mask[
img_id].cpu().numpy().tolist()
for img_id in range(len(polys)):
polys_single = polys[img_id]
polys_lens_single = poly_lens[img_id].tolist()
polys_per_mask_single = polys_per_mask[img_id].tolist()
split_gt_polys = mmcv.slice_list(gt_polys_single, gt_polys_lens_single)
mask_polys = mmcv.slice_list(split_gt_polys, num_polys_per_mask_single)
split_polys = mmcv.slice_list(polys_single, polys_lens_single)
mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)
mask_polys_list.append(mask_polys)
return mask_polys_list
......@@ -54,9 +54,9 @@ def merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):
"""
recovered_bboxes = []
for bboxes, img_info in zip(aug_bboxes, img_metas):
img_shape = img_info['img_shape']
scale_factor = img_info['scale_factor']
flip = img_info['flip']
img_shape = img_info[0]['img_shape']
scale_factor = img_info[0]['scale_factor']
flip = img_info[0]['flip']
bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)
recovered_bboxes.append(bboxes)
bboxes = torch.stack(recovered_bboxes).mean(dim=0)
......@@ -75,7 +75,7 @@ def merge_aug_scores(aug_scores):
return np.mean(aug_scores, axis=0)
def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None):
def merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):
"""Merge augmented mask prediction.
Args:
......@@ -87,7 +87,7 @@ def merge_aug_masks(aug_masks, bboxes, img_metas, rcnn_test_cfg, weights=None):
tuple: (bboxes, scores)
"""
recovered_masks = [
mask if not img_info['flip'][0] else mask[..., ::-1]
mask if not img_info[0]['flip'] else mask[..., ::-1]
for mask, img_info in zip(aug_masks, img_metas)
]
if weights is None:
......
from .dist_utils import *
from .hooks import *
from .misc import *
from .dist_utils import (init_dist, reduce_grads, DistOptimizerHook,
DistSamplerSeedHook)
from .hooks import (EmptyCacheHook, DistEvalHook, DistEvalRecallHook,
CocoDistEvalmAPHook)
from .misc import tensor2imgs, unmap, results2json, multi_apply
__all__ = [
'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook',
'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook',
'CocoDistEvalmAPHook', 'tensor2imgs', 'unmap', 'results2json',
'multi_apply'
]
......@@ -8,10 +8,6 @@ from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors
from torch.nn.utils import clip_grad
from mmcv.torchpack import Hook, OptimizerHook
__all__ = [
'init_dist', 'reduce_grads', 'DistOptimizerHook', 'DistSamplerSeedHook'
]
def init_dist(launcher, backend='nccl', **kwargs):
if mp.get_start_method(allow_none=True) is None:
......
......@@ -13,11 +13,6 @@ from pycocotools.cocoeval import COCOeval
from ..eval import eval_recalls
__all__ = [
'EmptyCacheHook', 'DistEvalHook', 'DistEvalRecallHook',
'CocoDistEvalmAPHook'
]
class EmptyCacheHook(Hook):
......
......@@ -4,9 +4,6 @@ import mmcv
import numpy as np
from six.moves import map, zip
__all__ = ['tensor2imgs', 'multi_apply', 'unmap', 'results2json']
def tensor2imgs(tensor, mean=(0, 0, 0), std=(1, 1, 1), to_rgb=True):
num_imgs = tensor.size(0)
mean = np.array(mean, dtype=np.float32)
......@@ -48,54 +45,54 @@ def xyxy2xywh(bbox):
]
def det2json(dataset, results):
def proposal2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
bboxes = results[idx]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
data['category_id'] = 1
json_results.append(data)
return json_results
def segm2json(dataset, results):
def det2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
result = results[idx]
for label in range(len(result)):
bboxes = result[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
def proposal2json(dataset, results):
def segm2json(dataset, results):
json_results = []
for idx in range(len(dataset)):
img_id = dataset.img_ids[idx]
bboxes = results[idx]
det, seg = results[idx]
for label in range(len(det)):
bboxes = det[label]
segms = seg[label]
for i in range(bboxes.shape[0]):
data = dict()
data['image_id'] = img_id
data['bbox'] = xyxy2xywh(bboxes[i])
data['score'] = float(bboxes[i][4])
data['category_id'] = 1
data['category_id'] = dataset.cat_ids[label]
segms[i]['counts'] = segms[i]['counts'].decode()
data['segmentation'] = segms[i]
json_results.append(data)
return json_results
......
......@@ -109,7 +109,7 @@ class BBoxHead(nn.Module):
# TODO: add clip here
if rescale:
bboxes /= scale_factor.float()
bboxes /= scale_factor
if nms_cfg is None:
return bboxes, scores
......
from .base import BaseDetector
from .rpn import RPN
from .faster_rcnn import FasterRCNN
from .mask_rcnn import MaskRCNN
__all__ = ['BaseDetector', 'RPN']
__all__ = ['BaseDetector', 'RPN', 'FasterRCNN', 'MaskRCNN']
import torch
import torch.nn as nn
from .. import builder
from mmdet.core import (bbox2roi, bbox_mapping, split_combined_gt_polys,
bbox2result, multiclass_nms, merge_aug_proposals,
merge_aug_bboxes, merge_aug_masks, sample_proposals)
class Detector(nn.Module):
    """Generic two-stage detector (Faster/Mask R-CNN style).

    Components (backbone, optional neck/FPN, RPN head, bbox head, optional
    mask head) are built from config dicts via ``builder``. ``forward``
    either computes training losses or dispatches to testing, depending on
    ``return_loss``.
    """

    def __init__(self,
                 backbone,
                 neck=None,
                 rpn_head=None,
                 roi_block=None,
                 bbox_head=None,
                 mask_block=None,
                 mask_head=None,
                 rpn_train_cfg=None,
                 rpn_test_cfg=None,
                 rcnn_train_cfg=None,
                 rcnn_test_cfg=None,
                 pretrained=None):
        super(Detector, self).__init__()
        self.backbone = builder.build_backbone(backbone)
        # each optional component is only built when its config is given
        self.with_neck = True if neck is not None else False
        if self.with_neck:
            self.neck = builder.build_neck(neck)
        self.with_rpn = True if rpn_head is not None else False
        if self.with_rpn:
            self.rpn_head = builder.build_rpn_head(rpn_head)
            self.rpn_train_cfg = rpn_train_cfg
            self.rpn_test_cfg = rpn_test_cfg
        self.with_bbox = True if bbox_head is not None else False
        if self.with_bbox:
            self.bbox_roi_extractor = builder.build_roi_extractor(roi_block)
            self.bbox_head = builder.build_bbox_head(bbox_head)
            self.rcnn_train_cfg = rcnn_train_cfg
            self.rcnn_test_cfg = rcnn_test_cfg
        self.with_mask = True if mask_head is not None else False
        if self.with_mask:
            self.mask_roi_extractor = builder.build_roi_extractor(mask_block)
            self.mask_head = builder.build_mask_head(mask_head)
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Initialize weights of all built components.

        ``pretrained`` (str or None) is forwarded to the backbone only.
        """
        if pretrained is not None:
            print('load model from: {}'.format(pretrained))
        self.backbone.init_weights(pretrained=pretrained)
        if self.with_neck:
            if isinstance(self.neck, nn.Sequential):
                for m in self.neck:
                    m.init_weights()
            else:
                self.neck.init_weights()
        if self.with_rpn:
            self.rpn_head.init_weights()
        if self.with_bbox:
            self.bbox_roi_extractor.init_weights()
            self.bbox_head.init_weights()
        if self.with_mask:
            self.mask_roi_extractor.init_weights()
            self.mask_head.init_weights()

    def forward(self,
                img,
                img_meta,
                gt_bboxes=None,
                proposals=None,
                gt_labels=None,
                gt_bboxes_ignore=None,
                gt_mask_polys=None,
                gt_poly_lens=None,
                num_polys_per_mask=None,
                return_loss=True,
                return_bboxes=True,
                rescale=False):
        """Training forward (returns a dict of losses) or, when
        ``return_loss`` is False, test forward via :meth:`test`.

        GT masks arrive as combined 1-D polys (``gt_mask_polys``,
        ``gt_poly_lens``, ``num_polys_per_mask``) and are split back into
        per-mask polys with ``split_combined_gt_polys``.
        """
        # NOTE(review): the condition enforces "at least one of proposals /
        # RPN must exist" but the message says "only one" — confirm intent.
        assert proposals is not None or self.with_rpn, "Only one of proposals file and RPN can exist."
        if not return_loss:
            return self.test(img, img_meta, proposals, rescale)
        else:
            losses = dict()
            img_shapes = img_meta['img_shape']
            x = self.backbone(img)
            if self.with_neck:
                x = self.neck(x)
            if self.with_rpn:
                rpn_outs = self.rpn_head(x)
                rpn_loss_inputs = rpn_outs + (gt_bboxes, img_shapes,
                                              self.rpn_train_cfg)
                rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
                losses.update(rpn_losses)
            if self.with_bbox:
                # obtain proposals either from the RPN or from the caller
                if self.with_rpn:
                    proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
                    proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
                else:
                    proposal_list = proposals
                (pos_inds, neg_inds, pos_proposals, neg_proposals,
                 pos_assigned_gt_inds,
                 pos_gt_bboxes, pos_gt_labels) = sample_proposals(
                     proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels,
                     self.rcnn_train_cfg)
                labels, label_weights, bbox_targets, bbox_weights = \
                    self.bbox_head.get_bbox_target(
                        pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
                        self.rcnn_train_cfg)
                # one RoI set per batch: positives first, then negatives
                rois = bbox2roi([
                    torch.cat([pos, neg], dim=0)
                    for pos, neg in zip(pos_proposals, neg_proposals)
                ])
                # TODO: a more flexible way to configurate feat maps
                roi_feats = self.bbox_roi_extractor(
                    x[:self.bbox_roi_extractor.num_inputs], rois)
                cls_score, bbox_pred = self.bbox_head(roi_feats)
                loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, labels,
                                                label_weights, bbox_targets,
                                                bbox_weights)
                losses.update(loss_bbox)
            if self.with_mask:
                gt_polys = split_combined_gt_polys(gt_mask_polys, gt_poly_lens,
                                                   num_polys_per_mask)
                mask_targets = self.mask_head.get_mask_target(
                    pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta,
                    self.rcnn_train_cfg)
                # mask branch only runs on positive RoIs
                pos_rois = bbox2roi(pos_proposals)
                mask_feats = self.mask_roi_extractor(
                    x[:self.mask_roi_extractor.num_inputs], pos_rois)
                mask_pred = self.mask_head(mask_feats)
                losses['loss_mask'] = self.mask_head.loss(mask_pred, mask_targets,
                                                          torch.cat(pos_gt_labels))
            return losses

    def test(self, imgs, img_metas, proposals=None, rescale=False):
        """Test w/ or w/o augmentations."""
        assert isinstance(imgs, list) and isinstance(img_metas, list)
        assert len(imgs) == len(img_metas)
        # only single-image batches are supported at test time
        img_per_gpu = imgs[0].size(0)
        assert img_per_gpu == 1
        if len(imgs) == 1:
            return self.simple_test(imgs[0], img_metas[0], proposals, rescale)
        else:
            return self.aug_test(imgs, img_metas, proposals, rescale)

    def simple_test_rpn(self, x, img_meta):
        """Run the RPN on extracted features, return proposals for the
        single image in the batch."""
        img_shapes = img_meta['img_shape']
        # NOTE(review): scale_factor is unused in this method
        scale_factor = img_meta['scale_factor']
        rpn_outs = self.rpn_head(x)
        proposal_inputs = rpn_outs + (img_shapes, self.rpn_test_cfg)
        proposal_list = self.rpn_head.get_proposals(*proposal_inputs)[0]
        return proposal_list

    def simple_test_bboxes(self, x, img_meta, proposals, rescale=False):
        """Test only det bboxes without augmentation."""
        rois = bbox2roi(proposals)
        roi_feats = self.bbox_roi_extractor(
            x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
        cls_score, bbox_pred = self.bbox_head(roi_feats)
        # image shape of the first image in the batch (only one)
        img_shape = img_meta['img_shape'][0]
        scale_factor = img_meta['scale_factor']
        det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
            rois,
            cls_score,
            bbox_pred,
            img_shape,
            scale_factor,
            rescale=rescale,
            nms_cfg=self.rcnn_test_cfg)
        return det_bboxes, det_labels

    def simple_test_mask(self,
                         x,
                         img_meta,
                         det_bboxes,
                         det_labels,
                         rescale=False):
        """Predict segmentation masks for detected boxes (no augmentation)."""
        # image shape of the first image in the batch (only one)
        # NOTE(review): img_shape is unused in this method
        img_shape = img_meta['img_shape'][0]
        scale_factor = img_meta['scale_factor']
        if det_bboxes.shape[0] == 0:
            # no detections: one empty list per foreground class
            segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
        else:
            # if det_bboxes is rescaled to the original image size, we need to
            # rescale it back to the testing scale to obtain RoIs.
            _bboxes = (det_bboxes[:, :4] * scale_factor.float()
                       if rescale else det_bboxes)
            mask_rois = bbox2roi([_bboxes])
            mask_feats = self.mask_roi_extractor(
                x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
            mask_pred = self.mask_head(mask_feats)
            segm_result = self.mask_head.get_seg_masks(
                mask_pred,
                det_bboxes,
                det_labels,
                self.rcnn_test_cfg,
                ori_scale=img_meta['ori_shape'])
        return segm_result

    def simple_test(self, img, img_meta, proposals=None, rescale=False):
        """Test without augmentation."""
        # get feature maps
        x = self.backbone(img)
        if self.with_neck:
            x = self.neck(x)
        if self.with_rpn:
            proposals = self.simple_test_rpn(x, img_meta)
        if self.with_bbox:
            # BUG proposals shape?
            det_bboxes, det_labels = self.simple_test_bboxes(
                x, img_meta, [proposals], rescale=rescale)
            bbox_result = bbox2result(det_bboxes, det_labels,
                                      self.bbox_head.num_classes)
            if not self.with_mask:
                return bbox_result
            segm_result = self.simple_test_mask(
                x, img_meta, det_bboxes, det_labels, rescale=rescale)
            return bbox_result, segm_result
        else:
            # RPN-only mode: return rescaled proposals as the result
            proposals[:, :4] /= img_meta['scale_factor'].float()
            return proposals.cpu().numpy()

    # TODO aug test haven't been verified
    def aug_test_bboxes(self, imgs, img_metas):
        """Test with augmentations for det bboxes."""
        # step 1: get RPN proposals for augmented images, apply NMS to the
        # union of all proposals.
        aug_proposals = []
        for img, img_meta in zip(imgs, img_metas):
            x = self.backbone(img)
            if self.neck is not None:
                x = self.neck(x)
            rpn_outs = self.rpn_head(x)
            proposal_inputs = rpn_outs + (img_meta['shape_scale'],
                                          self.rpn_test_cfg)
            proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
            assert len(proposal_list) == 1
            aug_proposals.append(proposal_list[0])  # len(proposal_list) = 1
        # after merging, proposals will be rescaled to the original image size
        merged_proposals = merge_aug_proposals(aug_proposals, img_metas,
                                               self.rpn_test_cfg)
        # step 2: Given merged proposals, predict bboxes for augmented images,
        # output the union of these bboxes.
        aug_bboxes = []
        aug_scores = []
        for img, img_meta in zip(imgs, img_metas):
            # only one image in the batch
            img_shape = img_meta['shape_scale'][0]
            flip = img_meta['flip'][0]
            # map merged proposals back to this augmented image's frame
            proposals = bbox_mapping(merged_proposals[:, :4], img_shape, flip)
            rois = bbox2roi([proposals])
            # recompute feature maps to save GPU memory
            x = self.backbone(img)
            if self.neck is not None:
                x = self.neck(x)
            roi_feats = self.bbox_roi_extractor(
                x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
            cls_score, bbox_pred = self.bbox_head(roi_feats)
            bboxes, scores = self.bbox_head.get_det_bboxes(
                rois,
                cls_score,
                bbox_pred,
                img_shape,
                rescale=False,
                nms_cfg=None)
            aug_bboxes.append(bboxes)
            aug_scores.append(scores)
        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg)
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr,
            self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img)
        return det_bboxes, det_labels

    def aug_test_mask(self,
                      imgs,
                      img_metas,
                      det_bboxes,
                      det_labels,
                      rescale=False):
        """Predict masks for merged detections across augmented images."""
        # step 3: Given merged bboxes, predict masks for augmented images,
        # scores of masks are averaged across augmented images.
        if rescale:
            _det_bboxes = det_bboxes
        else:
            _det_bboxes = det_bboxes.clone()
            # presumably shape_scale[0][-1] is the scale factor of imgs[0]
            # — TODO confirm the meta layout
            _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1]
        if det_bboxes.shape[0] == 0:
            segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
        else:
            aug_masks = []
            for img, img_meta in zip(imgs, img_metas):
                img_shape = img_meta['shape_scale'][0]
                flip = img_meta['flip'][0]
                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape, flip)
                mask_rois = bbox2roi([_bboxes])
                x = self.backbone(img)
                if self.neck is not None:
                    x = self.neck(x)
                mask_feats = self.mask_roi_extractor(
                    x[:len(self.mask_roi_extractor.featmap_strides)],
                    mask_rois)
                mask_pred = self.mask_head(mask_feats)
                # convert to numpy array to save memory
                aug_masks.append(mask_pred.sigmoid().cpu().numpy())
            merged_masks = merge_aug_masks(aug_masks, img_metas,
                                           self.rcnn_test_cfg)
            segm_result = self.mask_head.get_seg_masks(
                merged_masks, _det_bboxes, det_labels,
                img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale)
        return segm_result

    def aug_test(self, imgs, img_metas, rescale=False):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        # aug test det bboxes
        det_bboxes, det_labels = self.aug_test_bboxes(imgs, img_metas)
        if rescale:
            _det_bboxes = det_bboxes
        else:
            _det_bboxes = det_bboxes.clone()
            _det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1]
        bbox_result = bbox2result(_det_bboxes, det_labels,
                                  self.bbox_head.num_classes)
        if not self.with_mask:
            return bbox_result
        segm_result = self.aug_test_mask(
            imgs, img_metas, det_bboxes, det_labels, rescale=rescale)
        return bbox_result, segm_result
from .two_stage import TwoStageDetector
class FasterRCNN(TwoStageDetector):
    """Faster R-CNN: a TwoStageDetector configured without a mask branch."""

    def __init__(self,
                 backbone,
                 neck,
                 rpn_head,
                 bbox_roi_extractor,
                 bbox_head,
                 train_cfg,
                 test_cfg,
                 pretrained=None):
        # collect the components, then forward everything to the base class
        components = dict(
            backbone=backbone,
            neck=neck,
            rpn_head=rpn_head,
            bbox_roi_extractor=bbox_roi_extractor,
            bbox_head=bbox_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained)
        super(FasterRCNN, self).__init__(**components)
from .two_stage import TwoStageDetector
class MaskRCNN(TwoStageDetector):
    """Mask R-CNN: a TwoStageDetector with an additional mask branch."""

    def __init__(self,
                 backbone,
                 neck,
                 rpn_head,
                 bbox_roi_extractor,
                 bbox_head,
                 mask_roi_extractor,
                 mask_head,
                 train_cfg,
                 test_cfg,
                 pretrained=None):
        # collect the components, then forward everything to the base class
        components = dict(
            backbone=backbone,
            neck=neck,
            rpn_head=rpn_head,
            bbox_roi_extractor=bbox_roi_extractor,
            bbox_head=bbox_head,
            mask_roi_extractor=mask_roi_extractor,
            mask_head=mask_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained)
        super(MaskRCNN, self).__init__(**components)
......@@ -2,7 +2,7 @@ import mmcv
from mmdet.core import tensor2imgs, bbox_mapping
from .base import BaseDetector
from .testing_mixins import RPNTestMixin
from .test_mixins import RPNTestMixin
from .. import builder
......
......@@ -50,7 +50,7 @@ class BBoxTestMixin(object):
nms_cfg=rcnn_test_cfg)
return det_bboxes, det_labels
def aug_test_bboxes(self, feats, img_metas, proposals, rcnn_test_cfg):
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
aug_bboxes = []
aug_scores = []
for x, img_meta in zip(feats, img_metas):
......@@ -58,8 +58,9 @@ class BBoxTestMixin(object):
img_shape = img_meta[0]['img_shape']
scale_factor = img_meta[0]['scale_factor']
flip = img_meta[0]['flip']
proposals = bbox_mapping(proposals[:, :4], img_shape, scale_factor,
flip)
# TODO more flexible
proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
scale_factor, flip)
rois = bbox2roi([proposals])
# recompute feature maps to save GPU memory
roi_feats = self.bbox_roi_extractor(
......@@ -70,16 +71,17 @@ class BBoxTestMixin(object):
cls_score,
bbox_pred,
img_shape,
scale_factor,
rescale=False,
nms_cfg=None)
aug_bboxes.append(bboxes)
aug_scores.append(scores)
# after merging, bboxes will be rescaled to the original image size
merged_bboxes, merged_scores = merge_aug_bboxes(
aug_bboxes, aug_scores, img_metas, self.rcnn_test_cfg)
aug_bboxes, aug_scores, img_metas, self.test_cfg.rcnn)
det_bboxes, det_labels = multiclass_nms(
merged_bboxes, merged_scores, self.rcnn_test_cfg.score_thr,
self.rcnn_test_cfg.nms_thr, self.rcnn_test_cfg.max_per_img)
merged_bboxes, merged_scores, self.test_cfg.rcnn.score_thr,
self.test_cfg.rcnn.nms_thr, self.test_cfg.rcnn.max_per_img)
return det_bboxes, det_labels
......@@ -92,7 +94,7 @@ class MaskTestMixin(object):
det_labels,
rescale=False):
# image shape of the first image in the batch (only one)
img_shape = img_meta[0]['img_shape']
ori_shape = img_meta[0]['ori_shape']
scale_factor = img_meta[0]['scale_factor']
if det_bboxes.shape[0] == 0:
segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
......@@ -106,21 +108,11 @@ class MaskTestMixin(object):
x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
mask_pred = self.mask_head(mask_feats)
segm_result = self.mask_head.get_seg_masks(
mask_pred, det_bboxes, det_labels, img_shape,
self.rcnn_test_cfg, rescale)
mask_pred, det_bboxes, det_labels, self.test_cfg.rcnn,
ori_shape)
return segm_result
def aug_test_mask(self,
feats,
img_metas,
det_bboxes,
det_labels,
rescale=False):
if rescale:
_det_bboxes = det_bboxes
else:
_det_bboxes = det_bboxes.clone()
_det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):
if det_bboxes.shape[0] == 0:
segm_result = [[] for _ in range(self.mask_head.num_classes - 1)]
else:
......@@ -139,8 +131,10 @@ class MaskTestMixin(object):
# convert to numpy array to save memory
aug_masks.append(mask_pred.sigmoid().cpu().numpy())
merged_masks = merge_aug_masks(aug_masks, img_metas,
self.rcnn_test_cfg)
self.test_cfg.rcnn)
ori_shape = img_metas[0][0]['ori_shape']
segm_result = self.mask_head.get_seg_masks(
merged_masks, _det_bboxes, det_labels,
img_metas[0]['shape_scale'][0], self.rcnn_test_cfg, rescale)
merged_masks, det_bboxes, det_labels, self.test_cfg.rcnn,
ori_shape)
return segm_result
import torch
import torch.nn as nn
from .base import Detector
from .testing_mixins import RPNTestMixin, BBoxTestMixin
from .base import BaseDetector
from .test_mixins import RPNTestMixin, BBoxTestMixin, MaskTestMixin
from .. import builder
from mmdet.core import bbox2roi, bbox2result, sample_proposals
from mmdet.core import bbox2roi, bbox2result, split_combined_polys, multi_apply
class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin):
class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
MaskTestMixin):
def __init__(self,
backbone,
......@@ -15,13 +16,16 @@ class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin):
rpn_head=None,
bbox_roi_extractor=None,
bbox_head=None,
mask_roi_extractor=None,
mask_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(Detector, self).__init__()
super(TwoStageDetector, self).__init__()
self.backbone = builder.build_backbone(backbone)
self.with_neck = True if neck is not None else False
assert self.with_neck, "TwoStageDetector must be implemented with FPN now."
if self.with_neck:
self.neck = builder.build_neck(neck)
......@@ -35,6 +39,12 @@ class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin):
bbox_roi_extractor)
self.bbox_head = builder.build_bbox_head(bbox_head)
self.with_mask = True if mask_head is not None else False
if self.with_mask:
self.mask_roi_extractor = builder.build_roi_extractor(
mask_roi_extractor)
self.mask_head = builder.build_mask_head(mask_head)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
......@@ -68,6 +78,7 @@ class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin):
gt_bboxes,
gt_bboxes_ignore,
gt_labels,
gt_masks=None,
proposals=None):
losses = dict()
......@@ -80,22 +91,22 @@ class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin):
rpn_losses = self.rpn_head.loss(*rpn_loss_inputs)
losses.update(rpn_losses)
proposal_inputs = rpn_outs + (img_meta, self.self.test_cfg.rpn)
proposal_inputs = rpn_outs + (img_meta, self.test_cfg.rpn)
proposal_list = self.rpn_head.get_proposals(*proposal_inputs)
else:
proposal_list = proposals
(pos_inds, neg_inds, pos_proposals, neg_proposals,
pos_assigned_gt_inds,
pos_gt_bboxes, pos_gt_labels) = sample_proposals(
proposal_list, gt_bboxes, gt_bboxes_ignore, gt_labels,
self.train_cfg.rcnn)
if self.with_bbox:
rcnn_train_cfg_list = [
self.train_cfg.rcnn for _ in range(len(proposal_list))
]
(pos_proposals, neg_proposals, pos_assigned_gt_inds, pos_gt_bboxes,
pos_gt_labels) = multi_apply(
self.bbox_roi_extractor.sample_proposals, proposal_list,
gt_bboxes, gt_bboxes_ignore, gt_labels, rcnn_train_cfg_list)
labels, label_weights, bbox_targets, bbox_weights = \
self.bbox_head.get_bbox_target(
pos_proposals, neg_proposals, pos_gt_bboxes, pos_gt_labels,
self.train_cfg.rcnn)
self.bbox_head.get_bbox_target(pos_proposals, neg_proposals,
pos_gt_bboxes, pos_gt_labels, self.train_cfg.rcnn)
rois = bbox2roi([
torch.cat([pos, neg], dim=0)
......@@ -111,23 +122,42 @@ class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin):
bbox_weights)
losses.update(loss_bbox)
if self.with_mask:
gt_polys = split_combined_polys(**gt_masks)
mask_targets = self.mask_head.get_mask_target(
pos_proposals, pos_assigned_gt_inds, gt_polys, img_meta,
self.train_cfg.rcnn)
pos_rois = bbox2roi(pos_proposals)
mask_feats = self.mask_roi_extractor(
x[:self.mask_roi_extractor.num_inputs], pos_rois)
mask_pred = self.mask_head(mask_feats)
loss_mask = self.mask_head.loss(mask_pred, mask_targets,
torch.cat(pos_gt_labels))
losses.update(loss_mask)
return losses
def simple_test(self, img, img_meta, proposals=None, rescale=False):
"""Test without augmentation."""
assert proposals == None, "Fast RCNN hasn't been implemented."
assert self.with_bbox, "Bbox head must be implemented."
x = self.extract_feat(img)
if proposals is None:
proposals = self.simple_test_rpn(x, img_meta)
if self.with_bbox:
# BUG proposals shape?
proposal_list = self.simple_test_rpn(
x, img_meta, self.test_cfg.rpn) if proposals is None else proposals
det_bboxes, det_labels = self.simple_test_bboxes(
x, img_meta, [proposals], rescale=rescale)
bbox_result = bbox2result(det_bboxes, det_labels,
x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale)
bbox_results = bbox2result(det_bboxes, det_labels,
self.bbox_head.num_classes)
return bbox_result
if self.with_mask:
segm_results = self.simple_test_mask(
x, img_meta, det_bboxes, det_labels, rescale=rescale)
return bbox_results, segm_results
else:
proposals[:, :4] /= img_meta['scale_factor'].float()
return proposals.cpu().numpy()
return bbox_results
def aug_test(self, imgs, img_metas, rescale=False):
"""Test with augmentations.
......@@ -135,15 +165,28 @@ class TwoStageDetector(Detector, RPNTestMixin, BBoxTestMixin):
If rescale is False, then returned bboxes and masks will fit the scale
of imgs[0].
"""
proposals = self.aug_test_rpn(
self.extract_feats(imgs), img_metas, self.rpn_test_cfg)
# recompute self.extract_feats(imgs) because of 'yield' and memory
proposal_list = self.aug_test_rpn(
self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
det_bboxes, det_labels = self.aug_test_bboxes(
self.extract_feats(imgs), img_metas, proposals, self.rcnn_test_cfg)
self.extract_feats(imgs), img_metas, proposal_list,
self.test_cfg.rcnn)
if rescale:
_det_bboxes = det_bboxes
else:
_det_bboxes = det_bboxes.clone()
_det_bboxes[:, :4] *= img_metas[0]['shape_scale'][0][-1]
bbox_result = bbox2result(_det_bboxes, det_labels,
_det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
bbox_results = bbox2result(_det_bboxes, det_labels,
self.bbox_head.num_classes)
return bbox_result
# det_bboxes always keep the original scale
if self.with_mask:
segm_results = self.aug_test_mask(
self.extract_feats(imgs),
img_metas,
det_bboxes,
det_labels)
return bbox_results, segm_results
else:
return bbox_results
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment