Commit ba492be7 authored by zhangwenwei

Use MMDet API and pass CI

parent 9466dff7
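For readers skimming the diff: the commit's theme is deleting mmdet3d's forked copies of MMDet infrastructure (registries, builders, the generic detectors, test mixins) and importing the equivalents from MMDet itself. Every hunk below leans on the registry pattern; here is a minimal sketch of it using the MMDet 1.x-era API that appears in this diff (the toy sampler class is hypothetical):

from mmdet.utils import Registry, build_from_cfg

# A registry maps a string type name to a class.
OBJECTSAMPLERS = Registry('Object sampler')


@OBJECTSAMPLERS.register_module  # bare decorator, the old-style usage seen below
class ToySampler(object):

    def __init__(self, rate=1.0):
        self.rate = rate


# Configs are plain dicts: build_from_cfg pops 'type', looks the class up in
# the registry, and instantiates it with the remaining keys.
sampler = build_from_cfg(dict(type='ToySampler', rate=0.5), OBJECTSAMPLERS)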
@@ -2,10 +2,10 @@ import numpy as np
from mmdet3d.core.bbox import box_np_ops
from mmdet3d.utils import build_from_cfg
+from mmdet.datasets.pipelines import RandomFlip
from mmdet.datasets.registry import PIPELINES
from ..registry import OBJECTSAMPLERS
from .data_augment_utils import noise_per_object_v3_
-from .transforms import RandomFlip

@PIPELINES.register_module
...
-from mmdet3d.utils import Registry
+from mmdet.utils import Registry

-OBJECTSAMPLERS = Registry('object_sampler')
+OBJECTSAMPLERS = Registry('Object sampler')
...
from .anchor_heads import *  # noqa: F401,F403
from .backbones import *  # noqa: F401,F403
from .bbox_heads import *  # noqa: F401,F403
-from .builder import (build_backbone, build_detector, build_head, build_loss,
-                      build_neck, build_roi_extractor, build_shared_head)
+from .builder import (build_backbone, build_detector, build_fusion_layer,
+                      build_head, build_loss, build_middle_encoder, build_neck,
+                      build_roi_extractor, build_shared_head,
+                      build_voxel_encoder)
from .detectors import *  # noqa: F401,F403
from .fusion_layers import *  # noqa: F401,F403
from .losses import *  # noqa: F401,F403
from .middle_encoders import *  # noqa: F401,F403
from .necks import *  # noqa: F401,F403
-from .registry import (BACKBONES, DETECTORS, HEADS, LOSSES, MIDDLE_ENCODERS,
-                       NECKS, ROI_EXTRACTORS, SHARED_HEADS, VOXEL_ENCODERS)
+from .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS
from .roi_extractors import *  # noqa: F401,F403
from .voxel_encoders import *  # noqa: F401,F403

__all__ = [
-    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',
-    'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 'DETECTORS', 'build_backbone',
+    'VOXEL_ENCODERS', 'MIDDLE_ENCODERS', 'FUSION_LAYERS', 'build_backbone',
    'build_neck', 'build_roi_extractor', 'build_shared_head', 'build_head',
-    'build_loss', 'build_detector'
+    'build_loss', 'build_detector', 'build_fusion_layer',
+    'build_middle_encoder', 'build_voxel_encoder'
]
...
@@ -4,7 +4,7 @@ from mmcv.cnn import normal_init
from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
-from ..registry import HEADS
+from mmdet.models import HEADS
from ..utils import bias_init_with_prob
from .second_head import SECONDHead
...
@@ -10,8 +10,8 @@ from mmdet3d.core import (PseudoSampler, box_torch_ops,
                          build_assigner, build_bbox_coder, build_sampler,
                          multi_apply)
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
+from mmdet.models import HEADS
from ..builder import build_loss
-from ..registry import HEADS
from ..utils import bias_init_with_prob
from .train_mixins import AnchorTrainMixin
...
import numpy as np
import torch

-from mmdet3d.core import box_torch_ops, images_to_levels, multi_apply
+from mmdet3d.core import box_torch_ops, multi_apply
+from mmdet.core import images_to_levels

class AnchorTrainMixin(object):
...
-from mmdet.models.backbone import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
+from mmdet.models.backbones import SSDVGG, HRNet, ResNet, ResNetV1d, ResNeXt
from .second import SECOND

__all__ = ['ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet', 'SECOND']
...
@@ -3,8 +3,8 @@ from functools import partial
import torch.nn as nn
from mmcv.runner import load_checkpoint

-from ..registry import BACKBONES
-from ..utils import build_norm_layer
+from mmdet.models import BACKBONES
+from mmdet.ops import build_norm_layer

class Empty(nn.Module):
...
-from torch import nn
+from mmdet.models.builder import build
from mmdet.models.registry import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,
                                   ROI_EXTRACTORS, SHARED_HEADS)

-from ..utils import build_from_cfg
from .registry import FUSION_LAYERS, MIDDLE_ENCODERS, VOXEL_ENCODERS

-def build(cfg, registry, default_args=None):
-    if isinstance(cfg, list):
-        modules = [
-            build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
-        ]
-        return nn.Sequential(*modules)
-    else:
-        return build_from_cfg(cfg, registry, default_args)

def build_backbone(cfg):
    return build(cfg, BACKBONES)
...
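Since the deleted build() was a line-for-line copy of MMDet's, the module now imports it instead. The 3D-specific helpers newly exported from models/__init__.py (build_voxel_encoder, build_middle_encoder, build_fusion_layer) are presumably the same one-liner shape as build_backbone above; their bodies are collapsed in this view, so the following is a sketch:

def build_voxel_encoder(cfg):
    return build(cfg, VOXEL_ENCODERS)


def build_middle_encoder(cfg):
    return build(cfg, MIDDLE_ENCODERS)


def build_fusion_layer(cfg):
    return build(cfg, FUSION_LAYERS)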
@@ -3,12 +3,10 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
                              DynamicMVXFasterRCNNV3)
from .mvx_single_stage import MVXSingleStageDetector
from .mvx_two_stage import MVXTwoStageDetector
-from .single_stage import SingleStageDetector
-from .two_stage import TwoStageDetector
from .voxelnet import DynamicVoxelNet, VoxelNet

__all__ = [
-    'BaseDetector', 'SingleStageDetector', 'VoxelNet', 'DynamicVoxelNet',
-    'TwoStageDetector', 'MVXSingleStageDetector', 'MVXTwoStageDetector',
-    'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2', 'DynamicMVXFasterRCNNV3'
+    'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
+    'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
+    'DynamicMVXFasterRCNNV3'
]
...
import torch
import torch.nn.functional as F

-from mmdet.models.registry import DETECTORS
+from mmdet.models import DETECTORS
from .mvx_two_stage import MVXTwoStageDetector
...
@@ -3,7 +3,7 @@ import torch.nn as nn
import torch.nn.functional as F

from mmdet3d.ops import Voxelization
-from mmdet.models.registry import DETECTORS
+from mmdet.models import DETECTORS
from .. import builder
from .base import BaseDetector
...
@@ -2,17 +2,14 @@ import torch
import torch.nn as nn
import torch.nn.functional as F

-from mmdet3d.core import (bbox2result_coco, bbox2roi, build_assigner,
-                          build_sampler)
from mmdet3d.ops import Voxelization
-from mmdet.models.registry import DETECTORS
+from mmdet.models import DETECTORS
from .. import builder
from .base import BaseDetector
-from .test_mixins import BBoxTestMixin, RPNTestMixin

@DETECTORS.register_module
-class MVXTwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin):
+class MVXTwoStageDetector(BaseDetector):

    def __init__(self,
                 pts_voxel_layer=None,
@@ -24,10 +21,8 @@ class MVXTwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin):
                 img_neck=None,
                 pts_neck=None,
                 pts_bbox_head=None,
-                 img_bbox_head=None,
-                 img_shared_head=None,
+                 img_roi_head=None,
                 img_rpn_head=None,
-                 img_bbox_roi_extractor=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
@@ -59,14 +54,10 @@ class MVXTwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin):
            self.img_backbone = builder.build_backbone(img_backbone)
        if img_neck is not None:
            self.img_neck = builder.build_neck(img_neck)
-        if img_shared_head is not None:
-            self.img_shared_head = builder.build_shared_head(img_shared_head)
        if img_rpn_head is not None:
            self.img_rpn_head = builder.build_head(img_rpn_head)
-        if img_bbox_head is not None:
-            self.img_bbox_roi_extractor = builder.build_roi_extractor(
-                img_bbox_roi_extractor)
-            self.img_bbox_head = builder.build_head(img_bbox_head)
+        if img_roi_head is not None:
+            self.img_roi_head = builder.build_head(img_roi_head)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
@@ -140,9 +131,6 @@ class MVXTwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin):
            return None
        if self.with_img_neck:
            img_feats = self.img_neck(img_feats)
-            if torch.isnan(img_feats[0]).any():
-                import pdb
-                pdb.set_trace()
        return img_feats

    def extract_pts_feat(self, pts, img_feats, img_meta):
@@ -227,7 +215,8 @@ class MVXTwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin):
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
-                      proposals=None):
+                      proposals=None,
+                      **kwargs):
        losses = dict()
        # RPN forward and loss
        if self.with_img_rpn:
@@ -245,45 +234,14 @@ class MVXTwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin):
        else:
            proposal_list = proposals

-        # assign gts and sample proposals
-        if self.with_img_bbox:
-            bbox_assigner = build_assigner(self.train_cfg.img_rcnn.assigner)
-            bbox_sampler = build_sampler(
-                self.train_cfg.img_rcnn.sampler, context=self)
-            num_imgs = len(img_meta)
-            if gt_bboxes_ignore is None:
-                gt_bboxes_ignore = [None for _ in range(num_imgs)]
-            sampling_results = []
-            for i in range(num_imgs):
-                assign_result = bbox_assigner.assign(proposal_list[i],
-                                                     gt_bboxes[i],
-                                                     gt_bboxes_ignore[i],
-                                                     gt_labels[i])
-                sampling_result = bbox_sampler.sample(
-                    assign_result,
-                    proposal_list[i],
-                    gt_bboxes[i],
-                    gt_labels[i],
-                    feats=[lvl_feat[i][None] for lvl_feat in x])
-                sampling_results.append(sampling_result)
-
        # bbox head forward and loss
-        if self.with_img_bbox:
-            rois = bbox2roi([res.bboxes for res in sampling_results])
-            # TODO: a more flexible way to decide which feature maps to use
-            bbox_feats = self.img_bbox_roi_extractor(
-                x[:self.img_bbox_roi_extractor.num_inputs], rois)
-            if self.with_shared_head:
-                bbox_feats = self.img_shared_head(bbox_feats)
-            cls_score, bbox_pred = self.img_bbox_head(bbox_feats)
-            bbox_targets = self.img_bbox_head.get_target(
-                sampling_results, gt_bboxes, gt_labels,
-                self.train_cfg.img_rcnn)
-            loss_bbox = self.img_bbox_head.loss(cls_score, bbox_pred,
-                                                *bbox_targets)
-            losses.update(loss_bbox)
+        img_roi_losses = self.roi_head.forward_train(x, img_meta,
+                                                     proposal_list, gt_bboxes,
+                                                     gt_labels,
+                                                     gt_bboxes_ignore,
+                                                     **kwargs)
+        losses.update(img_roi_losses)

        return losses

    def forward_test(self, **kwargs):
@@ -303,42 +261,8 @@ class MVXTwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin):
        else:
            proposal_list = proposals

-        det_bboxes, det_labels = self.simple_test_bboxes(
-            x,
-            img_meta,
-            proposal_list,
-            self.test_cfg.img_rcnn,
-            rescale=rescale)
-        bbox_results = bbox2result_coco(det_bboxes, det_labels,
-                                        self.img_bbox_head.num_classes)
-        return bbox_results
-
-    def simple_test_bboxes(self,
-                           x,
-                           img_meta,
-                           proposals,
-                           rcnn_test_cfg,
-                           rescale=False):
-        """Test only det bboxes without augmentation."""
-        rois = bbox2roi(proposals)
-        roi_feats = self.img_bbox_roi_extractor(
-            x[:len(self.img_bbox_roi_extractor.featmap_strides)], rois)
-        if self.with_img_shared_head:
-            roi_feats = self.img_shared_head(roi_feats)
-        cls_score, bbox_pred = self.img_bbox_head(roi_feats)
-        img_shape = img_meta[0]['img_shape']
-        scale_factor = img_meta[0]['scale_factor']
-        det_bboxes, det_labels = self.img_bbox_head.get_det_bboxes(
-            rois,
-            cls_score,
-            bbox_pred,
-            img_shape,
-            scale_factor,
-            rescale=rescale,
-            cfg=rcnn_test_cfg)
-        return det_bboxes, det_labels
+        return self.img_roi_head.simple_test(
+            x, proposal_list, img_meta, rescale=rescale)

    def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
        rpn_outs = self.img_rpn_head(x)
...
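The assign/sample/RoI-extract/loss plumbing removed above has not vanished; it moves behind the RoI-head abstraction built from img_roi_head, which is the MMDet API this commit adopts. (One wrinkle worth flagging: the hunk stores the head as self.img_roi_head but calls self.roi_head.forward_train during training; the sketch below uses a single consistent name.) Here is a hypothetical minimal head showing the contract the detector now relies on, followed by the three forked files this commit appears to delete outright (single-stage detector, test mixins, two-stage detector), since MMDet provides them:

class SketchRoIHead(object):
    """Illustrative only; MMDet's real RoI heads implement this interface."""

    def forward_train(self, x, img_meta, proposal_list, gt_bboxes, gt_labels,
                      gt_bboxes_ignore=None, **kwargs):
        # Internally: assign gts to proposals, sample positive/negative RoIs,
        # extract RoI features, run the bbox head, return a dict of losses.
        return dict(loss_cls=..., loss_bbox=...)

    def simple_test(self, x, proposal_list, img_meta, rescale=False):
        # Internally: RoI feature extraction, bbox decoding and NMS,
        # returning per-class detection results.
        raise NotImplementedError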
import torch.nn as nn
from mmdet3d.core import bbox2result_coco
from mmdet.models.registry import DETECTORS
from .. import builder
from .base import BaseDetector
@DETECTORS.register_module
class SingleStageDetector(BaseDetector):
"""Base class for single-stage detectors.
Single-stage detectors directly and densely predict bounding boxes on the
output features of the backbone+neck.
"""
def __init__(self,
backbone,
neck=None,
bbox_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(SingleStageDetector, self).__init__()
self.backbone = builder.build_backbone(backbone)
if neck is not None:
self.neck = builder.build_neck(neck)
bbox_head.update(train_cfg=train_cfg)
bbox_head.update(test_cfg=test_cfg)
self.bbox_head = builder.build_head(bbox_head)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self.init_weights(pretrained=pretrained)
def init_weights(self, pretrained=None):
super(SingleStageDetector, self).init_weights(pretrained)
self.backbone.init_weights(pretrained=pretrained)
if self.with_neck:
if isinstance(self.neck, nn.Sequential):
for m in self.neck:
m.init_weights()
else:
self.neck.init_weights()
self.bbox_head.init_weights()
def extract_feat(self, img):
"""Directly extract features from the backbone+neck
"""
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmdetection/tools/get_flops.py`
"""
x = self.extract_feat(img)
outs = self.bbox_head(x)
return outs
def forward_train(self,
img,
img_metas,
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None):
x = self.extract_feat(img)
outs = self.bbox_head(x)
loss_inputs = outs + (gt_bboxes, gt_labels, img_metas, self.train_cfg)
losses = self.bbox_head.loss(
*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
return losses
def simple_test(self, img, img_meta, rescale=False):
x = self.extract_feat(img)
outs = self.bbox_head(x)
bbox_inputs = outs + (img_meta, self.test_cfg, rescale)
bbox_list = self.bbox_head.get_bboxes(*bbox_inputs)
bbox_results = [
bbox2result_coco(det_bboxes, det_labels,
self.bbox_head.num_classes)
for det_bboxes, det_labels in bbox_list
]
return bbox_results[0]
def aug_test(self, imgs, img_metas, rescale=False):
raise NotImplementedError
import logging
import sys
import torch
from mmdet3d.core import (bbox2roi, bbox_mapping, merge_aug_bboxes,
merge_aug_masks, merge_aug_proposals, multiclass_nms)
logger = logging.getLogger(__name__)
if sys.version_info >= (3, 7):
from mmdet3d.utils.contextmanagers import completed
class RPNTestMixin(object):
if sys.version_info >= (3, 7):
async def async_test_rpn(self, x, img_meta, rpn_test_cfg):
sleep_interval = rpn_test_cfg.pop('async_sleep_interval', 0.025)
async with completed(
__name__, 'rpn_head_forward',
sleep_interval=sleep_interval):
rpn_outs = self.rpn_head(x)
proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
return proposal_list
def simple_test_rpn(self, x, img_meta, rpn_test_cfg):
rpn_outs = self.rpn_head(x)
proposal_inputs = rpn_outs + (img_meta, rpn_test_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
return proposal_list
def aug_test_rpn(self, feats, img_metas, rpn_test_cfg):
imgs_per_gpu = len(img_metas[0])
aug_proposals = [[] for _ in range(imgs_per_gpu)]
for x, img_meta in zip(feats, img_metas):
proposal_list = self.simple_test_rpn(x, img_meta, rpn_test_cfg)
for i, proposals in enumerate(proposal_list):
aug_proposals[i].append(proposals)
# reorganize the order of 'img_metas' to match the dimensions
# of 'aug_proposals'
aug_img_metas = []
for i in range(imgs_per_gpu):
aug_img_meta = []
for j in range(len(img_metas)):
aug_img_meta.append(img_metas[j][i])
aug_img_metas.append(aug_img_meta)
# after merging, proposals will be rescaled to the original image size
merged_proposals = [
merge_aug_proposals(proposals, aug_img_meta, rpn_test_cfg)
for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas)
]
return merged_proposals
class BBoxTestMixin(object):
if sys.version_info >= (3, 7):
async def async_test_bboxes(self,
x,
img_meta,
proposals,
rcnn_test_cfg,
rescale=False,
bbox_semaphore=None,
global_lock=None):
"""Async test only det bboxes without augmentation."""
rois = bbox2roi(proposals)
roi_feats = self.bbox_roi_extractor(
x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
if self.with_shared_head:
roi_feats = self.shared_head(roi_feats)
sleep_interval = rcnn_test_cfg.get('async_sleep_interval', 0.017)
async with completed(
__name__, 'bbox_head_forward',
sleep_interval=sleep_interval):
cls_score, bbox_pred = self.bbox_head(roi_feats)
img_shape = img_meta[0]['img_shape']
scale_factor = img_meta[0]['scale_factor']
det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
rois,
cls_score,
bbox_pred,
img_shape,
scale_factor,
rescale=rescale,
cfg=rcnn_test_cfg)
return det_bboxes, det_labels
def simple_test_bboxes(self,
x,
img_meta,
proposals,
rcnn_test_cfg,
rescale=False):
"""Test only det bboxes without augmentation."""
rois = bbox2roi(proposals)
roi_feats = self.bbox_roi_extractor(
x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
if self.with_shared_head:
roi_feats = self.shared_head(roi_feats)
cls_score, bbox_pred = self.bbox_head(roi_feats)
img_shape = img_meta[0]['img_shape']
scale_factor = img_meta[0]['scale_factor']
det_bboxes, det_labels = self.bbox_head.get_det_bboxes(
rois,
cls_score,
bbox_pred,
img_shape,
scale_factor,
rescale=rescale,
cfg=rcnn_test_cfg)
return det_bboxes, det_labels
def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):
aug_bboxes = []
aug_scores = []
for x, img_meta in zip(feats, img_metas):
# only one image in the batch
img_shape = img_meta[0]['img_shape']
scale_factor = img_meta[0]['scale_factor']
flip = img_meta[0]['flip']
# TODO more flexible
proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,
scale_factor, flip)
rois = bbox2roi([proposals])
# recompute feature maps to save GPU memory
roi_feats = self.bbox_roi_extractor(
x[:len(self.bbox_roi_extractor.featmap_strides)], rois)
if self.with_shared_head:
roi_feats = self.shared_head(roi_feats)
cls_score, bbox_pred = self.bbox_head(roi_feats)
bboxes, scores = self.bbox_head.get_det_bboxes(
rois,
cls_score,
bbox_pred,
img_shape,
scale_factor,
rescale=False,
cfg=None)
aug_bboxes.append(bboxes)
aug_scores.append(scores)
# after merging, bboxes will be rescaled to the original image size
merged_bboxes, merged_scores = merge_aug_bboxes(
aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)
det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,
rcnn_test_cfg.score_thr,
rcnn_test_cfg.nms,
rcnn_test_cfg.max_per_img)
return det_bboxes, det_labels
class MaskTestMixin(object):
if sys.version_info >= (3, 7):
async def async_test_mask(self,
x,
img_meta,
det_bboxes,
det_labels,
rescale=False,
mask_test_cfg=None):
# image shape of the first image in the batch (only one)
ori_shape = img_meta[0]['ori_shape']
scale_factor = img_meta[0]['scale_factor']
if det_bboxes.shape[0] == 0:
segm_result = [[]
for _ in range(self.mask_head.num_classes - 1)]
else:
_bboxes = (
det_bboxes[:, :4] *
scale_factor if rescale else det_bboxes)
mask_rois = bbox2roi([_bboxes])
mask_feats = self.mask_roi_extractor(
x[:len(self.mask_roi_extractor.featmap_strides)],
mask_rois)
if self.with_shared_head:
mask_feats = self.shared_head(mask_feats)
if mask_test_cfg and mask_test_cfg.get('async_sleep_interval'):
sleep_interval = mask_test_cfg['async_sleep_interval']
else:
sleep_interval = 0.035
async with completed(
__name__,
'mask_head_forward',
sleep_interval=sleep_interval):
mask_pred = self.mask_head(mask_feats)
segm_result = self.mask_head.get_seg_masks(
mask_pred, _bboxes, det_labels, self.test_cfg.rcnn,
ori_shape, scale_factor, rescale)
return segm_result
def simple_test_mask(self,
x,
img_meta,
det_bboxes,
det_labels,
rescale=False):
# image shape of the first image in the batch (only one)
ori_shape = img_meta[0]['ori_shape']
scale_factor = img_meta[0]['scale_factor']
if det_bboxes.shape[0] == 0:
segm_result = [[] for _ in range(self.mask_head.num_classes)]
else:
# if det_bboxes is rescaled to the original image size, we need to
# rescale it back to the testing scale to obtain RoIs.
if rescale and not isinstance(scale_factor, float):
scale_factor = torch.from_numpy(scale_factor).to(
det_bboxes.device)
_bboxes = (
det_bboxes[:, :4] * scale_factor if rescale else det_bboxes)
mask_rois = bbox2roi([_bboxes])
mask_feats = self.mask_roi_extractor(
x[:len(self.mask_roi_extractor.featmap_strides)], mask_rois)
if self.with_shared_head:
mask_feats = self.shared_head(mask_feats)
mask_pred = self.mask_head(mask_feats)
segm_result = self.mask_head.get_seg_masks(mask_pred, _bboxes,
det_labels,
self.test_cfg.rcnn,
ori_shape, scale_factor,
rescale)
return segm_result
def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):
if det_bboxes.shape[0] == 0:
segm_result = [[] for _ in range(self.mask_head.num_classes)]
else:
aug_masks = []
for x, img_meta in zip(feats, img_metas):
img_shape = img_meta[0]['img_shape']
scale_factor = img_meta[0]['scale_factor']
flip = img_meta[0]['flip']
_bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,
scale_factor, flip)
mask_rois = bbox2roi([_bboxes])
mask_feats = self.mask_roi_extractor(
x[:len(self.mask_roi_extractor.featmap_strides)],
mask_rois)
if self.with_shared_head:
mask_feats = self.shared_head(mask_feats)
mask_pred = self.mask_head(mask_feats)
# convert to numpy array to save memory
aug_masks.append(mask_pred.sigmoid().cpu().numpy())
merged_masks = merge_aug_masks(aug_masks, img_metas,
self.test_cfg.rcnn)
ori_shape = img_metas[0][0]['ori_shape']
segm_result = self.mask_head.get_seg_masks(
merged_masks,
det_bboxes,
det_labels,
self.test_cfg.rcnn,
ori_shape,
scale_factor=1.0,
rescale=False)
return segm_result
import torch
import torch.nn as nn
from mmdet3d.core import (bbox2result_coco, bbox2roi, build_assigner,
build_sampler)
from mmdet.models.registry import DETECTORS
from .. import builder
from .base import BaseDetector
from .test_mixins import BBoxTestMixin, MaskTestMixin, RPNTestMixin
@DETECTORS.register_module
class TwoStageDetector(BaseDetector, RPNTestMixin, BBoxTestMixin,
MaskTestMixin):
"""Base class for two-stage detectors.
Two-stage detectors typically consist of a region proposal network and a
task-specific regression head.
"""
def __init__(self,
backbone,
neck=None,
shared_head=None,
rpn_head=None,
bbox_roi_extractor=None,
bbox_head=None,
mask_roi_extractor=None,
mask_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(TwoStageDetector, self).__init__()
self.backbone = builder.build_backbone(backbone)
if neck is not None:
self.neck = builder.build_neck(neck)
if shared_head is not None:
self.shared_head = builder.build_shared_head(shared_head)
if rpn_head is not None:
self.rpn_head = builder.build_head(rpn_head)
if bbox_head is not None:
self.bbox_roi_extractor = builder.build_roi_extractor(
bbox_roi_extractor)
self.bbox_head = builder.build_head(bbox_head)
if mask_head is not None:
if mask_roi_extractor is not None:
self.mask_roi_extractor = builder.build_roi_extractor(
mask_roi_extractor)
self.share_roi_extractor = False
else:
self.share_roi_extractor = True
self.mask_roi_extractor = self.bbox_roi_extractor
self.mask_head = builder.build_head(mask_head)
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self.init_weights(pretrained=pretrained)
@property
def with_rpn(self):
return hasattr(self, 'rpn_head') and self.rpn_head is not None
def init_weights(self, pretrained=None):
super(TwoStageDetector, self).init_weights(pretrained)
self.backbone.init_weights(pretrained=pretrained)
if self.with_neck:
if isinstance(self.neck, nn.Sequential):
for m in self.neck:
m.init_weights()
else:
self.neck.init_weights()
if self.with_shared_head:
self.shared_head.init_weights(pretrained=pretrained)
if self.with_rpn:
self.rpn_head.init_weights()
if self.with_bbox:
self.bbox_roi_extractor.init_weights()
self.bbox_head.init_weights()
if self.with_mask:
self.mask_head.init_weights()
if not self.share_roi_extractor:
self.mask_roi_extractor.init_weights()
def extract_feat(self, img):
"""Directly extract features from the backbone+neck
"""
x = self.backbone(img)
if self.with_neck:
x = self.neck(x)
return x
def forward_dummy(self, img):
"""Used for computing network flops.
See `mmdetection/tools/get_flops.py`
"""
outs = ()
# backbone
x = self.extract_feat(img)
# rpn
if self.with_rpn:
rpn_outs = self.rpn_head(x)
outs = outs + (rpn_outs, )
proposals = torch.randn(1000, 4).cuda()
# bbox head
rois = bbox2roi([proposals])
if self.with_bbox:
bbox_feats = self.bbox_roi_extractor(
x[:self.bbox_roi_extractor.num_inputs], rois)
if self.with_shared_head:
bbox_feats = self.shared_head(bbox_feats)
cls_score, bbox_pred = self.bbox_head(bbox_feats)
outs = outs + (cls_score, bbox_pred)
# mask head
if self.with_mask:
mask_rois = rois[:100]
mask_feats = self.mask_roi_extractor(
x[:self.mask_roi_extractor.num_inputs], mask_rois)
if self.with_shared_head:
mask_feats = self.shared_head(mask_feats)
mask_pred = self.mask_head(mask_feats)
outs = outs + (mask_pred, )
return outs
def forward_train(self,
img,
img_meta,
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
gt_masks=None,
proposals=None):
"""
Args:
img (Tensor): of shape (N, C, H, W) encoding input images.
Typically these should be mean centered and std scaled.
img_meta (list[dict]): list of image info dict where each dict has:
'img_shape', 'scale_factor', 'flip', and may also contain
'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
For details on the values of these keys see
`mmdet/datasets/pipelines/formatting.py:Collect`.
gt_bboxes (list[Tensor]): each item is the set of ground-truth boxes
for one image, in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): class indices corresponding to each box
gt_bboxes_ignore (None | list[Tensor]): specify which bounding
boxes can be ignored when computing the loss.
gt_masks (None | Tensor) : true segmentation masks for each box
used if the architecture supports a segmentation task.
proposals : override rpn proposals with custom proposals. Use when
`with_rpn` is False.
Returns:
dict[str, Tensor]: a dictionary of loss components
"""
x = self.extract_feat(img)
losses = dict()
# RPN forward and loss
if self.with_rpn:
rpn_outs = self.rpn_head(x)
rpn_loss_inputs = rpn_outs + (gt_bboxes, img_meta,
self.train_cfg.rpn)
rpn_losses = self.rpn_head.loss(
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
losses.update(rpn_losses)
proposal_cfg = self.train_cfg.get('rpn_proposal',
self.test_cfg.rpn)
proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
else:
proposal_list = proposals
# assign gts and sample proposals
if self.with_bbox or self.with_mask:
bbox_assigner = build_assigner(self.train_cfg.rcnn.assigner)
bbox_sampler = build_sampler(
self.train_cfg.rcnn.sampler, context=self)
num_imgs = img.size(0)
if gt_bboxes_ignore is None:
gt_bboxes_ignore = [None for _ in range(num_imgs)]
sampling_results = []
for i in range(num_imgs):
assign_result = bbox_assigner.assign(proposal_list[i],
gt_bboxes[i],
gt_bboxes_ignore[i],
gt_labels[i])
sampling_result = bbox_sampler.sample(
assign_result,
proposal_list[i],
gt_bboxes[i],
gt_labels[i],
feats=[lvl_feat[i][None] for lvl_feat in x])
sampling_results.append(sampling_result)
# bbox head forward and loss
if self.with_bbox:
rois = bbox2roi([res.bboxes for res in sampling_results])
# TODO: a more flexible way to decide which feature maps to use
bbox_feats = self.bbox_roi_extractor(
x[:self.bbox_roi_extractor.num_inputs], rois)
if self.with_shared_head:
bbox_feats = self.shared_head(bbox_feats)
cls_score, bbox_pred = self.bbox_head(bbox_feats)
bbox_targets = self.bbox_head.get_target(sampling_results,
gt_bboxes, gt_labels,
self.train_cfg.rcnn)
loss_bbox = self.bbox_head.loss(cls_score, bbox_pred,
*bbox_targets)
losses.update(loss_bbox)
# mask head forward and loss
if self.with_mask:
if not self.share_roi_extractor:
pos_rois = bbox2roi(
[res.pos_bboxes for res in sampling_results])
mask_feats = self.mask_roi_extractor(
x[:self.mask_roi_extractor.num_inputs], pos_rois)
if self.with_shared_head:
mask_feats = self.shared_head(mask_feats)
else:
pos_inds = []
device = bbox_feats.device
for res in sampling_results:
pos_inds.append(
torch.ones(
res.pos_bboxes.shape[0],
device=device,
dtype=torch.uint8))
pos_inds.append(
torch.zeros(
res.neg_bboxes.shape[0],
device=device,
dtype=torch.uint8))
pos_inds = torch.cat(pos_inds)
mask_feats = bbox_feats[pos_inds]
if mask_feats.shape[0] > 0:
mask_pred = self.mask_head(mask_feats)
mask_targets = self.mask_head.get_target(
sampling_results, gt_masks, self.train_cfg.rcnn)
pos_labels = torch.cat(
[res.pos_gt_labels for res in sampling_results])
loss_mask = self.mask_head.loss(mask_pred, mask_targets,
pos_labels)
losses.update(loss_mask)
return losses
def simple_test(self, img, img_meta, proposals=None, rescale=False):
"""Test without augmentation."""
assert self.with_bbox, 'Bbox head must be implemented.'
x = self.extract_feat(img)
if proposals is None:
proposal_list = self.simple_test_rpn(x, img_meta,
self.test_cfg.rpn)
else:
proposal_list = proposals
det_bboxes, det_labels = self.simple_test_bboxes(
x, img_meta, proposal_list, self.test_cfg.rcnn, rescale=rescale)
bbox_results = bbox2result_coco(det_bboxes, det_labels,
self.bbox_head.num_classes)
if not self.with_mask:
return bbox_results
else:
segm_results = self.simple_test_mask(
x, img_meta, det_bboxes, det_labels, rescale=rescale)
return bbox_results, segm_results
def aug_test(self, imgs, img_metas, rescale=False):
"""Test with augmentations.
If rescale is False, then returned bboxes and masks will fit the scale
of imgs[0].
"""
# recompute feats to save memory
proposal_list = self.aug_test_rpn(
self.extract_feats(imgs), img_metas, self.test_cfg.rpn)
det_bboxes, det_labels = self.aug_test_bboxes(
self.extract_feats(imgs), img_metas, proposal_list,
self.test_cfg.rcnn)
if rescale:
_det_bboxes = det_bboxes
else:
_det_bboxes = det_bboxes.clone()
_det_bboxes[:, :4] *= img_metas[0][0]['scale_factor']
bbox_results = bbox2result_coco(_det_bboxes, det_labels,
self.bbox_head.num_classes)
# det_bboxes always keep the original scale
if self.with_mask:
segm_results = self.aug_test_mask(
self.extract_feats(imgs), img_metas, det_bboxes, det_labels)
return bbox_results, segm_results
else:
return bbox_results
@@ -2,9 +2,8 @@ import torch
import torch.nn.functional as F

from mmdet3d.ops import Voxelization
-from mmdet.models.registry import DETECTORS
+from mmdet.models import DETECTORS, SingleStageDetector
from .. import builder
-from .single_stage import SingleStageDetector

@DETECTORS.register_module
...
@@ -3,8 +3,7 @@ import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import xavier_init

-from mmdet3d.models.utils import ConvModule
-from ..plugins import NonLocal2D
+from mmdet.ops import ConvModule
from ..registry import FUSION_LAYERS
@@ -110,10 +109,9 @@ class PointFusion(nn.Module):
                 img_levels=3,
                 conv_cfg=None,
                 norm_cfg=None,
-                 activation=None,
+                 act_cfg=None,
                 activate_out=True,
                 fuse_out=False,
-                 refine_type=None,
                 dropout_ratio=0,
                 aligned=True,
                 align_corners=True,
@@ -129,10 +127,9 @@ class PointFusion(nn.Module):
        assert len(img_channels) == len(img_levels)

        self.img_levels = img_levels
-        self.activation = activation
+        self.act_cfg = act_cfg
        self.activate_out = activate_out
        self.fuse_out = fuse_out
-        self.refine_type = refine_type
        self.dropout_ratio = dropout_ratio
        self.img_channels = img_channels
        self.aligned = aligned
@@ -150,7 +147,7 @@ class PointFusion(nn.Module):
                padding=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
-                activation=self.activation,
+                act_cfg=self.act_cfg,
                inplace=False)
            self.lateral_convs.append(l_conv)
        self.img_transform = nn.Sequential(
@@ -175,13 +172,6 @@ class PointFusion(nn.Module):
                nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01),
                nn.ReLU(inplace=False))

-        if self.refine_type == 'non_local':
-            self.refine = NonLocal2D(
-                out_channels,
-                reduction=1,
-                use_scale=False,
-                conv_cfg=conv_cfg,
-                norm_cfg=norm_cfg)
        self.init_weights()

    # default init_weights for conv(msra) and norm in ConvModule
@@ -210,16 +200,6 @@ class PointFusion(nn.Module):
        if self.fuse_out:
            fuse_out = self.fuse_conv(fuse_out)

-        if self.refine_type is not None:
-            fuse_out_T = fuse_out.t()[None, ..., None]  # NxC -> 1xCxNx1
-            batch_idx = 0
-            attentive = []
-            for i in range(len(pts)):
-                end_idx = batch_idx + len(pts[i])
-                attentive.append(
-                    self.refine(fuse_out_T[:, :, batch_idx:end_idx]))
-                batch_idx = end_idx
-            fuse_out = torch.cat(attentive, dim=-2).squeeze().t()
        return fuse_out

    def obtain_mlvl_feats(self, img_feats, pts, img_meta):
...
import torch.nn as nn

import mmdet3d.ops.spconv as spconv
+from mmdet.ops import build_norm_layer
from ..registry import MIDDLE_ENCODERS
-from ..utils import build_norm_layer

@MIDDLE_ENCODERS.register_module
...
-import logging
from functools import partial

import torch
import torch.nn as nn
from mmcv.cnn import constant_init, kaiming_init
-from mmcv.runner import load_checkpoint
from torch.nn import Sequential
from torch.nn.modules.batchnorm import _BatchNorm

+from mmdet.models import NECKS
+from mmdet.ops import build_norm_layer
from .. import builder
-from ..registry import NECKS
-from ..utils import build_norm_layer

-class Empty(nn.Module):
-
-    def __init__(self, *args, **kwargs):
-        super(Empty, self).__init__()
-
-    def forward(self, *args, **kwargs):
-        if len(args) == 1:
-            return args[0]
-        elif len(args) == 0:
-            return None
-        return args

@NECKS.register_module
@@ -43,17 +28,12 @@ class SECONDFPN(nn.Module):
        assert len(num_upsample_filters) == len(upsample_strides)
        self.in_channels = in_channels

-        if norm_cfg is not None:
-            ConvTranspose2d = partial(nn.ConvTranspose2d, bias=False)
-        else:
-            ConvTranspose2d = partial(nn.ConvTranspose2d, bias=True)
+        ConvTranspose2d = partial(nn.ConvTranspose2d, bias=False)

        deblocks = []
        for i, num_upsample_filter in enumerate(num_upsample_filters):
-            norm_layer = (
-                build_norm_layer(norm_cfg, num_upsample_filter)[1]
-                if norm_cfg is not None else Empty)
+            norm_layer = build_norm_layer(norm_cfg, num_upsample_filter)[1]
            deblock = Sequential(
                ConvTranspose2d(
                    in_channels[i],
@@ -66,30 +46,22 @@ class SECONDFPN(nn.Module):
            deblocks.append(deblock)
        self.deblocks = nn.ModuleList(deblocks)

-    def init_weights(self, pretrained=None):
-        if isinstance(pretrained, str):
-            logger = logging.getLogger()
-            load_checkpoint(self, pretrained, strict=False, logger=logger)
-        elif pretrained is None:
-            # keeping the initiation yields better results
-            for m in self.modules():
-                if isinstance(m, nn.Conv2d):
-                    kaiming_init(m)
-                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
-                    constant_init(m, 1)
-        else:
-            raise TypeError('pretrained must be a str or None')
-        return
+    def init_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                kaiming_init(m)
+            elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
+                constant_init(m, 1)

-    def forward(self, inputs):
-        assert len(inputs) == len(self.in_channels)
-        ups = [deblock(inputs[i]) for i, deblock in enumerate(self.deblocks)]
+    def forward(self, x):
+        assert len(x) == len(self.in_channels)
+        ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)]
        if len(ups) > 1:
-            x = torch.cat(ups, dim=1)
+            out = torch.cat(ups, dim=1)
        else:
-            x = ups[0]
-        return [x]
+            out = ups[0]
+        return [out]

@NECKS.register_module
@@ -120,18 +92,18 @@ class SECONDFusionFPN(SECONDFPN):
        self.down_sample_rate = down_sample_rate

    def forward(self,
-                inputs,
+                x,
                coors=None,
                points=None,
                img_feats=None,
                img_meta=None):
-        assert len(inputs) == len(self.in_channels)
-        ups = [deblock(inputs[i]) for i, deblock in enumerate(self.deblocks)]
+        assert len(x) == len(self.in_channels)
+        ups = [deblock(x[i]) for i, deblock in enumerate(self.deblocks)]
        if len(ups) > 1:
-            x = torch.cat(ups, dim=1)
+            out = torch.cat(ups, dim=1)
        else:
-            x = ups[0]
+            out = ups[0]
        if (self.fusion_layer is not None and img_feats is not None):
            downsample_pts_coors = torch.zeros_like(coors)
            downsample_pts_coors[:, 0] = coors[:, 0]
@@ -142,6 +114,6 @@ class SECONDFusionFPN(SECONDFPN):
            downsample_pts_coors[:, 3] = (
                coors[:, 3] / self.down_sample_rate[2])
            # fusion for each point
-            x = self.fusion_layer(img_feats, points, x, downsample_pts_coors,
-                                  img_meta)
-        return [x]
+            out = self.fusion_layer(img_feats, points, out,
+                                    downsample_pts_coors, img_meta)
+        return [out]
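A behavioral note on the SECONDFPN cleanup: norm_cfg is now effectively mandatory (the Empty norm and the bias=True fallback are gone), so every upsample block is a bias-free deconv followed by a real norm layer. A usage sketch with made-up channel sizes; only the keyword names come from the hunk above, and the argument order is an assumption:

import torch

neck = SECONDFPN(
    in_channels=[128, 256],  # hypothetical values
    upsample_strides=[1, 2],
    num_upsample_filters=[256, 256],
    norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01))
neck.init_weights()
outs = neck([torch.rand(2, 128, 100, 88), torch.rand(2, 256, 50, 44)])
assert len(outs) == 1  # forward returns one concatenated feature map in a list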