Unverified Commit 0f8181f1 authored by Wenhao Wu, committed by GitHub

[Feature] Support PointRCNN RPN and RCNN module (#1022)

* rebase & resubmit

* rename config & model

* fix unittest

* resolve comments & add docstring for class_agnostic_nms

* refine loss calculation & remove find_unused_parameters

* resolve typo & add docstring

* resolve comments
parent e0aa7ce8
model = dict(
type='PointRCNN',
backbone=dict(
type='PointNet2SAMSG',
in_channels=4,
num_points=(4096, 1024, 256, 64),
radii=((0.1, 0.5), (0.5, 1.0), (1.0, 2.0), (2.0, 4.0)),
num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
sa_channels=(((16, 16, 32), (32, 32, 64)),
             ((64, 64, 128), (64, 96, 128)),
             ((128, 196, 256), (128, 196, 256)),
             ((256, 256, 512), (256, 384, 512))),
fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
fps_sample_range_lists=((-1), (-1), (-1), (-1)),
aggregation_channels=(None, None, None, None),
dilated_group=(False, False, False, False),
out_indices=(0, 1, 2, 3),
norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
sa_cfg=dict(
type='PointSAModuleMSG',
pool_mod='max',
use_xyz=True,
normalize_xyz=False)),
neck=dict(
type='PointNetFPNeck',
fp_channels=((1536, 512, 512), (768, 512, 512), (608, 256, 256),
(257, 128, 128))),
rpn_head=dict(
type='PointRPNHead',
num_classes=3,
enlarge_width=0.1,
pred_layer_cfg=dict(
in_channels=128,
cls_linear_channels=(256, 256),
reg_linear_channels=(256, 256)),
cls_loss=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
bbox_loss=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
bbox_coder=dict(
type='PointXYZWHLRBBoxCoder',
code_size=8,
# code_size: (center residual (3), size regression (3),
#             torch.cos(yaw) (1), torch.sin(yaw) (1))
use_mean_size=True,
mean_size=[[3.9, 1.6, 1.56], [0.8, 0.6, 1.73],
           [1.76, 0.6, 1.73]])),
roi_head=dict(
type='PointRCNNRoIHead',
point_roi_extractor=dict(
type='Single3DRoIPointExtractor',
roi_layer=dict(type='RoIPointPool3d', num_sampled_points=512)),
bbox_head=dict(
type='PointRCNNBboxHead',
num_classes=1,
pred_layer_cfg=dict(
in_channels=512,
cls_conv_channels=(256, 256),
reg_conv_channels=(256, 256),
bias=True),
in_channels=5,
# 5 = 3 (xyz) + scores + depth
mlp_channels=[128, 128],
num_points=(128, 32, -1),
radius=(0.2, 0.4, 100),
num_samples=(16, 16, 16),
sa_channels=((128, 128, 128), (128, 128, 256), (256, 256, 512)),
with_corner_loss=True),
depth_normalizer=70.0),
# model training and testing settings
train_cfg=dict(
pos_distance_thr=10.0,
rpn=dict(
nms_cfg=dict(
use_rotate_nms=True, iou_thr=0.8, nms_pre=9000, nms_post=512),
score_thr=None),
rcnn=dict(
assigner=[
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1)
],
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.5,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.7,
cls_neg_thr=0.25)),
test_cfg=dict(
rpn=dict(
nms_cfg=dict(
use_rotate_nms=True, iou_thr=0.85, nms_pre=9000, nms_post=512),
score_thr=None),
rcnn=dict(use_rotate_nms=True, nms_thr=0.1, score_thr=0.1)))
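As a quick sanity check, this model config can be instantiated directly. A minimal sketch, assuming the mmcv `Config` loader and the same `build_detector` entry point the unit tests below use (the config path is the one added in this PR):

```python
from mmcv import Config
from mmdet3d.models import build_detector

# train_cfg/test_cfg live inside the model dict above, so no extra
# arguments are needed when building the detector.
cfg = Config.fromfile('configs/pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
model = build_detector(cfg.model)
print(type(model).__name__)  # PointRCNN
```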
_base_ = [
'../_base_/datasets/kitti-3d-car.py', '../_base_/models/point_rcnn.py',
'../_base_/default_runtime.py', '../_base_/schedules/cyclic_40e.py'
]
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
sample_groups=dict(Car=20, Pedestrian=15, Cyclist=15),
classes=class_names)
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='ObjectNoise',
num_try=100,
translation_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_range=[-0.78539816, 0.78539816]),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointSample', num_points=16384, sample_range=40.0),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointSample', num_points=16384, sample_range=40.0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type='RepeatDataset',
times=2,
dataset=dict(pipeline=train_pipeline, classes=class_names)),
val=dict(pipeline=test_pipeline, classes=class_names),
test=dict(pipeline=test_pipeline, classes=class_names))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(lr=lr, betas=(0.95, 0.85))
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=80)
evaluation = dict(interval=2)
# yapf:disable
log_config = dict(
interval=30,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
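The pipelines above are consumed by the dataset builder at train time. A small sketch of materializing the training split from this config, assuming the standard mmdet3d `build_dataset` API and a prepared `data/kitti` directory:

```python
from mmcv import Config
from mmdet3d.datasets import build_dataset

cfg = Config.fromfile('configs/pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
# RepeatDataset(times=2) wraps the KITTI train split defined above.
dataset = build_dataset(cfg.data.train)
sample = dataset[0]
# After PointSample/DefaultFormatBundle3D: 16384 points with 4 dims each.
print(sample['points'].data.shape)  # torch.Size([16384, 4])
```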
# PointRCNN: 3D Object Proposal Generation and Detection from Point Cloud
## Introduction
<!-- [ALGORITHM] -->
We implement PointRCNN and provide its results with checkpoints on the KITTI dataset.
```
@InProceedings{Shi_2019_CVPR,
author = {Shi, Shaoshuai and Wang, Xiaogang and Li, Hongsheng},
title = {PointRCNN: 3D Object Proposal Generation and Detection From Point Cloud},
booktitle = {The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2019}
}
```
## Results
### KITTI
|                      Backbone                      |  Class  |  Lr schd   | Mem (GB) | Inf time (fps) |  mAP  | Download |
| :------------------------------------------------: | :-----: | :--------: | :------: | :------------: | :---: | :------: |
| [PointNet++](./pointrcnn_2x8_kitti-3d-3classes.py) | 3 Class | cyclic 80e |   7.1    |                | 70.39 |          |
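Once a checkpoint is available, the model can be exercised through the high-level inference helpers. A minimal sketch, assuming mmdet3d's `init_model`/`inference_detector` API; the checkpoint and LiDAR file paths are hypothetical:

```python
from mmdet3d.apis import inference_detector, init_model

config_file = 'configs/pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py'
checkpoint_file = 'checkpoints/pointrcnn_kitti-3d-3classes.pth'  # hypothetical
model = init_model(config_file, checkpoint_file, device='cuda:0')
# Run the two-stage detector on a single KITTI LiDAR sweep (.bin file).
result, data = inference_detector(model, 'demo/data/kitti/kitti_000008.bin')
print(result[0]['boxes_3d'])  # refined LiDARInstance3DBoxes from the RCNN stage
```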
@@ -892,8 +892,8 @@ class PointSample(object):
         if sample_range is not None and not replace:
             # Only sampling the near points when len(points) >= num_samples
             depth = np.linalg.norm(points.tensor, axis=1)
-            far_inds = np.where(depth > sample_range)[0]
-            near_inds = np.where(depth <= sample_range)[0]
+            far_inds = np.where(depth >= sample_range)[0]
+            near_inds = np.where(depth < sample_range)[0]
             # in case there are too many far points
             if len(far_inds) > num_samples:
                 far_inds = np.random.choice(
@@ -920,12 +920,6 @@ class PointSample(object):
         and 'pts_semantic_mask' keys are updated in the result dict.
         """
         points = results['points']
-        # Points in Camera coord can provide the depth information.
-        # TODO: Need to support distance-based sampling for other coord system.
-        if self.sample_range is not None:
-            from mmdet3d.core.points import CameraPoints
-            assert isinstance(points, CameraPoints), 'Sampling based on' \
-                'distance is only applicable for CAMERA coord'
         points, choices = self._points_random_sampling(
             points,
             self.num_points,
...
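For intuition, the near/far split that this hunk retunes (and that the removed assertion previously restricted to camera coordinates) can be sketched standalone in NumPy; the point count and `sample_range` are made-up values:

```python
import numpy as np

points = np.random.rand(10000, 3) * 80.0  # fake cloud, made-up extent
num_samples, sample_range = 512, 40.0

depth = np.linalg.norm(points, axis=1)
far_inds = np.where(depth >= sample_range)[0]
near_inds = np.where(depth < sample_range)[0]
# Cap the far points at the sampling budget, then fill up with near points.
if len(far_inds) > num_samples:
    far_inds = np.random.choice(far_inds, num_samples, replace=False)
near_num = num_samples - len(far_inds)
choices = np.concatenate(
    [np.random.choice(near_inds, near_num, replace=False), far_inds])
assert len(choices) == num_samples
```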
@@ -9,6 +9,7 @@ from .free_anchor3d_head import FreeAnchor3DHead
 from .groupfree3d_head import GroupFree3DHead
 from .parta2_rpn_head import PartA2RPNHead
 from .pgd_head import PGDHead
+from .point_rpn_head import PointRPNHead
 from .shape_aware_head import ShapeAwareHead
 from .smoke_mono3d_head import SMOKEMono3DHead
 from .ssd_3d_head import SSD3DHead
@@ -18,5 +19,5 @@ __all__ = [
     'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
-    'GroupFree3DHead', 'SMOKEMono3DHead', 'PGDHead'
+    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead'
 ]
import torch
from mmcv.runner import BaseModule, force_fp32
from torch import nn as nn
from mmdet3d.core.bbox.structures import (DepthInstance3DBoxes,
LiDARInstance3DBoxes)
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.core import build_bbox_coder, multi_apply
from mmdet.models import HEADS, build_loss
@HEADS.register_module()
class PointRPNHead(BaseModule):
"""RPN module for PointRCNN.
Args:
num_classes (int): Number of classes.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
pred_layer_cfg (dict, optional): Config of classification and
regression prediction layers. Defaults to None.
enlarge_width (float, optional): Enlarge bbox for each side to ignore
close points. Defaults to 0.1.
cls_loss (dict, optional): Config of direction classification loss.
Defaults to None.
bbox_loss (dict, optional): Config of localization loss.
Defaults to None.
bbox_coder (dict, optional): Config dict of box coders.
Defaults to None.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(self,
num_classes,
train_cfg,
test_cfg,
pred_layer_cfg=None,
enlarge_width=0.1,
cls_loss=None,
bbox_loss=None,
bbox_coder=None,
init_cfg=None):
super().__init__(init_cfg=init_cfg)
self.num_classes = num_classes
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self.enlarge_width = enlarge_width
# build loss function
self.bbox_loss = build_loss(bbox_loss)
self.cls_loss = build_loss(cls_loss)
# build box coder
self.bbox_coder = build_bbox_coder(bbox_coder)
# build pred conv
self.cls_layers = self._make_fc_layers(
fc_cfg=pred_layer_cfg.cls_linear_channels,
input_channels=pred_layer_cfg.in_channels,
output_channels=self._get_cls_out_channels())
self.reg_layers = self._make_fc_layers(
fc_cfg=pred_layer_cfg.reg_linear_channels,
input_channels=pred_layer_cfg.in_channels,
output_channels=self._get_reg_out_channels())
def _make_fc_layers(self, fc_cfg, input_channels, output_channels):
"""Make fully connect layers.
Args:
fc_cfg (dict): Config of fully connect.
input_channels (int): Input channels for fc_layers.
output_channels (int): Input channels for fc_layers.
Returns:
nn.Sequential: Fully connect layers.
"""
fc_layers = []
c_in = input_channels
for k in range(len(fc_cfg)):
fc_layers.extend([
nn.Linear(c_in, fc_cfg[k], bias=False),
nn.BatchNorm1d(fc_cfg[k]),
nn.ReLU(),
])
c_in = fc_cfg[k]
fc_layers.append(nn.Linear(c_in, output_channels, bias=True))
return nn.Sequential(*fc_layers)
def _get_cls_out_channels(self):
"""Return the channel number of classification outputs."""
# Class numbers (k)
return self.num_classes
def _get_reg_out_channels(self):
"""Return the channel number of regression outputs."""
# Bbox regression:
# (center residual (3), size regression (3),
# torch.cos(yaw) (1), torch.sin(yaw) (1))
return self.bbox_coder.code_size
def forward(self, feat_dict):
"""Forward pass.
Args:
feat_dict (dict): Feature dict from backbone.
Returns:
tuple[list[torch.Tensor]]: Predicted boxes and classification
scores.
"""
point_features = feat_dict['fp_features']
point_features = point_features.permute(0, 2, 1).contiguous()
batch_size = point_features.shape[0]
feat_cls = point_features.view(-1, point_features.shape[-1])
feat_reg = point_features.view(-1, point_features.shape[-1])
point_cls_preds = self.cls_layers(feat_cls).reshape(
batch_size, -1, self._get_cls_out_channels())
point_box_preds = self.reg_layers(feat_reg).reshape(
batch_size, -1, self._get_reg_out_channels())
return (point_box_preds, point_cls_preds)
@force_fp32(apply_to=('bbox_preds', ))
def loss(self,
bbox_preds,
cls_preds,
points,
gt_bboxes_3d,
gt_labels_3d,
img_metas=None):
"""Compute loss.
Args:
bbox_preds (torch.Tensor): Bbox predictions from forward of PointRCNN
RPN head.
cls_preds (torch.Tensor): Classification predictions from forward of
PointRCNN RPN head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
img_metas (list[dict], Optional): Contain pcd and img's meta info.
Defaults to None.
Returns:
dict: Losses of PointRCNN RPN module.
"""
targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d)
(bbox_targets, mask_targets, positive_mask, negative_mask,
box_loss_weights, point_targets) = targets
# bbox loss
bbox_loss = self.bbox_loss(bbox_preds, bbox_targets,
box_loss_weights.unsqueeze(-1))
# calculate semantic loss
semantic_points = cls_preds.reshape(-1, self.num_classes)
semantic_targets = mask_targets
semantic_targets[negative_mask] = self.num_classes
semantic_points_label = semantic_targets
# reserved for ignore; no ignore label is used at the moment
semantic_loss_weight = negative_mask.float() + positive_mask.float()
semantic_loss = self.cls_loss(semantic_points,
semantic_points_label.reshape(-1),
semantic_loss_weight.reshape(-1))
semantic_loss /= positive_mask.float().sum()
losses = dict(bbox_loss=bbox_loss, semantic_loss=semantic_loss)
return losses
def get_targets(self, points, gt_bboxes_3d, gt_labels_3d):
"""Generate targets of PointRCNN RPN head.
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
Returns:
tuple[torch.Tensor]: Targets of PointRCNN RPN head.
"""
# find empty example
for index in range(len(gt_labels_3d)):
if len(gt_labels_3d[index]) == 0:
fake_box = gt_bboxes_3d[index].tensor.new_zeros(
1, gt_bboxes_3d[index].tensor.shape[-1])
gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
(bbox_targets, mask_targets, positive_mask, negative_mask,
point_targets) = multi_apply(self.get_targets_single, points,
gt_bboxes_3d, gt_labels_3d)
bbox_targets = torch.stack(bbox_targets)
mask_targets = torch.stack(mask_targets)
positive_mask = torch.stack(positive_mask)
negative_mask = torch.stack(negative_mask)
box_loss_weights = positive_mask / (positive_mask.sum() + 1e-6)
return (bbox_targets, mask_targets, positive_mask, negative_mask,
box_loss_weights, point_targets)
def get_targets_single(self, points, gt_bboxes_3d, gt_labels_3d):
"""Generate targets of PointRCNN RPN head for single batch.
Args:
points (torch.Tensor): Points of each batch.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
boxes of each batch.
gt_labels_3d (torch.Tensor): Labels of each batch.
Returns:
tuple[torch.Tensor]: Targets of PointRCNN RPN head.
"""
gt_bboxes_3d = gt_bboxes_3d.to(points.device)
valid_gt = gt_labels_3d != -1
gt_bboxes_3d = gt_bboxes_3d[valid_gt]
gt_labels_3d = gt_labels_3d[valid_gt]
# shift the bbox bottom center to the gravity center in the point cloud frame
gt_bboxes_3d_tensor = gt_bboxes_3d.tensor.clone()
gt_bboxes_3d_tensor[..., 2] += gt_bboxes_3d_tensor[..., 5] / 2
points_mask, assignment = self._assign_targets_by_points_inside(
gt_bboxes_3d, points)
gt_bboxes_3d_tensor = gt_bboxes_3d_tensor[assignment]
mask_targets = gt_labels_3d[assignment]
bbox_targets = self.bbox_coder.encode(gt_bboxes_3d_tensor,
points[..., 0:3], mask_targets)
positive_mask = (points_mask.max(1)[0] > 0)
negative_mask = (points_mask.max(1)[0] == 0)
# add ignore_mask
extend_gt_bboxes_3d = gt_bboxes_3d.enlarged_box(self.enlarge_width)
points_mask, _ = self._assign_targets_by_points_inside(
extend_gt_bboxes_3d, points)
negative_mask = (points_mask.max(1)[0] == 0)
point_targets = points[..., 0:3]
return (bbox_targets, mask_targets, positive_mask, negative_mask,
point_targets)
def get_bboxes(self,
points,
bbox_preds,
cls_preds,
input_metas,
rescale=False):
"""Generate bboxes from RPN head predictions.
Args:
points (torch.Tensor): Input points.
bbox_preds (torch.Tensor): Regression predictions from PointRCNN head.
cls_preds (torch.Tensor): Class score predictions from PointRCNN head.
input_metas (list[dict]): Point cloud and image's meta info.
rescale (bool, optional): Whether to rescale bboxes.
Defaults to False.
Returns:
list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
"""
sem_scores = cls_preds.sigmoid()
obj_scores = sem_scores.max(-1)[0]
object_class = sem_scores.argmax(dim=-1)
batch_size = sem_scores.shape[0]
results = list()
for b in range(batch_size):
bbox3d = self.bbox_coder.decode(bbox_preds[b], points[b, ..., :3],
object_class[b])
bbox_selected, score_selected, labels, cls_preds_selected = \
self.class_agnostic_nms(obj_scores[b], sem_scores[b], bbox3d)
bbox = input_metas[b]['box_type_3d'](
bbox_selected.clone(),
box_dim=bbox_selected.shape[-1],
with_yaw=True)
results.append((bbox, score_selected, labels, cls_preds_selected))
return results
def class_agnostic_nms(self, obj_scores, sem_scores, bbox):
"""Class agnostic nms.
Args:
obj_scores (torch.Tensor): Objectness score of bounding boxes.
sem_scores (torch.Tensor): Semantic class score of bounding boxes.
bbox (torch.Tensor): Predicted bounding boxes.
Returns:
tuple[torch.Tensor]: Bounding boxes, scores and labels.
"""
nms_cfg = self.test_cfg.nms_cfg if not self.training \
else self.train_cfg.nms_cfg
if nms_cfg.use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
if self.test_cfg.score_thr is not None:
score_thr = self.test_cfg.score_thr
keep = (obj_scores >= score_thr)
obj_scores = obj_scores[keep]
sem_scores = sem_scores[keep]
bbox = bbox[keep]
if obj_scores.shape[0] > 0:
topk = min(nms_cfg.nms_pre, obj_scores.shape[0])
obj_scores_nms, indices = torch.topk(obj_scores, k=topk)
bbox_for_nms = bbox[indices]
sem_scores_nms = sem_scores[indices]
keep = nms_func(bbox_for_nms[:, 0:7], obj_scores_nms,
nms_cfg.iou_thr)
keep = keep[:nms_cfg.nms_post]
bbox_selected = bbox_for_nms[keep]
score_selected = obj_scores_nms[keep]
cls_preds = sem_scores_nms[keep]
            labels = torch.argmax(cls_preds, -1)
        else:
            # keep the return well-defined when no box survives thresholding
            bbox_selected = bbox[:0]
            score_selected = obj_scores[:0]
            cls_preds = sem_scores[:0]
            labels = obj_scores.new_zeros(0, dtype=torch.long)
        return bbox_selected, score_selected, labels, cls_preds
def _assign_targets_by_points_inside(self, bboxes_3d, points):
"""Compute assignment by checking whether point is inside bbox.
Args:
bboxes_3d (:obj:`BaseInstance3DBoxes`): Instance of bounding boxes.
points (torch.Tensor): Points of a batch.
Returns:
tuple[torch.Tensor]: Flags indicating whether each point is
inside a bbox and the index of the box each point falls in.
"""
# TODO: align points_in_boxes function in each box_structures
num_bbox = bboxes_3d.tensor.shape[0]
if isinstance(bboxes_3d, LiDARInstance3DBoxes):
assignment = bboxes_3d.points_in_boxes(points[:, 0:3]).long()
points_mask = assignment.new_zeros(
[assignment.shape[0], num_bbox + 1])
assignment[assignment == -1] = num_bbox
points_mask.scatter_(1, assignment.unsqueeze(1), 1)
points_mask = points_mask[:, :-1]
assignment[assignment == num_bbox] = num_bbox - 1
elif isinstance(bboxes_3d, DepthInstance3DBoxes):
points_mask = bboxes_3d.points_in_boxes(points)
assignment = points_mask.argmax(dim=-1)
else:
raise NotImplementedError('Unsupported bbox type!')
return points_mask, assignment
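One detail worth spelling out from the 8-dim box code above: yaw is regressed as a (cos, sin) pair rather than a raw angle. A tiny sketch of how the angle is recovered, with made-up predictions (the channel layout follows the code_size comment in the config):

```python
import torch

# bbox_pred layout per the code_size comment:
# [center residual (3), size regression (3), cos(yaw), sin(yaw)]
bbox_pred = torch.randn(4, 8)  # made-up predictions
cos_yaw, sin_yaw = bbox_pred[..., 6], bbox_pred[..., 7]
# atan2 maps the pair back to an angle in (-pi, pi], avoiding the
# wrap-around discontinuity of regressing yaw directly.
yaw = torch.atan2(sin_yaw, cos_yaw)
```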
@@ -441,7 +441,7 @@ class SSD3DHead(VoteHead):
             negative_mask)

     def get_bboxes(self, points, bbox_preds, input_metas, rescale=False):
-        """Generate bboxes from sdd3d head predictions.
+        """Generate bboxes from 3DSSD head predictions.

         Args:
             points (torch.Tensor): Input points.
@@ -479,7 +479,7 @@ class SSD3DHead(VoteHead):
         Args:
             obj_scores (torch.Tensor): Objectness score of bounding boxes.
-            sem_scores (torch.Tensor): semantic class score of bounding boxes.
+            sem_scores (torch.Tensor): Semantic class score of bounding boxes.
             bbox (torch.Tensor): Predicted bounding boxes.
             points (torch.Tensor): Input points.
             input_meta (dict): Point cloud and image's meta info.
@@ -505,20 +505,20 @@ class SSD3DHead(VoteHead):
         minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]
         bbox_classes = torch.argmax(sem_scores, -1)
-        nms_selected = batched_nms(
+        nms_keep = batched_nms(
             minmax_box3d[nonempty_box_mask][:, [0, 1, 3, 4]],
             obj_scores[nonempty_box_mask], bbox_classes[nonempty_box_mask],
             self.test_cfg.nms_cfg)[1]
-        if nms_selected.shape[0] > self.test_cfg.max_output_num:
-            nms_selected = nms_selected[:self.test_cfg.max_output_num]
+        if nms_keep.shape[0] > self.test_cfg.max_output_num:
+            nms_keep = nms_keep[:self.test_cfg.max_output_num]

         # filter empty boxes and boxes with low score
         scores_mask = (obj_scores >= self.test_cfg.score_thr)
         nonempty_box_inds = torch.nonzero(
             nonempty_box_mask, as_tuple=False).flatten()
         nonempty_mask = torch.zeros_like(bbox_classes).scatter(
-            0, nonempty_box_inds[nms_selected], 1)
+            0, nonempty_box_inds[nms_keep], 1)
         selected = (nonempty_mask.bool() & scores_mask.bool())

         if self.test_cfg.per_class_proposal:
...
@@ -10,6 +10,7 @@ from .imvoxelnet import ImVoxelNet
 from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
 from .mvx_two_stage import MVXTwoStageDetector
 from .parta2 import PartA2
+from .point_rcnn import PointRCNN
 from .single_stage_mono3d import SingleStageMono3DDetector
 from .smoke_mono3d import SMOKEMono3D
 from .ssd3dnet import SSD3DNet
@@ -20,5 +21,5 @@ __all__ = [
     'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
     'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
     'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
-    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'SMOKEMono3D'
+    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D'
 ]
import torch
from mmdet.models import DETECTORS
from .two_stage import TwoStage3DDetector
@DETECTORS.register_module()
class PointRCNN(TwoStage3DDetector):
r"""PointRCNN detector.
Please refer to the `PointRCNN <https://arxiv.org/abs/1812.04244>`_ paper for details.
Args:
backbone (dict): Config dict of detector's backbone.
neck (dict, optional): Config dict of neck. Defaults to None.
rpn_head (dict, optional): Config of RPN head. Defaults to None.
roi_head (dict, optional): Config of ROI head. Defaults to None.
train_cfg (dict, optional): Train configs. Defaults to None.
test_cfg (dict, optional): Test configs. Defaults to None.
pretrained (str, optional): Model pretrained path. Defaults to None.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(self,
backbone,
neck=None,
rpn_head=None,
roi_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None):
super(PointRCNN, self).__init__(
backbone=backbone,
neck=neck,
rpn_head=rpn_head,
roi_head=roi_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
def extract_feat(self, points):
"""Directly extract features from the backbone+neck.
Args:
points (torch.Tensor): Input points.
Returns:
dict: Features from the backbone+neck.
"""
x = self.backbone(points)
if self.with_neck:
x = self.neck(x)
return x
def forward_train(self, points, img_metas, gt_bboxes_3d, gt_labels_3d):
"""Forward of training.
Args:
points (list[torch.Tensor]): Points of each batch.
img_metas (list[dict]): Meta information of each sample.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth bboxes
of each sample.
gt_labels_3d (list[torch.Tensor]): Ground truth class labels of each
sample.
Returns:
dict: Losses.
"""
losses = dict()
points_cat = torch.stack(points)
x = self.extract_feat(points_cat)
# features for rcnn
backbone_feats = x['fp_features'].clone()
backbone_xyz = x['fp_xyz'].clone()
rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}
bbox_preds, cls_preds = self.rpn_head(x)
rpn_loss = self.rpn_head.loss(
bbox_preds=bbox_preds,
cls_preds=cls_preds,
points=points,
gt_bboxes_3d=gt_bboxes_3d,
gt_labels_3d=gt_labels_3d,
img_metas=img_metas)
losses.update(rpn_loss)
bbox_list = self.rpn_head.get_bboxes(points_cat, bbox_preds, cls_preds,
img_metas)
proposal_list = [
dict(
boxes_3d=bboxes,
scores_3d=scores,
labels_3d=labels,
cls_preds=preds_cls)
for bboxes, scores, labels, preds_cls in bbox_list
]
rcnn_feats.update({'points_cls_preds': cls_preds})
roi_losses = self.roi_head.forward_train(rcnn_feats, img_metas,
proposal_list, gt_bboxes_3d,
gt_labels_3d)
losses.update(roi_losses)
return losses
def simple_test(self, points, img_metas, imgs=None, rescale=False):
"""Forward of testing.
Args:
points (list[torch.Tensor]): Points of each sample.
img_metas (list[dict]): Image metas.
imgs (list[torch.Tensor], optional): Images of each sample.
Defaults to None.
rescale (bool, optional): Whether to rescale results.
Defaults to False.
Returns:
list: Predicted 3d boxes.
"""
points_cat = torch.stack(points)
x = self.extract_feat(points_cat)
# features for rcnn
backbone_feats = x['fp_features'].clone()
backbone_xyz = x['fp_xyz'].clone()
rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}
bbox_preds, cls_preds = self.rpn_head(x)
rcnn_feats.update({'points_cls_preds': cls_preds})
bbox_list = self.rpn_head.get_bboxes(
points_cat, bbox_preds, cls_preds, img_metas, rescale=rescale)
proposal_list = [
dict(
boxes_3d=bboxes,
scores_3d=scores,
labels_3d=labels,
cls_preds=preds_cls)
for bboxes, scores, labels, preds_cls in bbox_list
]
bbox_results = self.roi_head.simple_test(rcnn_feats, img_metas,
proposal_list)
return bbox_results
@@ -71,7 +71,7 @@ class PointNetFPNeck(BaseModule):
                 - fp_xyz (torch.Tensor): The coordinates of fp features.
                 - fp_features (torch.Tensor): The features from the last
-                    feature propogation layers.
+                    feature propagation layers.
         """
         sa_xyz, sa_features = self._extract_input(feat_dict)
...
@@ -4,10 +4,11 @@ from .bbox_heads import PartA2BboxHead
 from .h3d_roi_head import H3DRoIHead
 from .mask_heads import PointwiseSemanticHead, PrimitiveHead
 from .part_aggregation_roi_head import PartAggregationROIHead
+from .point_rcnn_roi_head import PointRCNNRoIHead
 from .roi_extractors import Single3DRoIAwareExtractor, SingleRoIExtractor

 __all__ = [
     'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
     'Single3DRoIAwareExtractor', 'PartA2BboxHead', 'SingleRoIExtractor',
-    'H3DRoIHead', 'PrimitiveHead'
+    'H3DRoIHead', 'PrimitiveHead', 'PointRCNNRoIHead'
 ]
@@ -5,9 +5,10 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
                                                Shared4Conv1FCBBoxHead)
 from .h3d_bbox_head import H3DBboxHead
 from .parta2_bbox_head import PartA2BboxHead
+from .point_rcnn_bbox_head import PointRCNNBboxHead

 __all__ = [
     'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
     'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead',
-    'H3DBboxHead'
+    'H3DBboxHead', 'PointRCNNBboxHead'
 ]
@@ -285,7 +285,7 @@ class PartA2BboxHead(BaseModule):
     def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
              pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
-        """Coumputing losses.
+        """Computing losses.

         Args:
             cls_score (torch.Tensor): Scores of each roi.
@@ -461,12 +461,13 @@ class PartA2BboxHead(BaseModule):
         return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                 bbox_weights)

-    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1):
+    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1.0):
         """Calculate corner loss of given boxes.

         Args:
             pred_bbox3d (torch.FloatTensor): Predicted boxes in shape (N, 7).
             gt_bbox3d (torch.FloatTensor): Ground truth boxes in shape (N, 7).
+            delta (float, optional): Huber loss threshold. Defaults to 1.0.

         Returns:
             torch.FloatTensor: Calculated corner loss in shape (N).
@@ -489,8 +490,8 @@ class PartA2BboxHead(BaseModule):
                    torch.norm(pred_box_corners - gt_box_corners_flip,
                               dim=2))  # (N, 8)
         # huber loss
-        abs_error = torch.abs(corner_dist)
-        quadratic = torch.clamp(abs_error, max=delta)
+        abs_error = corner_dist.abs()
+        quadratic = abs_error.clamp(max=delta)
         linear = (abs_error - quadratic)
         corner_loss = 0.5 * quadratic**2 + delta * linear
@@ -540,13 +541,13 @@ class PartA2BboxHead(BaseModule):
             cur_box_prob = class_pred[batch_id]
             cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
-            selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
-                                            cfg.score_thr, cfg.nms_thr,
-                                            img_metas[batch_id],
-                                            cfg.use_rotate_nms)
-            selected_bboxes = cur_rcnn_boxes3d[selected]
-            selected_label_preds = cur_class_labels[selected]
-            selected_scores = cur_cls_score[selected]
+            keep = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
+                                        cfg.score_thr, cfg.nms_thr,
+                                        img_metas[batch_id],
+                                        cfg.use_rotate_nms)
+            selected_bboxes = cur_rcnn_boxes3d[keep]
+            selected_label_preds = cur_class_labels[keep]
+            selected_scores = cur_cls_score[keep]
             result_list.append(
                 (img_metas[batch_id]['box_type_3d'](selected_bboxes,
@@ -618,6 +619,6 @@ class PartA2BboxHead(BaseModule):
                 dtype=torch.int64,
                 device=box_preds.device))

-        selected = torch.cat(
-            selected_list, dim=0) if len(selected_list) > 0 else []
-        return selected
+        keep = torch.cat(
+            selected_list, dim=0) if len(selected_list) > 0 else []
+        return keep
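The clamp-and-subtract trick in the hunk above is the Huber loss in disguise; an equivalent standalone form with a hand-checked example:

```python
import torch

def huber(abs_error: torch.Tensor, delta: float = 1.0) -> torch.Tensor:
    # Quadratic below delta, linear above it -- identical to the
    # clamp/subtract formulation in get_corner_loss_lidar.
    quadratic = abs_error.clamp(max=delta)
    linear = abs_error - quadratic
    return 0.5 * quadratic**2 + delta * linear

corner_dist = torch.tensor([0.2, 1.0, 3.0])
print(huber(corner_dist.abs()))  # tensor([0.0200, 0.5000, 2.5000])
```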
This diff is collapsed.
import torch
from torch.nn import functional as F
from mmdet3d.core import AssignResult
from mmdet3d.core.bbox import bbox3d2result, bbox3d2roi
from mmdet.core import build_assigner, build_sampler
from mmdet.models import HEADS
from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead
@HEADS.register_module()
class PointRCNNRoIHead(Base3DRoIHead):
"""RoI head for PointRCNN.
Args:
bbox_head (dict): Config of bbox_head.
point_roi_extractor (dict): Config of RoI extractor.
train_cfg (dict): Train configs.
test_cfg (dict): Test configs.
depth_normalizer (float, optional): Normalizing factor for the depth
feature. Defaults to 70.0.
init_cfg (dict, optional): Config of initialization. Defaults to None.
"""
def __init__(self,
bbox_head,
point_roi_extractor,
train_cfg,
test_cfg,
depth_normalizer=70.0,
pretrained=None,
init_cfg=None):
super(PointRCNNRoIHead, self).__init__(
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
init_cfg=init_cfg)
self.depth_normalizer = depth_normalizer
if point_roi_extractor is not None:
self.point_roi_extractor = build_roi_extractor(point_roi_extractor)
self.init_assigner_sampler()
def init_bbox_head(self, bbox_head):
"""Initialize box head.
Args:
bbox_head (dict): Config dict of RoI Head.
"""
self.bbox_head = build_head(bbox_head)
def init_mask_head(self):
"""Initialize maek head."""
pass
def init_assigner_sampler(self):
"""Initialize assigner and sampler."""
self.bbox_assigner = None
self.bbox_sampler = None
if self.train_cfg:
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
]
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
def forward_train(self, feats_dict, input_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PointRCNNRoIHead.
Args:
feats_dict (dict): Contains features from the first stage.
input_metas (list[dict]): Meta info of each input.
proposal_list (list[dict]): Proposal information from rpn.
The dictionary should contain the following keys:
- boxes_3d (:obj:`BaseInstance3DBoxes`): Proposal bboxes
- labels_3d (torch.Tensor): Labels of proposals
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]):
GT bboxes of each sample. The bboxes are encapsulated
by 3D box structures.
gt_labels_3d (list[LongTensor]): GT labels of each sample.
Returns:
dict: Losses from RoI RCNN head.
- loss_bbox (torch.Tensor): Loss of bboxes
"""
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
# concat the depth, semantic features and backbone features
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
bbox_results = self._bbox_forward_train(features, points,
sample_results)
losses = dict()
losses.update(bbox_results['loss_bbox'])
return losses
def simple_test(self, feats_dict, img_metas, proposal_list, **kwargs):
"""Simple testing forward function of PointRCNNRoIHead.
Note:
This function assumes that the batch size is 1.
Args:
feats_dict (dict): Contains features from the first stage.
img_metas (list[dict]): Meta info of each image.
proposal_list (list[dict]): Proposal information from rpn.
Returns:
dict: Bbox results of one frame.
"""
rois = bbox3d2roi([res['boxes_3d'].tensor for res in proposal_list])
labels_3d = [res['labels_3d'] for res in proposal_list]
features = feats_dict['features']
points = feats_dict['points']
point_cls_preds = feats_dict['points_cls_preds']
sem_scores = point_cls_preds.sigmoid()
point_scores = sem_scores.max(-1)[0]
features = features.transpose(1, 2).contiguous()
point_depths = points.norm(dim=2) / self.depth_normalizer - 0.5
features_list = [
point_scores.unsqueeze(2),
point_depths.unsqueeze(2), features
]
features = torch.cat(features_list, dim=2)
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
object_score = bbox_results['cls_score'].sigmoid()
bbox_list = self.bbox_head.get_bboxes(
rois,
object_score,
bbox_results['bbox_pred'],
labels_3d,
img_metas,
cfg=self.test_cfg)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def _bbox_forward_train(self, features, points, sampling_results):
"""Forward training function of roi_extractor and bbox_head.
Args:
features (torch.Tensor): Backbone features with depth and
semantic features.
points (torch.Tensor): Pointcloud.
sampling_results (:obj:`SamplingResult`): Sampled results used
for training.
Returns:
dict: Forward results including losses and predictions.
"""
rois = bbox3d2roi([res.bboxes for res in sampling_results])
batch_size = features.shape[0]
bbox_results = self._bbox_forward(features, points, batch_size, rois)
bbox_targets = self.bbox_head.get_targets(sampling_results,
self.train_cfg)
loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
bbox_results['bbox_pred'], rois,
*bbox_targets)
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, features, points, batch_size, rois):
"""Forward function of roi_extractor and bbox_head used in both
training and testing.
Args:
features (torch.Tensor): Backbone features with depth and
semantic features.
points (torch.Tensor): Pointcloud.
batch_size (int): Batch size.
rois (torch.Tensor): RoI boxes.
Returns:
dict: Contains predictions of bbox_head and
features of roi_extractor.
"""
pooled_point_feats = self.point_roi_extractor(features, points,
batch_size, rois)
cls_score, bbox_pred = self.bbox_head(pooled_point_feats)
bbox_results = dict(cls_score=cls_score, bbox_pred=bbox_pred)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
"""Assign and sample proposals for training.
Args:
proposal_list (list[dict]): Proposals produced by RPN.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes.
gt_labels_3d (list[torch.Tensor]): Ground truth labels
Returns:
list[:obj:`SamplingResult`]: Sampled results of each training
sample.
"""
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['boxes_3d']
cur_labels_3d = cur_proposal_list['labels_3d']
cur_gt_bboxes = gt_bboxes_3d[batch_idx].to(cur_boxes.device)
cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0
# 0 is bg
batch_gt_indis = cur_gt_labels.new_full((len(cur_boxes), ), 0)
batch_max_overlaps = cur_boxes.tensor.new_zeros(len(cur_boxes))
# -1 is bg for labels
batch_gt_labels = cur_gt_labels.new_full((len(cur_boxes), ), -1)
# each class may have its own assigner
if isinstance(self.bbox_assigner, list):
for i, assigner in enumerate(self.bbox_assigner):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_labels_3d == i)
cur_assign_res = assigner.assign(
cur_boxes.tensor[pred_per_cls],
cur_gt_bboxes.tensor[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
gt_inds_arange_pad = gt_per_cls.nonzero(
as_tuple=False).view(-1) + 1
# pad 0 for unassigned indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=0)
# pad -1 for ignored indices
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
# convert to 0~gt_num+2 for indices
gt_inds_arange_pad += 1
# now 0 is bg, >0 is fg in batch_gt_indis
batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
cur_assign_res.gt_inds + 1] - 1
batch_max_overlaps[
pred_per_cls] = cur_assign_res.max_overlaps
batch_gt_labels[pred_per_cls] = cur_assign_res.labels
assign_result = AssignResult(batch_num_gts, batch_gt_indis,
batch_max_overlaps,
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes.tensor,
cur_gt_bboxes.tensor,
gt_labels=cur_gt_labels)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes.tensor,
cur_gt_bboxes.tensor,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
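The gt-index bookkeeping inside `_assign_and_sample` is easy to get lost in: each per-class assigner returns 1-based indices into that class's subset of GTs (0 = unassigned, -1 = ignore), and a padded lookup table converts them back to indices into the full GT list. A standalone sketch with made-up tensors:

```python
import torch
import torch.nn.functional as F

# Four GTs in total; GTs at positions 1 and 3 belong to the current class.
gt_per_cls = torch.tensor([False, True, False, True])
# Assigner output per proposal: 0 = unassigned, -1 = ignore,
# k > 0 = k-th GT of this class (1-based within the subset).
gt_inds = torch.tensor([0, 1, 2, -1])

lut = gt_per_cls.nonzero(as_tuple=False).view(-1) + 1  # tensor([2, 4])
lut = F.pad(lut, (1, 0), mode='constant', value=0)     # slot for unassigned
lut = F.pad(lut, (1, 0), mode='constant', value=-1)    # slot for ignore
lut += 1                                               # so gt_inds + 1 indexes it
global_inds = lut[gt_inds + 1] - 1
print(global_inds)  # tensor([ 0,  2,  4, -1]): bg, gt#2, gt#4, ignore
```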
@@ -20,6 +20,7 @@ from .pointnet_modules import (PAConvCUDASAModule, PAConvCUDASAModuleMSG,
                                build_sa_module)
 from .roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_all,
                               points_in_boxes_cpu, points_in_boxes_part)
+from .roipoint_pool3d import RoIPointPool3d
 from .sparse_block import (SparseBasicBlock, SparseBottleneck,
                            make_sparse_convmodule)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
@@ -39,5 +40,5 @@ __all__ = [
     'get_compiler_version', 'assign_score_withk', 'get_compiling_cuda_version',
     'Points_Sampler', 'build_sa_module', 'PAConv', 'PAConvCUDA',
     'PAConvSAModuleMSG', 'PAConvSAModule', 'PAConvCUDASAModule',
-    'PAConvCUDASAModuleMSG'
+    'PAConvCUDASAModuleMSG', 'RoIPointPool3d'
 ]
@@ -472,6 +472,35 @@ def test_imvoxelnet():
     assert labels_3d.shape[0] >= 0
def test_pointrcnn():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
pointrcnn_cfg = _get_detector_cfg(
'pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
self = build_detector(pointrcnn_cfg).cuda()
points_0 = torch.rand([1000, 4], device='cuda')
points_1 = torch.rand([1000, 4], device='cuda')
points = [points_0, points_1]
img_meta_0 = dict(box_type_3d=LiDARInstance3DBoxes)
img_meta_1 = dict(box_type_3d=LiDARInstance3DBoxes)
img_metas = [img_meta_0, img_meta_1]
gt_bbox_0 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bbox_1 = LiDARInstance3DBoxes(torch.rand([10, 7], device='cuda'))
gt_bboxes = [gt_bbox_0, gt_bbox_1]
gt_labels_0 = torch.randint(0, 3, [10], device='cuda')
gt_labels_1 = torch.randint(0, 3, [10], device='cuda')
gt_labels = [gt_labels_0, gt_labels_1]
# test_forward_train
losses = self.forward_train(points, img_metas, gt_bboxes, gt_labels)
assert losses['bbox_loss'] >= 0
assert losses['semantic_loss'] >= 0
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
assert losses['loss_corner'] >= 0
def test_smoke():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
...
# Copyright (c) OpenMMLab. All rights reserved.
import copy

import mmcv
import numpy as np
import pytest
import random
@@ -116,6 +117,23 @@ def _get_pts_bbox_head_cfg(fname):
     return pts_bbox_head
def _get_pointrcnn_rpn_head_cfg(fname):
"""Grab configs necessary to create a rpn_head.
These are deep copied to allow for safe modification of parameters without
influencing other tests.
"""
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.model.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.model.test_cfg))
rpn_head = model.rpn_head
rpn_head.update(train_cfg=train_cfg.rpn)
rpn_head.update(test_cfg=test_cfg.rpn)
return rpn_head, train_cfg.rpn.rpn_proposal
def _get_vote_head_cfg(fname):
    """Grab configs necessary to create a vote_head.
@@ -147,6 +165,14 @@ def _get_parta2_bbox_head_cfg(fname):
     return vote_head
def _get_pointrcnn_bbox_head_cfg(fname):
    config = _get_config_module(fname)
    model = copy.deepcopy(config.model)
    bbox_head = model.roi_head.bbox_head
    return bbox_head
def test_anchor3d_head_loss():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
@@ -263,6 +289,39 @@ def test_parta2_rpnhead_getboxes():
     assert result_list[0]['boxes_3d'].tensor.shape == torch.Size([512, 7])
def test_pointrcnn_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_pointrcnn_rpn_head_cfg(
'./pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
self = build_head(rpn_head_cfg)
self.cuda()
fp_features = torch.rand([2, 128, 1024], dtype=torch.float32).cuda()
feats = {'fp_features': fp_features}
# fake input_metas
input_metas = [{
'sample_idx': 1234,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}, {
'sample_idx': 2345,
'box_type_3d': LiDARInstance3DBoxes,
'box_mode_3d': Box3DMode.LIDAR
}]
(bbox_preds, cls_preds) = self.forward(feats)
assert bbox_preds.shape == (2, 1024, 8)
assert cls_preds.shape == (2, 1024, 3)
points = torch.rand([2, 1024, 3], dtype=torch.float32).cuda()
result_list = self.get_bboxes(points, bbox_preds, cls_preds, input_metas)
max_num = proposal_cfg.max_num
bbox, score_selected, labels, cls_preds_selected = result_list[0]
assert bbox.tensor.shape == (max_num, 7)
assert score_selected.shape == (max_num, )
assert labels.shape == (max_num, )
assert cls_preds_selected.shape == (max_num, 3)
def test_vote_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
@@ -466,6 +525,18 @@ def test_parta2_bbox_head():
     assert bbox_pred.shape == (256, 7)
def test_pointrcnn_bbox_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
pointrcnn_bbox_head_cfg = _get_pointrcnn_bbox_head_cfg(
'./pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
self = build_head(pointrcnn_bbox_head_cfg).cuda()
feats = torch.rand([100, 512, 133]).cuda()
rcnn_cls, rcnn_reg = self.forward(feats)
assert rcnn_cls.shape == (100, 1)
assert rcnn_reg.shape == (100, 7)
def test_part_aggregation_ROI_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
@@ -540,6 +611,50 @@ def test_part_aggregation_ROI_head():
     assert labels_3d.shape == (12, )
def test_pointrcnn_roi_head():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
roi_head_cfg = _get_roi_head_cfg(
'./pointrcnn/pointrcnn_2x8_kitti-3d-3classes.py')
self = build_head(roi_head_cfg).cuda()
features = torch.rand([3, 128, 16384]).cuda()
points = torch.rand([3, 16384, 3]).cuda()
points_cls_preds = torch.rand([3, 16384, 3]).cuda()
rcnn_feats = {
'features': features,
'points': points,
'points_cls_preds': points_cls_preds
}
boxes_3d = LiDARInstance3DBoxes(torch.rand(50, 7).cuda())
labels_3d = torch.randint(low=0, high=2, size=[50]).cuda()
proposal = {'boxes_3d': boxes_3d, 'labels_3d': labels_3d}
proposal_list = [proposal for i in range(3)]
gt_bboxes_3d = [
LiDARInstance3DBoxes(torch.rand([5, 7], device='cuda'))
for i in range(3)
]
gt_labels_3d = [torch.randint(0, 2, [5], device='cuda') for i in range(3)]
box_type_3d = LiDARInstance3DBoxes
img_metas = [dict(box_type_3d=box_type_3d) for i in range(3)]
losses = self.forward_train(rcnn_feats, img_metas, proposal_list,
gt_bboxes_3d, gt_labels_3d)
assert losses['loss_cls'] >= 0
assert losses['loss_bbox'] >= 0
assert losses['loss_corner'] >= 0
bbox_results = self.simple_test(rcnn_feats, img_metas, proposal_list)
boxes_3d = bbox_results[0]['boxes_3d']
scores_3d = bbox_results[0]['scores_3d']
labels_3d = bbox_results[0]['labels_3d']
assert boxes_3d.tensor.shape[1] == 7
assert boxes_3d.tensor.shape[0] == scores_3d.shape[0]
assert scores_3d.shape[0] == labels_3d.shape[0]
def test_free_anchor_3D_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
@@ -700,7 +815,7 @@ def test_h3d_head():
     h3d_head_cfg.bbox_head.num_proposal = num_proposal
     self = build_head(h3d_head_cfg).cuda()

-    # prepare roi outputs
+    # prepare RoI outputs
     fp_xyz = [torch.rand([1, num_point, 3], dtype=torch.float32).cuda()]
     hd_features = torch.rand([1, 256, num_point], dtype=torch.float32).cuda()
     fp_indices = [torch.randint(0, 128, [1, num_point]).cuda()]
...
@@ -61,6 +61,8 @@ def test_config_build_model():
             check_parta2_roi_head(head_config, detector.roi_head)
         elif head_config.type == 'H3DRoIHead':
             check_h3d_roi_head(head_config, detector.roi_head)
+        elif head_config.type == 'PointRCNNRoIHead':
+            check_pointrcnn_roi_head(head_config, detector.roi_head)
         else:
             _check_roi_head(head_config, detector.roi_head)
         # else:
@@ -273,3 +275,28 @@ def _check_h3d_bbox_head(bbox_cfg, bbox_head):
         12 == bbox_head.line_center_matcher.num_point[0]
     assert bbox_cfg.suface_matching_cfg.mlp_channels[-1] * \
         18 == bbox_head.bbox_pred[0].in_channels
def check_pointrcnn_roi_head(config, head):
assert config['type'] == head.__class__.__name__
# check point_roi_extractor
point_roi_cfg = config.point_roi_extractor
point_roi_extractor = head.point_roi_extractor
_check_pointrcnn_roi_extractor(point_roi_cfg, point_roi_extractor)
# check pointrcnn rcnn bboxhead
bbox_cfg = config.bbox_head
bbox_head = head.bbox_head
_check_pointrcnn_bbox_head(bbox_cfg, bbox_head)
def _check_pointrcnn_roi_extractor(config, roi_extractor):
assert config['type'] == roi_extractor.__class__.__name__
assert config.roi_layer.num_sampled_points == \
roi_extractor.roi_layer.num_sampled_points
def _check_pointrcnn_bbox_head(bbox_cfg, bbox_head):
assert bbox_cfg['type'] == bbox_head.__class__.__name__
assert bbox_cfg.num_classes == bbox_head.num_classes
assert bbox_cfg.with_corner_loss == bbox_head.with_corner_loss