"tests/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "843355f89fd043e82b3344d9259e6faa640da6f9"
Commit 89bda282 authored by zhangwenwei

Merge branch 'master' into fix-train-runtime

parents ff8623e1 99db60dd
@@ -12,7 +12,7 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/pre-commit/mirrors-yapf
-    rev: v0.29.0
+    rev: v0.30.0
     hooks:
       - id: yapf
   - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -116,7 +116,7 @@ input_modality = dict(
     use_lidar=True,
     use_depth=False,
     use_lidar_intensity=True,
-    use_camera=False,
+    use_camera=True,
 )
 db_sampler = dict(
     root_path=data_root,
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='PartA2',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
),
voxel_encoder=dict(type='VoxelFeatureExtractorV3'),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
)
test_cfg = dict(
rpn=dict(
nms_pre=1024,
max_per_img=100,
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.7,
score_thr=0))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True, use_depth=False, use_lidar_intensity=True, use_camera=True)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
)
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0, 0, 0],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.39269908, 0.39269908]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
    betas=(0.95, 0.99),  # the momentum changes during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True)
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl', port=29502)
log_level = 'INFO'
work_dir = './work_dirs/parta2_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
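
A minimal sketch of consuming a config like the one above, using the standard mmcv/mmdet entry points this repo relies on elsewhere (the config path is taken from the tests added later in this commit):

    from mmcv import Config
    from mmdet.models import build_detector

    cfg = Config.fromfile(
        'configs/kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
    # train_cfg/test_cfg live at the top level of the config in this codebase
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)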
 from .boxvelo_head import Anchor3DVeloHead
+from .parta2_rpn_head import PartA2RPNHead
 from .second_head import SECONDHead
-__all__ = ['Anchor3DVeloHead', 'SECONDHead']
+__all__ = ['Anchor3DVeloHead', 'SECONDHead', 'PartA2RPNHead']
from __future__ import division
import numpy as np
import torch
from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.models import HEADS
from .second_head import SECONDHead
@HEADS.register_module
class PartA2RPNHead(SECONDHead):
"""rpn head for PartA2
Args:
class_name (list[str]): name of classes (TODO: to be removed)
in_channels (int): Number of channels in the input feature map.
train_cfg (dict): train configs
test_cfg (dict): test configs
feat_channels (int): Number of channels of the feature map.
use_direction_classifier (bool): Whether to add a direction classifier.
        encode_bg_as_zeros (bool): Whether to use sigmoid or softmax
(TODO: to be removed)
box_code_size (int): The size of box code.
anchor_generator(dict): Config dict of anchor generator.
assigner_per_size (bool): Whether to do assignment for each separate
anchor size.
assign_per_class (bool): Whether to do assignment for each class.
diff_rad_by_sin (bool): Whether to change the difference into sin
difference for box regression loss.
dir_offset (float | int): The offset of BEV rotation angles
(TODO: may be moved into box coder)
dirlimit_offset (float | int): The limited range of BEV rotation angles
(TODO: may be moved into box coder)
box_coder (dict): Config dict of box coders.
loss_cls (dict): Config of classification loss.
loss_bbox (dict): Config of localization loss.
loss_dir (dict): Config of direction classifier loss.
""" # npqa:W293
def __init__(self,
class_name,
in_channels,
train_cfg,
test_cfg,
feat_channels=256,
use_direction_classifier=True,
encode_bg_as_zeros=False,
box_code_size=7,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
custom_values=[],
reshape_out=False),
assigner_per_size=False,
assign_per_class=False,
diff_rad_by_sin=True,
dir_offset=0,
dir_limit_offset=1,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):
super().__init__(class_name, in_channels, train_cfg, test_cfg,
feat_channels, use_direction_classifier,
encode_bg_as_zeros, box_code_size, anchor_generator,
assigner_per_size, assign_per_class, diff_rad_by_sin,
dir_offset, dir_limit_offset, bbox_coder, loss_cls,
loss_bbox, loss_dir)
def get_bboxes(self,
cls_scores,
bbox_preds,
dir_cls_preds,
input_metas,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores)
featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
device = cls_scores[0].device
mlvl_anchors = self.anchor_generator.grid_anchors(
featmap_sizes, device=device)
mlvl_anchors = [
anchor.reshape(-1, self.box_code_size) for anchor in mlvl_anchors
]
result_list = []
for img_id in range(len(input_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
dir_cls_pred_list = [
dir_cls_preds[i][img_id].detach() for i in range(num_levels)
]
input_meta = input_metas[img_id]
proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
dir_cls_pred_list, mlvl_anchors,
input_meta, cfg, rescale)
result_list.append(proposals)
return result_list
def get_bboxes_single(self,
cls_scores,
bbox_preds,
dir_cls_preds,
mlvl_anchors,
input_meta,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = []
mlvl_max_scores = []
mlvl_label_pred = []
mlvl_dir_scores = []
mlvl_cls_score = []
for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
cls_score = cls_score.permute(1, 2,
0).reshape(-1, self.num_classes)
if self.use_sigmoid_cls:
scores = cls_score.sigmoid()
else:
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2,
0).reshape(-1, self.box_code_size)
nms_pre = cfg.get('nms_pre', -1)
if self.use_sigmoid_cls:
max_scores, pred_labels = scores.max(dim=1)
else:
max_scores, pred_labels = scores[:, :-1].max(dim=1)
# get topk
if nms_pre > 0 and scores.shape[0] > nms_pre:
topk_scores, topk_inds = max_scores.topk(nms_pre)
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
max_scores = topk_scores
cls_score = cls_score[topk_inds, :]
dir_cls_score = dir_cls_score[topk_inds]
pred_labels = pred_labels[topk_inds]
bboxes = self.bbox_coder.decode(anchors, bbox_pred)
mlvl_bboxes.append(bboxes)
mlvl_max_scores.append(max_scores)
mlvl_cls_score.append(cls_score)
mlvl_label_pred.append(pred_labels)
mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
mlvl_max_scores = torch.cat(mlvl_max_scores)
mlvl_label_pred = torch.cat(mlvl_label_pred)
mlvl_dir_scores = torch.cat(mlvl_dir_scores)
mlvl_cls_score = torch.cat(
mlvl_cls_score) # shape [k, num_class] before sigmoid
score_thr = cfg.get('score_thr', 0)
result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred,
mlvl_cls_score, mlvl_dir_scores,
score_thr, cfg.nms_post, cfg)
result.update(dict(sample_idx=input_meta['sample_idx']))
return result
def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
mlvl_dir_scores, score_thr, max_num, cfg):
bboxes = []
scores = []
labels = []
dir_scores = []
cls_scores = []
score_thr_inds = mlvl_max_scores > score_thr
_scores = mlvl_max_scores[score_thr_inds]
_bboxes_for_nms = mlvl_bboxes_for_nms[score_thr_inds, :]
if cfg.use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)
_mlvl_bboxes = mlvl_bboxes[score_thr_inds, :]
_mlvl_dir_scores = mlvl_dir_scores[score_thr_inds]
_mlvl_label_pred = mlvl_label_pred[score_thr_inds]
_mlvl_cls_score = mlvl_cls_score[score_thr_inds]
if len(selected) > 0:
bboxes.append(_mlvl_bboxes[selected])
scores.append(_scores[selected])
labels.append(_mlvl_label_pred[selected])
cls_scores.append(_mlvl_cls_score[selected])
dir_scores.append(_mlvl_dir_scores[selected])
dir_rot = box_torch_ops.limit_period(
bboxes[-1][..., 6] - self.dir_offset, self.dir_limit_offset,
np.pi)
bboxes[-1][..., 6] = (
dir_rot + self.dir_offset +
np.pi * dir_scores[-1].to(bboxes[-1].dtype))
if bboxes:
bboxes = torch.cat(bboxes, dim=0)
scores = torch.cat(scores, dim=0)
cls_scores = torch.cat(cls_scores, dim=0)
labels = torch.cat(labels, dim=0)
dir_scores = torch.cat(dir_scores, dim=0)
if bboxes.shape[0] > max_num:
_, inds = scores.sort(descending=True)
inds = inds[:max_num]
bboxes = bboxes[inds, :]
labels = labels[inds]
scores = scores[inds]
cls_scores = cls_scores[inds]
dir_scores = dir_scores[inds]
return dict(
box3d_lidar=bboxes.cpu(),
scores=scores.cpu(),
label_preds=labels.cpu(),
cls_preds=cls_scores.cpu(
) # raw scores with shape [max_num, cls_num]
)
else:
return dict(
box3d_lidar=mlvl_bboxes.new_zeros([0,
self.box_code_size]).cpu(),
scores=mlvl_bboxes.new_zeros([0]).cpu(),
label_preds=mlvl_bboxes.new_zeros([0]).cpu(),
cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]
]).cpu())
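
The direction decoding inside class_agnostic_nms above folds each regressed yaw into a half-open period and then adds pi times the direction-classifier bit back. A NumPy sketch of the same arithmetic, assuming the SECOND-style definition limit_period(val, offset, period) = val - floor(val / period + offset) * period:

    import numpy as np

    def limit_period(val, offset=0.5, period=np.pi):
        # fold val into a window of width `period` controlled by `offset`
        return val - np.floor(val / period + offset) * period

    yaw = np.array([2.0, -2.0])          # regressed yaw angles (rad)
    dir_bit = np.array([1.0, 0.0])       # argmax of the direction classifier
    dir_offset, dir_limit_offset = 0, 1  # defaults from PartA2RPNHead.__init__
    folded = limit_period(yaw - dir_offset, dir_limit_offset, np.pi)
    decoded = folded + dir_offset + np.pi * dir_bit  # final box yaw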
@@ -3,10 +3,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
                               DynamicMVXFasterRCNNV3)
 from .mvx_single_stage import MVXSingleStageDetector
 from .mvx_two_stage import MVXTwoStageDetector
+from .parta2 import PartA2
 from .voxelnet import DynamicVoxelNet, VoxelNet
 __all__ = [
     'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
     'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
-    'DynamicMVXFasterRCNNV3'
+    'DynamicMVXFasterRCNNV3', 'PartA2'
 ]
import torch
import torch.nn.functional as F
from mmdet3d.ops import Voxelization
from mmdet.models import DETECTORS, TwoStageDetector
from .. import builder
@DETECTORS.register_module
class PartA2(TwoStageDetector):
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
rpn_head=None,
roi_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(PartA2, self).__init__(
backbone=backbone,
neck=neck,
rpn_head=rpn_head,
roi_head=roi_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
)
self.voxel_layer = Voxelization(**voxel_layer)
self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)
self.middle_encoder = builder.build_middle_encoder(middle_encoder)
def extract_feat(self, points, img_meta):
voxels, num_points, coors = self.voxelize(points)
voxel_dict = dict(voxels=voxels, num_points=num_points, coors=coors)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0].item() + 1
feats_dict = self.middle_encoder(voxel_features, coors, batch_size)
x = self.backbone(feats_dict['spatial_features'])
if self.with_neck:
neck_feats = self.neck(x)
feats_dict.update({'neck_feats': neck_feats})
return feats_dict, voxel_dict
@torch.no_grad()
def voxelize(self, points):
voxels, coors, num_points = [], [], []
for res in points:
res_voxels, res_coors, res_num_points = self.voxel_layer(res)
voxels.append(res_voxels)
coors.append(res_coors)
num_points.append(res_num_points)
voxels = torch.cat(voxels, dim=0)
num_points = torch.cat(num_points, dim=0)
coors_batch = []
for i, coor in enumerate(coors):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
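    # Illustration of the padding above (not part of the model): for sample i,
    # each voxel coordinate row (z_idx, y_idx, x_idx) becomes
    # (i, z_idx, y_idx, x_idx), so one concatenated `coors` tensor can address
    # every sample in the batch, e.g.
    #   F.pad(torch.tensor([[3, 5, 7]]), (1, 0), value=1) -> tensor([[1, 3, 5, 7]])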
def forward_train(self,
points,
img_meta,
gt_bboxes_3d,
gt_labels_3d,
gt_bboxes_ignore=None,
proposals=None):
# TODO: complete it
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
losses = dict()
if self.with_rpn:
rpn_outs = self.rpn_head(feats_dict['neck_feats'])
rpn_loss_inputs = rpn_outs + (gt_bboxes_3d, gt_labels_3d, img_meta)
rpn_losses = self.rpn_head.loss(
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
losses.update(rpn_losses)
proposal_cfg = self.train_cfg.get('rpn_proposal',
self.test_cfg.rpn)
proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
else:
proposal_list = proposals # noqa: F841
return losses
def forward_test(self, **kwargs):
return self.simple_test(**kwargs)
def forward(self, return_loss=True, **kwargs):
if return_loss:
return self.forward_train(**kwargs)
else:
return self.forward_test(**kwargs)
def simple_test(self,
points,
img_meta,
gt_bboxes_3d=None,
proposals=None,
rescale=False):
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
# TODO: complete it
if proposals is None:
proposal_list = self.simple_test_rpn(feats_dict['neck_feats'],
img_meta, self.test_cfg.rpn)
else:
proposal_list = proposals
return self.roi_head.simple_test(
feats_dict, proposal_list, img_meta, rescale=rescale)
 from .pillar_scatter import PointPillarsScatter
 from .sparse_encoder import SparseEncoder
+from .sparse_unet import SparseUNet
-__all__ = ['PointPillarsScatter', 'SparseEncoder']
+__all__ = ['PointPillarsScatter', 'SparseEncoder', 'SparseUNet']
import torch
import torch.nn as nn
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import SparseBasicBlock
from mmdet.ops import build_norm_layer
from ..registry import MIDDLE_ENCODERS
@MIDDLE_ENCODERS.register_module
class SparseUNet(nn.Module):
def __init__(self,
in_channels,
output_shape,
pre_act=False,
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
base_channels=16,
output_channels=128,
encoder_channels=((16, ), (32, 32, 32), (64, 64, 64), (64, 64,
64)),
encoder_paddings=((1, ), (1, 1, 1), (1, 1, 1), ((0, 1, 1), 1,
1)),
decoder_channels=((64, 64, 64), (64, 64, 32), (32, 32, 16),
(16, 16, 16)),
decoder_paddings=((1, 0), (1, 0), (0, 0), (0, 1))):
"""SparseUNet for PartA^2
        See https://arxiv.org/abs/1907.03670 for more details.
Args:
in_channels (int): the number of input channels
output_shape (list[int]): the shape of output tensor
pre_act (bool): use pre_act_block or post_act_block
norm_cfg (dict): config of normalization layer
base_channels (int): out channels for conv_input layer
output_channels (int): out channels for conv_out layer
encoder_channels (tuple[tuple[int]]):
conv channels of each encode block
encoder_paddings (tuple[tuple[int]]): paddings of each encode block
decoder_channels (tuple[tuple[int]]):
conv channels of each decode block
decoder_paddings (tuple[tuple[int]]): paddings of each decode block
"""
super().__init__()
self.sparse_shape = output_shape
self.output_shape = output_shape
self.in_channels = in_channels
self.pre_act = pre_act
self.base_channels = base_channels
self.output_channels = output_channels
self.encoder_channels = encoder_channels
self.encoder_paddings = encoder_paddings
self.decoder_channels = decoder_channels
self.decoder_paddings = decoder_paddings
self.stage_num = len(self.encoder_channels)
# Spconv init all weight on its own
if pre_act:
# TODO: use ConvModule to encapsulate
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
self.base_channels,
3,
padding=1,
bias=False,
indice_key='subm1'))
make_block = self.pre_act_block
else:
self.conv_input = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
self.base_channels,
3,
padding=1,
bias=False,
indice_key='subm1'),
build_norm_layer(norm_cfg, self.base_channels)[1], nn.ReLU())
make_block = self.post_act_block
encoder_out_channels = self.make_encoder_layers(
make_block, norm_cfg, self.base_channels)
self.make_decoder_layers(make_block, norm_cfg, encoder_out_channels)
self.conv_out = spconv.SparseSequential(
# [200, 176, 5] -> [200, 176, 2]
spconv.SparseConv3d(
encoder_out_channels,
self.output_channels, (3, 1, 1),
stride=(2, 1, 1),
padding=0,
bias=False,
indice_key='spconv_down2'),
build_norm_layer(norm_cfg, self.output_channels)[1],
nn.ReLU())
def forward(self, voxel_features, coors, batch_size):
"""Forward of SparseUNet
Args:
voxel_features (torch.float32): shape [N, C]
coors (torch.int32): shape [N, 4](batch_idx, z_idx, y_idx, x_idx)
batch_size (int): batch size
Returns:
dict: backbone features
"""
coors = coors.int()
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coors,
self.sparse_shape,
batch_size)
x = self.conv_input(input_sp_tensor)
encode_features = []
for encoder_layer in self.encoder_layers:
x = encoder_layer(x)
encode_features.append(x)
# for detection head
# [200, 176, 5] -> [200, 176, 2]
out = self.conv_out(encode_features[-1])
spatial_features = out.dense()
N, C, D, H, W = spatial_features.shape
spatial_features = spatial_features.view(N, C * D, H, W)
# for segmentation head, with output shape:
# [400, 352, 11] <- [200, 176, 5]
# [800, 704, 21] <- [400, 352, 11]
# [1600, 1408, 41] <- [800, 704, 21]
# [1600, 1408, 41] <- [1600, 1408, 41]
decode_features = []
x = encode_features[-1]
for i in range(self.stage_num, 0, -1):
x = self.decoder_layer_forward(encode_features[i - 1], x,
getattr(self, f'lateral_layer{i}'),
getattr(self, f'merge_layer{i}'),
getattr(self, f'upsample_layer{i}'))
decode_features.append(x)
seg_features = decode_features[-1].features
ret = dict(
spatial_features=spatial_features, seg_features=seg_features)
return ret
def decoder_layer_forward(self, x_lateral, x_bottom, lateral_layer,
merge_layer, upsample_layer):
"""Forward of upsample and residual block.
Args:
x_lateral (SparseConvTensor): lateral tensor
x_bottom (SparseConvTensor): feature from bottom layer
lateral_layer (SparseBasicBlock): convolution for lateral tensor
merge_layer (SparseSequential): convolution for merging features
upsample_layer (SparseSequential): convolution for upsampling
Returns:
SparseConvTensor: upsampled feature
"""
x = lateral_layer(x_lateral)
x.features = torch.cat((x_bottom.features, x.features), dim=1)
x_merge = merge_layer(x)
x = self.reduce_channel(x, x_merge.features.shape[1])
x.features = x_merge.features + x.features
x = upsample_layer(x)
return x
@staticmethod
def reduce_channel(x, out_channels):
"""reduce channel for element-wise addition.
Args:
x (SparseConvTensor): x.features (N, C1)
out_channels (int): the number of channel after reduction
Returns:
SparseConvTensor: channel reduced feature
"""
features = x.features
n, in_channels = features.shape
assert (in_channels % out_channels
== 0) and (in_channels >= out_channels)
x.features = features.view(n, out_channels, -1).sum(dim=2)
return x
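    # Worked example for reduce_channel (illustrative only): with features of
    # shape (n, 128) and out_channels=64, view(n, 64, -1) groups each pair of
    # adjacent channels and sum(dim=2) adds them, yielding an (n, 64) matrix;
    # hence the assertion that in_channels is a positive multiple of
    # out_channels.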
def pre_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key=None,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
"""Make pre activate sparse convolution block.
Args:
in_channels (int): the number of input channels
out_channels (int): the number of out channels
kernel_size (int): kernel size of convolution
indice_key (str): the indice key used for sparse tensor
stride (int): the stride of convolution
padding (int or list[int]): the padding number of input
conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
norm_cfg (dict): config of normalization layer
Returns:
spconv.SparseSequential: pre activate sparse convolution block.
"""
# TODO: use ConvModule to encapsulate
assert conv_type in ['subm', 'spconv', 'inverseconv']
if conv_type == 'subm':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
padding=padding,
bias=False,
indice_key=indice_key))
elif conv_type == 'spconv':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key))
elif conv_type == 'inverseconv':
m = spconv.SparseSequential(
build_norm_layer(norm_cfg, in_channels)[1],
nn.ReLU(inplace=True),
spconv.SparseInverseConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key))
else:
raise NotImplementedError
return m
def post_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
"""Make post activate sparse convolution block.
Args:
in_channels (int): the number of input channels
out_channels (int): the number of out channels
kernel_size (int): kernel size of convolution
indice_key (str): the indice key used for sparse tensor
stride (int): the stride of convolution
padding (int or list[int]): the padding number of input
conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
norm_cfg (dict[str]): config of normalization layer
Returns:
spconv.SparseSequential: post activate sparse convolution block.
"""
# TODO: use ConvModule to encapsulate
assert conv_type in ['subm', 'spconv', 'inverseconv']
if conv_type == 'subm':
m = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'spconv':
m = spconv.SparseSequential(
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'inverseconv':
m = spconv.SparseSequential(
spconv.SparseInverseConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
else:
raise NotImplementedError
return m
def make_encoder_layers(self, make_block, norm_cfg, in_channels):
"""make encoder layers using sparse convs
Args:
            make_block (method): a bound function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
Returns:
int: the number of encoder output channels
"""
self.encoder_layers = spconv.SparseSequential()
for i, blocks in enumerate(self.encoder_channels):
blocks_list = []
for j, out_channels in enumerate(tuple(blocks)):
padding = tuple(self.encoder_paddings[i])[j]
# each stage started with a spconv layer
# except the first stage
if i != 0 and j == 0:
blocks_list.append(
make_block(
in_channels,
out_channels,
3,
norm_cfg=norm_cfg,
stride=2,
padding=padding,
indice_key=f'spconv{i + 1}',
conv_type='spconv'))
else:
blocks_list.append(
make_block(
in_channels,
out_channels,
3,
norm_cfg=norm_cfg,
padding=padding,
indice_key=f'subm{i + 1}'))
in_channels = out_channels
stage_name = f'encoder_layer{i + 1}'
stage_layers = spconv.SparseSequential(*blocks_list)
self.encoder_layers.add_module(stage_name, stage_layers)
return out_channels
def make_decoder_layers(self, make_block, norm_cfg, in_channels):
"""make decoder layers using sparse convs
Args:
make_block (method): a bounded function to build blocks
norm_cfg (dict[str]): config of normalization layer
in_channels (int): the number of encoder input channels
Returns:
int: the number of encoder output channels
"""
block_num = len(self.decoder_channels)
for i, block_channels in enumerate(self.decoder_channels):
paddings = self.decoder_paddings[i]
setattr(
self, f'lateral_layer{block_num - i}',
SparseBasicBlock(
in_channels,
block_channels[0],
conv_cfg=dict(
type='SubMConv3d', indice_key=f'subm{block_num - i}'),
norm_cfg=norm_cfg))
setattr(
self, f'merge_layer{block_num - i}',
make_block(
in_channels * 2,
block_channels[1],
3,
norm_cfg=norm_cfg,
padding=paddings[0],
indice_key=f'subm{block_num - i}'))
if block_num - i != 1:
setattr(
self, f'upsample_layer{block_num - i}',
make_block(
in_channels,
block_channels[2],
3,
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key=f'spconv{block_num - i}',
conv_type='inverseconv'))
else:
# use submanifold conv instead of inverse conv
# in the last block
setattr(
self, f'upsample_layer{block_num - i}',
make_block(
in_channels,
block_channels[2],
3,
norm_cfg=norm_cfg,
padding=paddings[1],
indice_key='subm1',
conv_type='subm'))
in_channels = block_channels[2]
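
A rough shape trace for the default SparseUNet above on the KITTI sparse shape [41, 1600, 1408], inferred from the in-code comments and the unit test added later in this commit (treat it as a sketch):

    # encoder (z, y, x) x C: [41, 1600, 1408] x 16 -> [21, 800, 704] x 32
    #                        -> [11, 400, 352] x 64 -> [5, 200, 176] x 64
    # conv_out: [5, 200, 176] x 64 -> [2, 200, 176] x 128, densified and
    #           reshaped to spatial_features of shape (N, 256, 200, 176)
    # decoder: mirrors the encoder back to [41, 1600, 1408] x 16, giving
    #          per-voxel seg_features of width 16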
@@ -2,12 +2,29 @@ from mmdet.ops import (RoIAlign, SigmoidFocalLoss, get_compiler_version,
                        get_compiling_cuda_version, nms, roi_align,
                        sigmoid_focal_loss)
 from .norm import NaiveSyncBatchNorm1d, NaiveSyncBatchNorm2d
+from .sparse_block import (SparseBasicBlock, SparseBasicBlockV0,
+                           SparseBottleneck, SparseBottleneckV0)
 from .voxel import DynamicScatter, Voxelization, dynamic_scatter, voxelization
 __all__ = [
-    'nms', 'soft_nms', 'RoIAlign', 'roi_align', 'get_compiler_version',
-    'get_compiling_cuda_version', 'build_conv_layer', 'NaiveSyncBatchNorm1d',
-    'NaiveSyncBatchNorm2d', 'batched_nms', 'Voxelization', 'voxelization',
-    'dynamic_scatter', 'DynamicScatter', 'sigmoid_focal_loss',
-    'SigmoidFocalLoss'
+    'nms',
+    'soft_nms',
+    'RoIAlign',
+    'roi_align',
+    'get_compiler_version',
+    'get_compiling_cuda_version',
+    'build_conv_layer',
+    'NaiveSyncBatchNorm1d',
+    'NaiveSyncBatchNorm2d',
+    'batched_nms',
+    'Voxelization',
+    'voxelization',
+    'dynamic_scatter',
+    'DynamicScatter',
+    'sigmoid_focal_loss',
+    'SigmoidFocalLoss',
+    'SparseBasicBlockV0',
+    'SparseBottleneckV0',
+    'SparseBasicBlock',
+    'SparseBottleneck',
 ]
@@ -4,12 +4,14 @@ from . import roiaware_pool3d_ext
 def points_in_boxes_gpu(points, boxes):
-    """
+    """Find points that are in boxes (CUDA)
     Args:
         points (torch.Tensor): [B, M, 3], [x, y, z] in LiDAR coordinate
         boxes (torch.Tensor): [B, T, 7],
             num_valid_boxes <= T, [x, y, z, w, l, h, ry] in LiDAR coordinate,
            (x, y, z) is the bottom center
     Returns:
         box_idxs_of_pts (torch.Tensor): (B, M), default background = -1
     """
@@ -27,14 +29,20 @@ def points_in_boxes_gpu(points, boxes):
 def points_in_boxes_cpu(points, boxes):
-    """
+    """Find points that are in boxes (CPU)
+    Note: Currently, the output of this function is different from that of
+    points_in_boxes_gpu.
     Args:
         points (torch.Tensor): [npoints, 3]
         boxes (torch.Tensor): [N, 7], in LiDAR coordinate,
             (x, y, z) is the bottom center
     Returns:
         point_indices (torch.Tensor): (N, npoints)
     """
+    # TODO: Refactor this function as a CPU version of points_in_boxes_gpu
     assert boxes.shape[1] == 7
     assert points.shape[1] == 3
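
A minimal usage sketch for points_in_boxes_cpu, mirroring the unit test later in this commit (the import path is assumed, and the compiled roiaware_pool3d extension must be built):

    import torch
    from mmdet3d.ops.roiaware_pool3d.points_in_boxes import \
        points_in_boxes_cpu  # assumed path

    boxes = torch.tensor([[1.0, 2.0, 0.0, 4.0, 4.0, 6.0, 0.3]])  # (N, 7), bottom-center
    pts = torch.tensor([[1.0, 2.0, 3.3], [-16.0, -18.0, 9.0]])   # (npoints, 3)
    point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)  # (N, npoints) indicator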
@@ -10,7 +10,8 @@ class RoIAwarePool3d(nn.Module):
     def __init__(self, out_size, max_pts_per_voxel=128, mode='max'):
         super().__init__()
-        """
+        """RoIAwarePool3d module
         Args:
             out_size (int or tuple): n or [n1, n2, n3]
             max_pts_per_voxel (int): m
@@ -23,12 +24,14 @@ class RoIAwarePool3d(nn.Module):
         self.mode = pool_method_map[mode]
     def forward(self, rois, pts, pts_feature):
-        """
+        """RoIAwarePool3d module forward
         Args:
             rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                 (x, y, z) is the bottom center of rois
             pts (torch.Tensor): [npoints, 3]
             pts_feature (torch.Tensor): [npoints, C]
         Returns:
             pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
         """
@@ -43,7 +46,8 @@ class RoIAwarePool3dFunction(Function):
     @staticmethod
     def forward(ctx, rois, pts, pts_feature, out_size, max_pts_per_voxel,
                 mode):
-        """
+        """RoIAwarePool3d function forward
         Args:
             rois (torch.Tensor): [N, 7], in LiDAR coordinate,
                 (x, y, z) is the bottom center of rois
@@ -52,6 +56,7 @@ class RoIAwarePool3dFunction(Function):
             out_size (int or tuple): n or [n1, n2, n3]
             max_pts_per_voxel (int): m
             mode (int): 0 (max pool) or 1 (average pool)
         Returns:
             pooled_features (torch.Tensor): [N, out_x, out_y, out_z, C]
         """
@@ -84,11 +89,12 @@ class RoIAwarePool3dFunction(Function):
     @staticmethod
     def backward(ctx, grad_out):
-        """
+        """RoIAwarePool3d function backward
         Args:
-            grad_out: [N, out_x, out_y, out_z, C]
+            grad_out (torch.Tensor): [N, out_x, out_y, out_z, C]
         Returns:
-            grad_in: [npoints, C]
+            grad_in (torch.Tensor): [npoints, C]
         """
         ret = ctx.roiaware_pool3d_for_backward
         pts_idx_of_voxels, argmax, mode, num_pts, num_channels = ret
from torch import nn
import mmdet3d.ops.spconv as spconv
from mmdet.models.backbones.resnet import BasicBlock, Bottleneck
from mmdet.ops import build_norm_layer
from mmdet.ops.conv import conv_cfg
conv_cfg.update({'SubMConv3d': spconv.SubMConv3d})
def conv3x3(in_planes, out_planes, stride=1, indice_key=None):
"""3x3 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 3x3 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=3,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
def conv1x1(in_planes, out_planes, stride=1, indice_key=None):
"""1x1 submanifold sparse convolution with padding.
Args:
in_planes (int): the number of input channels
out_planes (int): the number of output channels
stride (int): the stride of convolution
indice_key (str): the indice key used for sparse tensor
Returns:
spconv.conv.SubMConv3d: 1x1 submanifold sparse convolution ops
"""
# TODO: deprecate this class
return spconv.SubMConv3d(
in_planes,
out_planes,
kernel_size=1,
stride=stride,
padding=1,
bias=False,
indice_key=indice_key)
class SparseBasicBlockV0(spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv3x3(inplanes, planes, stride, indice_key=indice_key)
norm_name1, norm_layer1 = build_norm_layer(norm_cfg, planes)
self.bn1 = norm_layer1
self.relu = nn.ReLU()
self.conv2 = conv3x3(planes, planes, indice_key=indice_key)
norm_name2, norm_layer2 = build_norm_layer(norm_cfg, planes)
self.bn2 = norm_layer2
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneckV0(spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
indice_key=None,
norm_fn=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
# TODO: deprecate this class
super().__init__()
self.conv1 = conv1x1(inplanes, planes, indice_key=indice_key)
self.bn1 = norm_fn(planes)
self.conv2 = conv3x3(planes, planes, stride, indice_key=indice_key)
self.bn2 = norm_fn(planes)
self.conv3 = conv1x1(
planes, planes * self.expansion, indice_key=indice_key)
self.bn3 = norm_fn(planes * self.expansion)
self.relu = nn.ReLU()
self.downsample = downsample
self.stride = stride
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBottleneck(Bottleneck, spconv.SparseModule):
expansion = 4
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse bottleneck block for PartA^2.
Bottleneck block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
Bottleneck.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
out = self.conv1(x)
out.features = self.bn1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.bn2(out.features)
out.features = self.relu(out.features)
out = self.conv3(out)
out.features = self.bn3(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
class SparseBasicBlock(BasicBlock, spconv.SparseModule):
expansion = 1
def __init__(self,
inplanes,
planes,
stride=1,
downsample=None,
conv_cfg=None,
norm_cfg=None):
"""Sparse basic block for PartA^2.
Sparse basic block implemented with submanifold sparse convolution.
"""
spconv.SparseModule.__init__(self)
BasicBlock.__init__(
self,
inplanes,
planes,
stride=stride,
downsample=downsample,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg)
def forward(self, x):
identity = x.features
assert x.features.dim() == 2, f'x.features.dim()={x.features.dim()}'
out = self.conv1(x)
out.features = self.norm1(out.features)
out.features = self.relu(out.features)
out = self.conv2(out)
out.features = self.norm2(out.features)
if self.downsample is not None:
identity = self.downsample(x)
out.features += identity
out.features = self.relu(out.features)
return out
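
The forward methods above apply dense BatchNorm1d/ReLU directly to x.features because a spconv SparseConvTensor stores its active voxels as a plain (n, C) matrix plus integer indices. A minimal construction sketch, with values borrowed from the tests below:

    import torch
    import mmdet3d.ops.spconv as spconv

    feats = torch.rand(4, 4)  # (n, C) features of the active voxels
    coors = torch.tensor([[0, 12, 819, 131], [0, 16, 750, 136],
                          [1, 16, 705, 232], [1, 35, 930, 469]],
                         dtype=torch.int32)  # (n, 4): batch, z, y, x
    x = spconv.SparseConvTensor(feats, coors, [41, 1600, 1408], batch_size=2)
    assert x.features.shape == (4, 4)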
import copy
from os.path import dirname, exists, join
import pytest
import torch
def _get_config_directory():
""" Find the predefined detector config directory """
try:
# Assume we are running in the source mmdetection repo
repo_dpath = dirname(dirname(__file__))
except NameError:
# For IPython development when this __file__ is not defined
import mmdet
repo_dpath = dirname(dirname(mmdet.__file__))
config_dpath = join(repo_dpath, 'configs')
if not exists(config_dpath):
raise Exception('Cannot find config path')
return config_dpath
def _get_config_module(fname):
"""
Load a configuration as a python module
"""
from mmcv import Config
config_dpath = _get_config_directory()
config_fpath = join(config_dpath, fname)
config_mod = Config.fromfile(config_fpath)
return config_mod
def _get_head_cfg(fname):
"""
Grab configs necessary to create a bbox_head. These are deep copied to
allow for safe modification of parameters without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.test_cfg))
bbox_head = model.bbox_head
bbox_head.update(train_cfg=train_cfg)
bbox_head.update(test_cfg=test_cfg)
return bbox_head
def _get_rpn_head_cfg(fname):
"""
Grab configs necessary to create a rpn_head. These are deep copied to allow
for safe modification of parameters without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.test_cfg))
rpn_head = model.rpn_head
rpn_head.update(train_cfg=train_cfg.rpn)
rpn_head.update(test_cfg=test_cfg.rpn)
return rpn_head, train_cfg.rpn_proposal
def test_second_head_loss():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(bbox_head_cfg)
self.cuda()
assert isinstance(self.conv_cls, torch.nn.modules.conv.Conv2d)
assert self.conv_cls.in_channels == 512
assert self.conv_cls.out_channels == 18
assert self.conv_reg.out_channels == 42
assert self.conv_dir_cls.out_channels == 12
# test forward
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
assert cls_score[0].shape == torch.Size([2, 18, 200, 176])
assert bbox_pred[0].shape == torch.Size([2, 42, 200, 176])
assert dir_cls_preds[0].shape == torch.Size([2, 12, 200, 176])
# test loss
gt_bboxes = list(
torch.tensor(
[[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
[[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],
dtype=torch.float32).cuda())
gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert losses['loss_cls_3d'][0] > 0
assert losses['loss_bbox_3d'][0] > 0
assert losses['loss_dir_3d'][0] > 0
# test empty ground truth case
gt_bboxes = list(torch.empty((2, 0, 7)).cuda())
gt_labels = list(torch.empty((2, 0)).cuda())
empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert empty_gt_losses['loss_cls_3d'][0] > 0
assert empty_gt_losses['loss_bbox_3d'][0] == 0
assert empty_gt_losses['loss_dir_3d'][0] == 0
def test_second_head_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(bbox_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
cls_score[0] -= 1.5 # too many positive samples may cause cuda oom
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas)
assert (result_list[0]['scores'] > 0.3).all()
def test_parta2_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_rpn_head_cfg(
'kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(rpn_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
cls_score[0] -= 1.5 # too many positive samples may cause cuda oom
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas, proposal_cfg)
assert result_list[0]['scores'].shape == torch.Size([512])
assert result_list[0]['label_preds'].shape == torch.Size([512])
assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])
assert result_list[0]['box3d_lidar'].shape == torch.Size([512, 7])
@@ -19,23 +19,10 @@ def test_RoIAwarePool3d():
         dtype=torch.float32).cuda(
         )  # boxes (m, 7) with bottom center in lidar coordinate
     pts = torch.tensor(
-        [
-            [1, 2, 3.3],
-            [1.2, 2.5, 3.0],
-            [0.8, 2.1, 3.5],
-            [1.6, 2.6, 3.6],
-            [0.8, 1.2, 3.9],
-            [-9.2, 21.0, 18.2],
-            [3.8, 7.9, 6.3],
-            [4.7, 3.5, -12.2],
-            [3.8, 7.6, -2],
-            [-10.6, -12.9, -20],
-            [-16, -18, 9],
-            [-21.3, -52, -5],
-            [0, 0, 0],
-            [6, 7, 8],
-            [-2, -3, -4],
-        ],
+        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
+         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
+         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],
+         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],
         dtype=torch.float32).cuda()  # points (n, 3) in lidar coordinate
     pts_feature = pts.clone()
@@ -83,23 +70,10 @@ def test_points_in_boxes_cpu():
         dtype=torch.float32
     )  # boxes (m, 7) with bottom center in lidar coordinate
     pts = torch.tensor(
-        [
-            [1, 2, 3.3],
-            [1.2, 2.5, 3.0],
-            [0.8, 2.1, 3.5],
-            [1.6, 2.6, 3.6],
-            [0.8, 1.2, 3.9],
-            [-9.2, 21.0, 18.2],
-            [3.8, 7.9, 6.3],
-            [4.7, 3.5, -12.2],
-            [3.8, 7.6, -2],
-            [-10.6, -12.9, -20],
-            [-16, -18, 9],
-            [-21.3, -52, -5],
-            [0, 0, 0],
-            [6, 7, 8],
-            [-2, -3, -4],
-        ],
+        [[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
+         [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
+         [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20], [-16, -18, 9],
+         [-21.3, -52, -5], [0, 0, 0], [6, 7, 8], [-2, -3, -4]],
         dtype=torch.float32)  # points (n, 3) in lidar coordinate
     point_indices = points_in_boxes_cpu(points=pts, boxes=boxes)
@@ -109,9 +83,3 @@ def test_points_in_boxes_cpu():
         dtype=torch.int32)
     assert point_indices.shape == torch.Size([2, 15])
     assert (point_indices == expected_point_indices).all()
-if __name__ == '__main__':
-    test_points_in_boxes_cpu()
-    test_points_in_boxes_gpu()
-    test_RoIAwarePool3d()
import torch
import mmdet3d.ops.spconv as spconv
from mmdet3d.ops import SparseBasicBlock, SparseBasicBlockV0
def test_SparseUNet():
from mmdet3d.models.middle_encoders.sparse_unet import SparseUNet
self = SparseUNet(
in_channels=4, output_shape=[41, 1600, 1408], pre_act=False)
# test encoder layers
assert len(self.encoder_layers) == 4
assert self.encoder_layers.encoder_layer1[0][0].in_channels == 16
assert self.encoder_layers.encoder_layer1[0][0].out_channels == 16
assert isinstance(self.encoder_layers.encoder_layer1[0][0],
spconv.conv.SubMConv3d)
assert isinstance(self.encoder_layers.encoder_layer1[0][1],
torch.nn.modules.batchnorm.BatchNorm1d)
assert isinstance(self.encoder_layers.encoder_layer1[0][2],
torch.nn.modules.activation.ReLU)
assert self.encoder_layers.encoder_layer4[0][0].in_channels == 64
assert self.encoder_layers.encoder_layer4[0][0].out_channels == 64
assert isinstance(self.encoder_layers.encoder_layer4[0][0],
spconv.conv.SparseConv3d)
assert isinstance(self.encoder_layers.encoder_layer4[2][0],
spconv.conv.SubMConv3d)
# test decoder layers
assert isinstance(self.lateral_layer1, SparseBasicBlock)
assert isinstance(self.merge_layer1[0], spconv.conv.SubMConv3d)
assert isinstance(self.upsample_layer1[0], spconv.conv.SubMConv3d)
assert isinstance(self.upsample_layer2[0], spconv.conv.SparseInverseConv3d)
voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32) # n, point_features
coordinates = torch.tensor(
[[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
unet_ret_dict = self.forward(voxel_features, coordinates, 2)
seg_features = unet_ret_dict['seg_features']
spatial_features = unet_ret_dict['spatial_features']
assert seg_features.shape == torch.Size([4, 16])
assert spatial_features.shape == torch.Size([2, 256, 200, 176])
def test_SparseBasicBlock():
voxel_features = torch.tensor([[6.56126, 0.9648336, -1.7339306, 0.315],
[6.8162713, -2.480431, -1.3616394, 0.36],
[11.643568, -4.744306, -1.3580885, 0.16],
[23.482342, 6.5036807, 0.5806964, 0.35]],
dtype=torch.float32) # n, point_features
coordinates = torch.tensor(
[[0, 12, 819, 131], [0, 16, 750, 136], [1, 16, 705, 232],
[1, 35, 930, 469]],
dtype=torch.int32) # n, 4(batch, ind_x, ind_y, ind_z)
# test v0
self = SparseBasicBlockV0(
4,
4,
indice_key='subm0',
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
out_features = self(input_sp_tensor)
assert out_features.features.shape == torch.Size([4, 4])
    # test the current SparseBasicBlock
input_sp_tensor = spconv.SparseConvTensor(voxel_features, coordinates,
[41, 1600, 1408], 2)
self = SparseBasicBlock(
4,
4,
conv_cfg=dict(type='SubMConv3d', indice_key='subm1'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01))
# test conv and bn layer
assert isinstance(self.conv1, spconv.conv.SubMConv3d)
assert self.conv1.in_channels == 4
assert self.conv1.out_channels == 4
assert isinstance(self.conv2, spconv.conv.SubMConv3d)
    assert self.conv2.in_channels == 4
assert self.conv2.out_channels == 4
assert self.bn1.eps == 1e-3
assert self.bn1.momentum == 0.01
out_features = self(input_sp_tensor)
assert out_features.features.shape == torch.Size([4, 4])
 import argparse
 import os.path as osp
+import tools.data_converter.indoor_converter as indoor
 import tools.data_converter.kitti_converter as kitti
 import tools.data_converter.nuscenes_converter as nuscenes_converter
-import tools.data_converter.scannet_converter as scannet
-import tools.data_converter.sunrgbd_converter as sunrgbd
 from tools.data_converter.create_gt_database import create_groundtruth_database
@@ -46,11 +45,11 @@ def nuscenes_data_prep(root_path,
 def scannet_data_prep(root_path, info_prefix, out_dir):
-    scannet.create_scannet_info_file(root_path, info_prefix, out_dir)
+    indoor.create_indoor_info_file(root_path, info_prefix, out_dir)
 def sunrgbd_data_prep(root_path, info_prefix, out_dir):
-    sunrgbd.create_sunrgbd_info_file(root_path, info_prefix, out_dir)
+    indoor.create_indoor_info_file(root_path, info_prefix, out_dir)
 parser = argparse.ArgumentParser(description='Data converter arg parser')
import os
import mmcv
from tools.data_converter.scannet_data_utils import ScanNetData
from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
def create_indoor_info_file(data_path,
pkl_prefix='sunrgbd',
save_path=None,
use_v1=False):
"""Create indoor information file.
Get information of the raw data and save it to the pkl file.
Args:
data_path (str): Path of the data.
pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'.
save_path (str): Path of the pkl to be saved. Default: None.
use_v1 (bool): Whether to use v1. Default: False.
"""
assert os.path.exists(data_path)
assert pkl_prefix in ['sunrgbd', 'scannet']
save_path = data_path if save_path is None else save_path
assert os.path.exists(save_path)
train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl')
val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
if pkl_prefix == 'sunrgbd':
train_dataset = SUNRGBDData(
root_path=data_path, split='train', use_v1=use_v1)
val_dataset = SUNRGBDData(
root_path=data_path, split='val', use_v1=use_v1)
else:
train_dataset = ScanNetData(root_path=data_path, split='train')
val_dataset = ScanNetData(root_path=data_path, split='val')
infos_train = train_dataset.get_infos(has_label=True)
mmcv.dump(infos_train, train_filename, 'pkl')
print(f'{pkl_prefix} info train file is saved to {train_filename}')
infos_val = val_dataset.get_infos(has_label=True)
mmcv.dump(infos_val, val_filename, 'pkl')
print(f'{pkl_prefix} info val file is saved to {val_filename}')
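
A hypothetical invocation of the converter above, assuming the ScanNet data lives under ./data/scannet (it mirrors the __main__ entry point of the old scannet converter that this commit removes):

    create_indoor_info_file(
        data_path='./data/scannet',
        pkl_prefix='scannet',
        save_path='./data/scannet')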
import os
import pickle
from pathlib import Path
from tools.data_converter.scannet_data_utils import ScanNetData
def create_scannet_info_file(data_path, pkl_prefix='scannet', save_path=None):
assert os.path.exists(data_path)
if save_path is None:
save_path = Path(data_path)
else:
save_path = Path(save_path)
assert os.path.exists(save_path)
train_filename = save_path / f'{pkl_prefix}_infos_train.pkl'
val_filename = save_path / f'{pkl_prefix}_infos_val.pkl'
train_dataset = ScanNetData(root_path=data_path, split='train')
val_dataset = ScanNetData(root_path=data_path, split='val')
scannet_infos_train = train_dataset.get_scannet_infos(has_label=True)
with open(train_filename, 'wb') as f:
pickle.dump(scannet_infos_train, f)
print('Scannet info train file is saved to %s' % train_filename)
scannet_infos_val = val_dataset.get_scannet_infos(has_label=True)
with open(val_filename, 'wb') as f:
pickle.dump(scannet_infos_val, f)
print('Scannet info val file is saved to %s' % val_filename)
if __name__ == '__main__':
create_scannet_info_file(
data_path='./data/scannet', save_path='./data/scannet')
+import concurrent.futures as futures
 import os
+import mmcv
 import numpy as np
 class ScanNetData(object):
-    ''' Load and parse object data '''
+    """ScanNet Data
+    Generate scannet infos for scannet_converter
+    Args:
+        root_path (str): Root path of the raw data
+        split (str): Set split type of the data. Default: 'train'.
+    """
     def __init__(self, root_path, split='train'):
         self.root_dir = root_path
@@ -25,28 +34,37 @@ class ScanNetData(object):
             for i, nyu40id in enumerate(list(self.cat_ids))
         }
         assert split in ['train', 'val', 'test']
-        split_dir = os.path.join(self.root_dir, 'meta_data',
-                                 'scannetv2_%s.txt' % split)
-        self.sample_id_list = [x.strip() for x in open(split_dir).readlines()
-                               ] if os.path.exists(split_dir) else None
+        split_file = os.path.join(self.root_dir, 'meta_data',
+                                  f'scannetv2_{split}.txt')
+        mmcv.check_file_exist(split_file)
+        self.sample_id_list = mmcv.list_from_file(split_file)
     def __len__(self):
         return len(self.sample_id_list)
     def get_box_label(self, idx):
         box_file = os.path.join(self.root_dir, 'scannet_train_instance_data',
-                                '%s_bbox.npy' % idx)
+                                f'{idx}_bbox.npy')
         assert os.path.exists(box_file)
         return np.load(box_file)
-    def get_scannet_infos(self,
-                          num_workers=4,
-                          has_label=True,
-                          sample_id_list=None):
-        import concurrent.futures as futures
+    def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
+        """Get data infos.
+
+        This method gets information from the raw data.
+
+        Args:
+            num_workers (int): Number of threads to be used. Default: 4.
+            has_label (bool): Whether the data has label. Default: True.
+            sample_id_list (List[int]): Index list of the sample.
+                Default: None.
+        Returns:
+            infos (List[dict]): Information of the raw data.
+        """
         def process_single_scene(sample_idx):
-            print('%s sample_idx: %s' % (self.split, sample_idx))
+            print(f'{self.split} sample_idx: {sample_idx}')
             info = dict()
             pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
             info['point_cloud'] = pc_info