Commit 99db60dd authored by zhangwenwei

Merge branch 'feature_parta2_head' into 'master'

Feature parta2 head

See merge request open-mmlab/mmdet.3d!12
parents 2a7c24bb 21a8c818
@@ -116,7 +116,7 @@ input_modality = dict(
     use_lidar=True,
     use_depth=False,
     use_lidar_intensity=True,
-    use_camera=False,
+    use_camera=True,
 )
 db_sampler = dict(
     root_path=data_root,
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
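# With this range and voxel size the dense grid is 1408 x 1600 x 40 voxels in
# (x, y, z); the sparse encoder's output_shape below is [41, 1600, 1408]
# (z, y, x order) since the z dimension is padded by one extra slice.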
model = dict(
type='PartA2',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(16000, 40000)  # (training, testing) max_voxels
),
voxel_encoder=dict(type='VoxelFeatureExtractorV3'),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False,
),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
num_filters=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
num_upsample_filters=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
rotations=[0, 1.57],
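        # one range/size pair per class (Pedestrian, Cyclist, Car); sizes
        # are (w, l, h) in metres, and the two yaw rotations (0 and ~pi/2)
        # double the number of anchors per location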
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
)
test_cfg = dict(
rpn=dict(
nms_pre=1024,
max_per_img=100,
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.7,
score_thr=0))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True, use_depth=False, use_lidar_intensity=True, use_camera=True)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
)
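# The sampler above augments each scene by pasting ground-truth objects from
# the offline database (up to 12 Cars, 6 Pedestrians and 6 Cyclists per
# frame), after filtering out entries with difficulty -1 or too few points.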
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0, 0, 0],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.39269908, 0.39269908]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
betas=(0.95, 0.99),  # the momentum is changed during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True)
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl', port=29502)
log_level = 'INFO'
work_dir = './work_dirs/parta2_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
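For reference, a minimal sketch of how a config like this one is consumed,
assuming the mmcv Config and mmdet build_detector entry points used elsewhere
in this repo (the config path matches the one referenced in the tests below):

from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile(
    'configs/kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
# build the detector with its train/test settings attached
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)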
 from .boxvelo_head import Anchor3DVeloHead
+from .parta2_rpn_head import PartA2RPNHead
 from .second_head import SECONDHead
-__all__ = ['Anchor3DVeloHead', 'SECONDHead']
+__all__ = ['Anchor3DVeloHead', 'SECONDHead', 'PartA2RPNHead']
from __future__ import division
import numpy as np
import torch
from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.models import HEADS
from .second_head import SECONDHead
@HEADS.register_module
class PartA2RPNHead(SECONDHead):
"""rpn head for PartA2
Args:
class_name (list[str]): name of classes (TODO: to be removed)
in_channels (int): Number of channels in the input feature map.
train_cfg (dict): train configs
test_cfg (dict): test configs
feat_channels (int): Number of channels of the feature map.
use_direction_classifier (bool): Whether to add a direction classifier.
encode_bg_as_zeros (bool): Whether to use sigmoid of softmax
(TODO: to be removed)
box_code_size (int): The size of box code.
anchor_generator(dict): Config dict of anchor generator.
assigner_per_size (bool): Whether to do assignment for each separate
anchor size.
assign_per_class (bool): Whether to do assignment for each class.
diff_rad_by_sin (bool): Whether to change the difference into sin
difference for box regression loss.
dir_offset (float | int): The offset of BEV rotation angles
(TODO: may be moved into box coder)
dirlimit_offset (float | int): The limited range of BEV rotation angles
(TODO: may be moved into box coder)
box_coder (dict): Config dict of box coders.
loss_cls (dict): Config of classification loss.
loss_bbox (dict): Config of localization loss.
loss_dir (dict): Config of direction classifier loss.
""" # npqa:W293
def __init__(self,
class_name,
in_channels,
train_cfg,
test_cfg,
feat_channels=256,
use_direction_classifier=True,
encode_bg_as_zeros=False,
box_code_size=7,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
custom_values=[],
reshape_out=False),
assigner_per_size=False,
assign_per_class=False,
diff_rad_by_sin=True,
dir_offset=0,
dir_limit_offset=1,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):
super().__init__(class_name, in_channels, train_cfg, test_cfg,
feat_channels, use_direction_classifier,
encode_bg_as_zeros, box_code_size, anchor_generator,
assigner_per_size, assign_per_class, diff_rad_by_sin,
dir_offset, dir_limit_offset, bbox_coder, loss_cls,
loss_bbox, loss_dir)
def get_bboxes(self,
cls_scores,
bbox_preds,
dir_cls_preds,
input_metas,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds)
assert len(cls_scores) == len(dir_cls_preds)
num_levels = len(cls_scores)
featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
device = cls_scores[0].device
mlvl_anchors = self.anchor_generator.grid_anchors(
featmap_sizes, device=device)
mlvl_anchors = [
anchor.reshape(-1, self.box_code_size) for anchor in mlvl_anchors
]
result_list = []
for img_id in range(len(input_metas)):
cls_score_list = [
cls_scores[i][img_id].detach() for i in range(num_levels)
]
bbox_pred_list = [
bbox_preds[i][img_id].detach() for i in range(num_levels)
]
dir_cls_pred_list = [
dir_cls_preds[i][img_id].detach() for i in range(num_levels)
]
input_meta = input_metas[img_id]
proposals = self.get_bboxes_single(cls_score_list, bbox_pred_list,
dir_cls_pred_list, mlvl_anchors,
input_meta, cfg, rescale)
result_list.append(proposals)
return result_list
def get_bboxes_single(self,
cls_scores,
bbox_preds,
dir_cls_preds,
mlvl_anchors,
input_meta,
cfg,
rescale=False):
assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
mlvl_bboxes = []
mlvl_max_scores = []
mlvl_label_pred = []
mlvl_dir_scores = []
mlvl_cls_score = []
for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
cls_score = cls_score.permute(1, 2,
0).reshape(-1, self.num_classes)
if self.use_sigmoid_cls:
scores = cls_score.sigmoid()
else:
scores = cls_score.softmax(-1)
bbox_pred = bbox_pred.permute(1, 2,
0).reshape(-1, self.box_code_size)
nms_pre = cfg.get('nms_pre', -1)
if self.use_sigmoid_cls:
max_scores, pred_labels = scores.max(dim=1)
else:
max_scores, pred_labels = scores[:, :-1].max(dim=1)
# get topk
if nms_pre > 0 and scores.shape[0] > nms_pre:
topk_scores, topk_inds = max_scores.topk(nms_pre)
anchors = anchors[topk_inds, :]
bbox_pred = bbox_pred[topk_inds, :]
max_scores = topk_scores
cls_score = cls_score[topk_inds, :]
dir_cls_score = dir_cls_score[topk_inds]
pred_labels = pred_labels[topk_inds]
bboxes = self.bbox_coder.decode(anchors, bbox_pred)
mlvl_bboxes.append(bboxes)
mlvl_max_scores.append(max_scores)
mlvl_cls_score.append(cls_score)
mlvl_label_pred.append(pred_labels)
mlvl_dir_scores.append(dir_cls_score)
mlvl_bboxes = torch.cat(mlvl_bboxes)
mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
mlvl_max_scores = torch.cat(mlvl_max_scores)
mlvl_label_pred = torch.cat(mlvl_label_pred)
mlvl_dir_scores = torch.cat(mlvl_dir_scores)
mlvl_cls_score = torch.cat(
mlvl_cls_score) # shape [k, num_class] before sigmoid
score_thr = cfg.get('score_thr', 0)
result = self.class_agnostic_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred,
mlvl_cls_score, mlvl_dir_scores,
score_thr, cfg.nms_post, cfg)
result.update(dict(sample_idx=input_meta['sample_idx']))
return result
def class_agnostic_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms,
mlvl_max_scores, mlvl_label_pred, mlvl_cls_score,
mlvl_dir_scores, score_thr, max_num, cfg):
bboxes = []
scores = []
labels = []
dir_scores = []
cls_scores = []
score_thr_inds = mlvl_max_scores > score_thr
_scores = mlvl_max_scores[score_thr_inds]
_bboxes_for_nms = mlvl_bboxes_for_nms[score_thr_inds, :]
if cfg.use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)
_mlvl_bboxes = mlvl_bboxes[score_thr_inds, :]
_mlvl_dir_scores = mlvl_dir_scores[score_thr_inds]
_mlvl_label_pred = mlvl_label_pred[score_thr_inds]
_mlvl_cls_score = mlvl_cls_score[score_thr_inds]
if len(selected) > 0:
bboxes.append(_mlvl_bboxes[selected])
scores.append(_scores[selected])
labels.append(_mlvl_label_pred[selected])
cls_scores.append(_mlvl_cls_score[selected])
dir_scores.append(_mlvl_dir_scores[selected])
dir_rot = box_torch_ops.limit_period(
bboxes[-1][..., 6] - self.dir_offset, self.dir_limit_offset,
np.pi)
bboxes[-1][..., 6] = (
dir_rot + self.dir_offset +
np.pi * dir_scores[-1].to(bboxes[-1].dtype))
if bboxes:
bboxes = torch.cat(bboxes, dim=0)
scores = torch.cat(scores, dim=0)
cls_scores = torch.cat(cls_scores, dim=0)
labels = torch.cat(labels, dim=0)
dir_scores = torch.cat(dir_scores, dim=0)
if bboxes.shape[0] > max_num:
_, inds = scores.sort(descending=True)
inds = inds[:max_num]
bboxes = bboxes[inds, :]
labels = labels[inds]
scores = scores[inds]
cls_scores = cls_scores[inds]
dir_scores = dir_scores[inds]
return dict(
box3d_lidar=bboxes.cpu(),
scores=scores.cpu(),
label_preds=labels.cpu(),
                cls_preds=cls_scores.cpu()  # raw scores, shape [max_num, num_classes]
            )
else:
            return dict(
                box3d_lidar=mlvl_bboxes.new_zeros([0, self.box_code_size]).cpu(),
                scores=mlvl_bboxes.new_zeros([0]).cpu(),
                label_preds=mlvl_bboxes.new_zeros([0]).cpu(),
                cls_preds=mlvl_bboxes.new_zeros(
                    [0, mlvl_cls_score.shape[-1]]).cpu())
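A small numeric sketch (not part of the diff) of the direction recovery
performed at the end of class_agnostic_nms: the regressed yaw is first folded
into a single half-period, then the direction classifier restores the missing
multiple of pi. limit_period is assumed to follow the usual
val - floor(val / period + offset) * period definition:

import numpy as np

def limit_period(val, offset=0.5, period=np.pi):
    # fold val into a window of width `period` determined by `offset`
    return val - np.floor(val / period + offset) * period

yaw_pred, dir_score = 2.5, 1           # regressed yaw and direction class
dir_offset, dir_limit_offset = 0, 1    # the head's default offsets
dir_rot = limit_period(yaw_pred - dir_offset, dir_limit_offset, np.pi)
# dir_rot is ~-0.64; adding pi * dir_score recovers ~2.5
final_yaw = dir_rot + dir_offset + np.pi * dir_score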
@@ -3,10 +3,11 @@ from .mvx_faster_rcnn import (DynamicMVXFasterRCNN, DynamicMVXFasterRCNNV2,
                               DynamicMVXFasterRCNNV3)
 from .mvx_single_stage import MVXSingleStageDetector
 from .mvx_two_stage import MVXTwoStageDetector
+from .parta2 import PartA2
 from .voxelnet import DynamicVoxelNet, VoxelNet
 __all__ = [
     'BaseDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXSingleStageDetector',
     'MVXTwoStageDetector', 'DynamicMVXFasterRCNN', 'DynamicMVXFasterRCNNV2',
-    'DynamicMVXFasterRCNNV3'
+    'DynamicMVXFasterRCNNV3', 'PartA2'
 ]
import torch
import torch.nn.functional as F
from mmdet3d.ops import Voxelization
from mmdet.models import DETECTORS, TwoStageDetector
from .. import builder
@DETECTORS.register_module
class PartA2(TwoStageDetector):
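    """PartA2 detector: voxelizes raw points, encodes them with a sparse
    UNet middle encoder, and runs a SECOND backbone + FPN + RPN head on the
    dense BEV features; proposals are then refined by the RoI head."""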
def __init__(self,
voxel_layer,
voxel_encoder,
middle_encoder,
backbone,
neck=None,
rpn_head=None,
roi_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None):
super(PartA2, self).__init__(
backbone=backbone,
neck=neck,
rpn_head=rpn_head,
roi_head=roi_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
pretrained=pretrained,
)
self.voxel_layer = Voxelization(**voxel_layer)
self.voxel_encoder = builder.build_voxel_encoder(voxel_encoder)
self.middle_encoder = builder.build_middle_encoder(middle_encoder)
def extract_feat(self, points, img_meta):
voxels, num_points, coors = self.voxelize(points)
voxel_dict = dict(voxels=voxels, num_points=num_points, coors=coors)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
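        # coors[:, 0] holds the batch index appended in self.voxelize, so
        # the last (largest) index plus one gives the batch size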
batch_size = coors[-1, 0].item() + 1
feats_dict = self.middle_encoder(voxel_features, coors, batch_size)
x = self.backbone(feats_dict['spatial_features'])
if self.with_neck:
neck_feats = self.neck(x)
feats_dict.update({'neck_feats': neck_feats})
return feats_dict, voxel_dict
@torch.no_grad()
def voxelize(self, points):
voxels, coors, num_points = [], [], []
for res in points:
res_voxels, res_coors, res_num_points = self.voxel_layer(res)
voxels.append(res_voxels)
coors.append(res_coors)
num_points.append(res_num_points)
voxels = torch.cat(voxels, dim=0)
num_points = torch.cat(num_points, dim=0)
coors_batch = []
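        # prepend each sample's batch index to its (z, y, x) voxel
        # coordinates so they remain distinguishable after concatenation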
for i, coor in enumerate(coors):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
def forward_train(self,
points,
img_meta,
gt_bboxes_3d,
gt_labels_3d,
gt_bboxes_ignore=None,
proposals=None):
# TODO: complete it
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
losses = dict()
if self.with_rpn:
rpn_outs = self.rpn_head(feats_dict['neck_feats'])
rpn_loss_inputs = rpn_outs + (gt_bboxes_3d, gt_labels_3d, img_meta)
rpn_losses = self.rpn_head.loss(
*rpn_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
losses.update(rpn_losses)
proposal_cfg = self.train_cfg.get('rpn_proposal',
self.test_cfg.rpn)
proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
else:
proposal_list = proposals # noqa: F841
return losses
def forward_test(self, **kwargs):
return self.simple_test(**kwargs)
def forward(self, return_loss=True, **kwargs):
if return_loss:
return self.forward_train(**kwargs)
else:
return self.forward_test(**kwargs)
def simple_test(self,
points,
img_meta,
gt_bboxes_3d=None,
proposals=None,
rescale=False):
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
# TODO: complete it
if proposals is None:
proposal_list = self.simple_test_rpn(feats_dict['neck_feats'],
img_meta, self.test_cfg.rpn)
else:
proposal_list = proposals
return self.roi_head.simple_test(
feats_dict, proposal_list, img_meta, rescale=rescale)
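As a rough sketch of the data flow above (tensor shapes assume the config
earlier in this commit; `model` is a built PartA2 instance and `points` a
list of (N_i, 4) LiDAR tensors):

feats_dict, voxel_dict = model.extract_feat(points, img_meta=None)
# feats_dict['spatial_features']: dense BEV map from the sparse UNet
# feats_dict['neck_feats']: FPN outputs, e.g. a [2, 512, 200, 176] tensor
cls_scores, bbox_preds, dir_preds = model.rpn_head(feats_dict['neck_feats'])
proposals = model.rpn_head.get_bboxes(cls_scores, bbox_preds, dir_preds,
                                      [dict(sample_idx=i) for i in range(len(points))],
                                      model.test_cfg.rpn)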
import copy
from os.path import dirname, exists, join
import pytest
import torch
def _get_config_directory():
""" Find the predefined detector config directory """
try:
# Assume we are running in the source mmdetection repo
repo_dpath = dirname(dirname(__file__))
except NameError:
# For IPython development when this __file__ is not defined
import mmdet
repo_dpath = dirname(dirname(mmdet.__file__))
config_dpath = join(repo_dpath, 'configs')
if not exists(config_dpath):
raise Exception('Cannot find config path')
return config_dpath
def _get_config_module(fname):
"""
Load a configuration as a python module
"""
from mmcv import Config
config_dpath = _get_config_directory()
config_fpath = join(config_dpath, fname)
config_mod = Config.fromfile(config_fpath)
return config_mod
def _get_head_cfg(fname):
"""
Grab configs necessary to create a bbox_head. These are deep copied to
allow for safe modification of parameters without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.test_cfg))
bbox_head = model.bbox_head
bbox_head.update(train_cfg=train_cfg)
bbox_head.update(test_cfg=test_cfg)
return bbox_head
def _get_rpn_head_cfg(fname):
"""
Grab configs necessary to create an rpn_head. These are deep copied to allow
for safe modification of parameters without influencing other tests.
"""
import mmcv
config = _get_config_module(fname)
model = copy.deepcopy(config.model)
train_cfg = mmcv.Config(copy.deepcopy(config.train_cfg))
test_cfg = mmcv.Config(copy.deepcopy(config.test_cfg))
rpn_head = model.rpn_head
rpn_head.update(train_cfg=train_cfg.rpn)
rpn_head.update(test_cfg=test_cfg.rpn)
return rpn_head, train_cfg.rpn_proposal
def test_second_head_loss():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(bbox_head_cfg)
self.cuda()
assert isinstance(self.conv_cls, torch.nn.modules.conv.Conv2d)
assert self.conv_cls.in_channels == 512
assert self.conv_cls.out_channels == 18
assert self.conv_reg.out_channels == 42
assert self.conv_dir_cls.out_channels == 12
# test forward
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
assert cls_score[0].shape == torch.Size([2, 18, 200, 176])
assert bbox_pred[0].shape == torch.Size([2, 42, 200, 176])
assert dir_cls_preds[0].shape == torch.Size([2, 12, 200, 176])
# test loss
gt_bboxes = list(
torch.tensor(
[[[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197, -0.9091]],
[[16.9107, 9.7925, -1.9201, 1.6097, 3.2786, 1.5307, -2.4056]]],
dtype=torch.float32).cuda())
gt_labels = list(torch.tensor([[0], [1]], dtype=torch.int64).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert losses['loss_cls_3d'][0] > 0
assert losses['loss_bbox_3d'][0] > 0
assert losses['loss_dir_3d'][0] > 0
# test empty ground truth case
gt_bboxes = list(torch.empty((2, 0, 7)).cuda())
gt_labels = list(torch.empty((2, 0)).cuda())
empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert empty_gt_losses['loss_cls_3d'][0] > 0
assert empty_gt_losses['loss_bbox_3d'][0] == 0
assert empty_gt_losses['loss_dir_3d'][0] == 0
def test_second_head_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
bbox_head_cfg = _get_head_cfg(
'kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(bbox_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
cls_score[0] -= 1.5 # too many positive samples may cause cuda oom
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas)
assert (result_list[0]['scores'] > 0.3).all()
def test_parta2_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_rpn_head_cfg(
'kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(rpn_head_cfg)
self.cuda()
feats = list()
feats.append(torch.rand([2, 512, 200, 176], dtype=torch.float32).cuda())
input_metas = [{
'sample_idx': 1234
}, {
'sample_idx': 2345
}] # fake input_metas
(cls_score, bbox_pred, dir_cls_preds) = self.forward(feats)
# test get_boxes
cls_score[0] -= 1.5 # too many positive samples may cause cuda oom
result_list = self.get_bboxes(cls_score, bbox_pred, dir_cls_preds,
input_metas, proposal_cfg)
assert result_list[0]['scores'].shape == torch.Size([512])
assert result_list[0]['label_preds'].shape == torch.Size([512])
assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])
assert result_list[0]['box3d_lidar'].shape == torch.Size([512, 7])
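These tests skip without a CUDA-capable GPU. Assuming the file lives under
tests/ (the exact filename is not shown in this diff), a single case can be
run with, for example:

pytest tests/test_heads.py::test_parta2_rpnhead_getboxes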