"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "02d83c9ff1b93f2c6f9c94f9369b3e4bc1ba8ce7"
Commit 36f658a5 authored by ChaimZhu, committed by GitHub

[Feature] Add SMOKE detector and Configs (#975)

* add smoke detector and cfgs

* fix init

* fix input_modality
parent 80f372e1
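# Dataset base config (referenced below as '../_base_/datasets/kitti-mono3d.py')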
dataset_type = 'KittiMonoDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=False,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
'centers2d', 'depths'
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='MultiScaleFlipAug',
img_scale=(1242, 375),
flip=False,
transforms=[
dict(type='RandomFlip3D'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img']),
])
]
# construct a pipeline for data and gt loading in the show function
# please keep its loading behaviour consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train_mono3d.coco.json',
info_file=data_root + 'kitti_infos_train.pkl',
img_prefix=data_root,
classes=class_names,
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
box_type_3d='Camera'),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
info_file=data_root + 'kitti_infos_val.pkl',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
box_type_3d='Camera'),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
info_file=data_root + 'kitti_infos_val.pkl',
img_prefix=data_root,
classes=class_names,
pipeline=test_pipeline,
modality=input_modality,
test_mode=True,
box_type_3d='Camera'))
evaluation = dict(interval=2)
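# Model base config (referenced below as '../_base_/models/smoke.py')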
model = dict(
type='SMOKEMono3D',
backbone=dict(
type='DLANet',
depth=34,
in_channels=3,
norm_cfg=dict(type='GN', num_groups=32),
init_cfg=dict(
type='Pretrained',
checkpoint='http://dl.yf.io/dla/models/imagenet/dla34-ba72cf86.pth'
)),
neck=dict(
type='DLANeck',
in_channels=[16, 32, 64, 128, 256, 512],
start_level=2,
end_level=5,
norm_cfg=dict(type='GN', num_groups=32)),
bbox_head=dict(
type='SMOKEMono3DHead',
num_classes=3,
in_channels=64,
dim_channel=[3, 4, 5],
ori_channel=[6, 7],
stacked_convs=0,
feat_channels=64,
use_direction_classifier=False,
diff_rad_by_sin=False,
pred_attrs=False,
pred_velo=False,
dir_offset=0,
strides=None,
group_reg_dims=(8, ),
cls_branch=(256, ),
reg_branch=((256, ), ),
num_attrs=0,
bbox_code_size=7,
dir_branch=(),
attr_branch=(),
bbox_coder=dict(
type='SMOKECoder',
base_depth=(28.01, 16.32),
base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63,
1.53)),
code_size=7),
loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0),
loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_attr=None,
conv_bias=True,
dcn_on_last_conv=False),
train_cfg=None,
test_cfg=dict(topK=100, local_maximum_kernel=3, max_per_img=100))
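# Top-level config: smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py
# (the same path is loaded by _get_detector_cfg in the unit test further below)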
_base_ = [
'../_base_/datasets/kitti-mono3d.py', '../_base_/models/smoke.py',
'../_base_/default_runtime.py'
]
# optimizer
optimizer = dict(type='Adam', lr=2.5e-4)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='step', warmup=None, step=[50])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=72)
log_config = dict(interval=10)
find_unused_parameters = True
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=False,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='RandomShiftScale', shift_scale=(0.2, 0.4), aug_prob=0.3),
dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
'centers2d', 'depths'
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D'),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 384),
flip=False,
transforms=[
dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img']),
])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(pipeline=train_pipeline),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
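With the two base fragments and the top-level config above in place, the detector can be constructed through the config system. A minimal sketch, assuming a repository checkout with mmcv and mmdet3d installed; the config path is the one exercised by the unit test further below:

from mmcv import Config

from mmdet3d.models import build_detector

# Load the merged config (the _base_ fragments are resolved automatically)
# and build the SMOKE detector; train_cfg/test_cfg already live inside the
# model dict in this config.
cfg = Config.fromfile(
    'configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py')
model = build_detector(cfg.model)

The hunks that follow register the new detector in the detectors package `__init__.py`.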
@@ -11,6 +11,7 @@ from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
from .mvx_two_stage import MVXTwoStageDetector
from .parta2 import PartA2
from .single_stage_mono3d import SingleStageMono3DDetector
+from .smoke_mono3d import SMOKEMono3D
from .ssd3dnet import SSD3DNet
from .votenet import VoteNet
from .voxelnet import VoxelNet
@@ -19,5 +20,5 @@ __all__ = [
'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
-    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet'
+    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'SMOKEMono3D'
]
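# New detector module (imported above as .smoke_mono3d)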
from mmdet.models.builder import DETECTORS
from .single_stage_mono3d import SingleStageMono3DDetector


@DETECTORS.register_module()
class SMOKEMono3D(SingleStageMono3DDetector):
    r"""`SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object
    detection.
    """

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(SMOKEMono3D, self).__init__(backbone, neck, bbox_head, train_cfg,
                                          test_cfg, pretrained)
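SMOKEMono3D itself only forwards the backbone, neck and head configs to the generic SingleStageMono3DDetector; the register_module() decorator is what lets config files refer to it by name. A small sanity-check sketch, assuming mmdet and mmdet3d are importable:

from mmdet.models.builder import DETECTORS

from mmdet3d.models.detectors import SMOKEMono3D

# The registry maps the string name used in configs (type='SMOKEMono3D')
# back to the registered class.
assert DETECTORS.get('SMOKEMono3D') is SMOKEMono3D

The unit test below is appended to the existing detector tests (presumably tests/test_models/test_detectors.py) and exercises both forward_train and simple_test.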
@@ -470,3 +470,71 @@ def test_imvoxelnet():
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0


def test_smoke():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    _setup_seed(0)
    smoke_cfg = _get_detector_cfg(
        'smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py')
    self = build_detector(smoke_cfg).cuda()
    imgs = torch.rand([1, 3, 384, 1280], dtype=torch.float32).cuda()
    gt_bboxes = [
        torch.Tensor([[563.63122442, 175.02195182, 614.81298184, 224.97763099],
                      [480.89676358, 179.86272635, 511.53017463, 202.54645962],
                      [541.48322272, 175.73767011, 564.55208966, 193.95009791],
                      [329.51448848, 176.14566789, 354.24670848, 213.82599081]
                      ]).cuda()
    ]
    gt_bboxes_3d = [
        CameraInstance3DBoxes(
            torch.Tensor([[-0.69, 1.69, 25.01, 3.20, 1.61, 1.66, -1.59],
                          [-7.43, 1.88, 47.55, 3.70, 1.40, 1.51, 1.55],
                          [-4.71, 1.71, 60.52, 4.05, 1.46, 1.66, 1.56],
                          [-12.63, 1.88, 34.09, 1.95, 1.72, 0.50,
                           1.54]]).cuda(),
            box_dim=7)
    ]
    gt_labels = [torch.tensor([0, 0, 0, 1]).cuda()]
    gt_labels_3d = gt_labels
    centers2d = [
        torch.Tensor([[589.6528477, 198.3862263], [496.8143155, 190.75967182],
                      [553.40528354, 184.53785991],
                      [342.23690317, 194.44298819]]).cuda()
    ]
    # depths is actually not used in smoke head loss computation
    depths = [torch.rand([3], dtype=torch.float32).cuda()]
    attr_labels = None
    img_metas = [
        dict(
            cam_intrinsic=[[721.5377, 0., 609.5593, 0.],
                           [0., 721.5377, 172.854, 0.], [0., 0., 1., 0.],
                           [0., 0., 0., 1.]],
            scale_factor=np.array([1., 1., 1., 1.], dtype=np.float32),
            pad_shape=[384, 1280],
            trans_mat=np.array([[0.25, 0., 0.], [0., 0.25, 0], [0., 0., 1.]],
                               dtype=np.float32),
            affine_aug=False,
            box_type_3d=CameraInstance3DBoxes)
    ]

    # test forward_train
    losses = self.forward_train(imgs, img_metas, gt_bboxes, gt_labels,
                                gt_bboxes_3d, gt_labels_3d, centers2d, depths,
                                attr_labels)
    assert losses['loss_cls'] >= 0
    assert losses['loss_bbox'] >= 0

    # test simple_test
    with torch.no_grad():
        results = self.simple_test(imgs, img_metas)
    boxes_3d = results[0]['img_bbox']['boxes_3d']
    scores_3d = results[0]['img_bbox']['scores_3d']
    labels_3d = results[0]['img_bbox']['labels_3d']
    assert boxes_3d.tensor.shape[0] >= 0
    assert boxes_3d.tensor.shape[1] == 7
    assert scores_3d.shape[0] >= 0
    assert labels_3d.shape[0] >= 0