Commit bd73d3b9 authored by jshilong, committed by ChaimZhu

[Refactor] MVXTwoStage & CenterPoint

parent 360c27f9
voxel_size = [0.1, 0.1, 0.2]
model = dict(
type='CenterPoint',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
pts_voxel_layer=dict(
max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
......@@ -54,8 +55,9 @@ model = dict(
code_size=9),
separate_head=dict(
type='SeparateHead', init_bias=-2.19, final_kernel=3),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(
type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
norm_bbox=True),
# model training and testing settings
train_cfg=dict(
......
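A note on the new `mmdet.`-prefixed types (`mmdet.GaussianFocalLoss`, `mmdet.L1Loss`) in the hunk above: under the MMEngine-style registry, a scope prefix routes the lookup to the parent library's registry instead of mmdet3d's own. The sketch below illustrates the idea with toy registries; it is an assumption-level sketch, not the real mmengine implementation.

# Toy sketch of scope-prefixed registry lookup (assumption: this mirrors
# how a config type like 'mmdet.L1Loss' gets resolved in mmdet's registry
# rather than mmdet3d's).
class ToyRegistry:
    def __init__(self, scope):
        self.scope = scope
        self._modules = {}
        self._children = {}

    def register(self, cls):
        self._modules[cls.__name__] = cls
        return cls

    def add_child(self, child):
        self._children[child.scope] = child

    def build(self, cfg):
        cfg = dict(cfg)
        type_name = cfg.pop('type')
        if '.' in type_name:  # e.g. 'mmdet.L1Loss'
            scope, name = type_name.split('.', 1)
            return self._children[scope].build(dict(type=name, **cfg))
        return self._modules[type_name](**cfg)

MMDET_MODELS = ToyRegistry('mmdet')
MMDET3D_MODELS = ToyRegistry('mmdet3d')
MMDET3D_MODELS.add_child(MMDET_MODELS)

@MMDET_MODELS.register
class L1Loss:
    def __init__(self, reduction='mean', loss_weight=1.0):
        self.reduction, self.loss_weight = reduction, loss_weight

# The 'mmdet.' prefix routes the build into the mmdet registry.
loss = MMDET3D_MODELS.build(
    dict(type='mmdet.L1Loss', reduction='mean', loss_weight=0.25))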
voxel_size = [0.2, 0.2, 8]
model = dict(
type='CenterPoint',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
pts_voxel_layer=dict(
max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
pts_voxel_encoder=dict(
......@@ -53,8 +54,9 @@ model = dict(
code_size=9),
separate_head=dict(
type='SeparateHead', init_bias=-2.19, final_kernel=3),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(
type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
norm_bbox=True),
# model training and testing settings
train_cfg=dict(
......
# This schedule is mainly used by models with dynamic voxelization
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
betas=(0.95, 0.99),  # the momentum changes during training
weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
type='AdamW', lr=lr, weight_decay=0.001, betas=(0.95, 0.99)),
clip_grad=dict(max_norm=10, norm_type=2),
)
lr_config = dict(
policy='CosineAnnealing',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
min_lr_ratio=1e-5)
momentum_config = None
runner = dict(type='EpochBasedRunner', max_epochs=40)
param_scheduler = [
dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=1000),
dict(
type='CosineAnnealingLR',
begin=0,
T_max=40,
end=40,
by_epoch=True,
eta_min=1e-5)
]
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=40, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
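For intuition, the `optim_wrapper`/`param_scheduler` pair above corresponds roughly to the plain-PyTorch setup below. This is a sketch under two assumptions: the linear warmup is stepped once per iteration for the first 1000 iterations, and the cosine schedule once per epoch; the number of iterations per epoch is hypothetical.

# Rough plain-PyTorch equivalent of the schedule above (sketch).
import torch
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR

model = torch.nn.Linear(4, 2)  # stand-in model
optimizer = AdamW(model.parameters(), lr=0.003,
                  betas=(0.95, 0.99), weight_decay=0.001)
# LinearLR ~ the warmup entry: lr ramps from 0.1 * lr to lr in 1000 iters.
warmup = LinearLR(optimizer, start_factor=0.1, total_iters=1000)
# CosineAnnealingLR ~ the epoch-based cosine entry with eta_min=1e-5.
cosine = CosineAnnealingLR(optimizer, T_max=40, eta_min=1e-5)

iters_per_epoch = 1000  # hypothetical
for epoch in range(40):
    for it in range(iters_per_epoch):
        # forward/backward would happen here, then gradient clipping
        # (~ clip_grad=dict(max_norm=10, norm_type=2)) before the step:
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)
        optimizer.step()
        optimizer.zero_grad()
        if epoch == 0 and it < 1000:
            warmup.step()  # by_epoch=False: stepped per iteration
    cosine.step()  # by_epoch=True: stepped per epoch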
......@@ -9,7 +9,7 @@ class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix = dict(pts='samples/LIDAR_TOP', img='')
model = dict(
pts_voxel_layer=dict(
voxel_size=voxel_size, point_cloud_range=point_cloud_range),
......@@ -96,7 +96,9 @@ train_pipeline = [
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
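The recurring pipeline change here merges `DefaultFormatBundle3D` + `Collect3D` into a single `Pack3DDetInputs` step that both formats the results and selects the keys. A simplified sketch of that behavior (not the real implementation):

# Simplified sketch of a packing transform: one step that formats the
# results dict and keeps only the requested keys, instead of a format
# step (DefaultFormatBundle3D) plus a collect step (Collect3D).
class ToyPack3DDetInputs:
    def __init__(self, keys):
        self.keys = keys

    def __call__(self, results):
        packed = {
            'inputs': {},        # raw network inputs, e.g. points / img
            'data_samples': {},  # annotations and meta information
        }
        for key in self.keys:
            if key in ('points', 'img'):
                packed['inputs'][key] = results[key]
            else:
                packed['data_samples'][key] = results[key]
        return packed

pack = ToyPack3DDetInputs(keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
out = pack({'points': [[0.0, 0.0, 0.0, 0.1]],
            'gt_bboxes_3d': ['box'], 'gt_labels_3d': [0],
            'unused': 'dropped'})
assert 'unused' not in out['data_samples']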
test_pipeline = [
dict(
......@@ -125,16 +127,15 @@ test_pipeline = [
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
train=dict(dataset=dict(pipeline=train_pipeline)),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
train_dataloader = dict(
dataset=dict(
dataset=dict(
pipeline=train_pipeline, metainfo=dict(CLASSES=class_names))))
test_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
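Likewise, the monolithic `data = dict(train=..., val=..., test=...)` block becomes one explicit config per dataloader. Roughly speaking, MMEngine turns such a config into a standard `torch.utils.data.DataLoader`; a hedged sketch of the correspondence, with `DefaultSampler` approximated by a plain `RandomSampler`:

# Sketch: how a *_dataloader config maps onto a torch DataLoader.
# (Assumption: DefaultSampler(shuffle=True) behaves like a
# distributed-aware RandomSampler; plain RandomSampler used here.)
from torch.utils.data import DataLoader, Dataset, RandomSampler

class ToyDataset(Dataset):
    def __len__(self):
        return 8

    def __getitem__(self, idx):
        return idx

cfg = dict(batch_size=4, num_workers=0, persistent_workers=False,
           sampler=dict(type='DefaultSampler', shuffle=True))

dataset = ToyDataset()
loader = DataLoader(
    dataset,
    batch_size=cfg['batch_size'],
    num_workers=cfg['num_workers'],
    persistent_workers=cfg['persistent_workers'],
    sampler=RandomSampler(dataset) if cfg['sampler']['shuffle'] else None)

for batch in loader:
    print(batch)  # tensors of 4 indices each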
......@@ -12,7 +12,7 @@ class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix = dict(pts='samples/LIDAR_TOP', img='')
model = dict(
pts_voxel_layer=dict(point_cloud_range=point_cloud_range),
pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),
......@@ -90,8 +90,9 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
......@@ -120,13 +121,9 @@ test_pipeline = [
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
......@@ -144,28 +141,31 @@ eval_pipeline = [
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
train=dict(
train_dataloader = dict(
_delete_=True,
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='CBGSDataset',
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
ann_file='nuscenes_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
metainfo=dict(CLASSES=class_names),
test_mode=False,
data_prefix=data_prefix,
use_valid_flag=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR')),
val=dict(pipeline=test_pipeline, classes=class_names),
test=dict(pipeline=test_pipeline, classes=class_names))
box_type_3d='LiDAR')))
test_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
evaluation = dict(interval=20, pipeline=eval_pipeline)
train_cfg = dict(val_interval=20)
......@@ -12,7 +12,7 @@ class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix = dict(pts='samples/LIDAR_TOP', img='')
model = dict(
pts_voxel_layer=dict(point_cloud_range=point_cloud_range),
pts_voxel_encoder=dict(point_cloud_range=point_cloud_range),
......@@ -91,8 +91,9 @@ train_pipeline = [
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
......@@ -119,13 +120,9 @@ test_pipeline = [
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
dict(type='RandomFlip3D')
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
......@@ -143,28 +140,31 @@ eval_pipeline = [
file_client_args=file_client_args,
pad_empty_sweeps=True,
remove_close=True),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
train=dict(
train_dataloader = dict(
_delete_=True,
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='CBGSDataset',
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
ann_file='nuscenes_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
metainfo=dict(CLASSES=class_names),
test_mode=False,
data_prefix=data_prefix,
use_valid_flag=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR')),
val=dict(pipeline=test_pipeline, classes=class_names),
test=dict(pipeline=test_pipeline, classes=class_names))
box_type_3d='LiDAR')))
test_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
evaluation = dict(interval=20, pipeline=eval_pipeline)
train_cfg = dict(val_interval=20)
......@@ -6,8 +6,14 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
model = dict(
type='DynamicMVXFasterRCNN',
data_preprocessor=dict(
type='Det3DDataPreprocessor',
mean=[102.9801, 115.9465, 122.7717],
std=[1.0, 1.0, 1.0],
bgr_to_rgb=False,
pad_size_divisor=32),
img_backbone=dict(
type='ResNet',
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
......@@ -16,7 +22,7 @@ model = dict(
norm_eval=True,
style='caffe'),
img_neck=dict(
type='FPN',
type='mmdet.FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
......@@ -82,34 +88,36 @@ model = dict(
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
pts=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
......@@ -133,18 +141,14 @@ model = dict(
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
metainfo = dict(CLASSES=class_names)
input_modality = dict(use_lidar=True, use_camera=True)
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='Resize',
img_scale=[(640, 192), (2560, 768)],
multiscale_mode='range',
keep_ratio=True),
type='RandomResize', scale=[(640, 192), (2560, 768)], keep_ratio=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
......@@ -154,12 +158,12 @@ train_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),
type='Pack3DDetInputs',
keys=[
'points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes',
'gt_labels'
])
]
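Note that `Normalize` and `Pad` disappear from the pipeline above: normalization and divisor-padding now happen in the model's `data_preprocessor` (`mean`, `std`, `bgr_to_rgb`, `pad_size_divisor`). A minimal sketch of that batch-level step, assuming CHW float tensors:

# Minimal sketch of the image branch of a data preprocessor (assumed
# behavior: per-channel normalization, then bottom/right padding so that
# H and W become multiples of pad_size_divisor).
import torch
import torch.nn.functional as F

def preprocess_img(img, mean, std, pad_size_divisor=32):
    """img: float tensor of shape (C, H, W)."""
    mean = torch.tensor(mean).view(-1, 1, 1)
    std = torch.tensor(std).view(-1, 1, 1)
    img = (img - mean) / std
    h, w = img.shape[-2:]
    pad_h = (pad_size_divisor - h % pad_size_divisor) % pad_size_divisor
    pad_w = (pad_size_divisor - w % pad_size_divisor) % pad_size_divisor
    # F.pad pads the last dims as (left, right, top, bottom).
    return F.pad(img, (0, pad_w, 0, pad_h), value=0)

img = torch.rand(3, 375, 1242) * 255
out = preprocess_img(img, mean=[102.9801, 115.9465, 122.7717],
                     std=[1.0, 1.0, 1.0])
assert out.shape[-2:] == (384, 1248)  # both divisible by 32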
test_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
......@@ -170,82 +174,79 @@ test_pipeline = [
pts_scale_ratio=1,
flip=False,
transforms=[
dict(type='Resize', multiscale_mode='value', keep_ratio=True),
# Temporary solution; fix this after refactoring the aug-test
dict(type='Resize', scale=0, keep_ratio=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'img'])
])
]),
dict(type='Pack3DDetInputs', keys=['points', 'img'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
dict(type='LoadImageFromFile'),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'img'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
modality = dict(use_lidar=True, use_camera=True)
train_dataloader = dict(
batch_size=2,
num_workers=2,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
modality=modality,
ann_file='kitti_infos_train.pkl',
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False,
box_type_3d='LiDAR')),
val=dict(
filter_empty_gt=False,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR')))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
modality=modality,
ann_file='kitti_infos_val.pkl',
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='LiDAR'),
test=dict(
box_type_3d='LiDAR'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
ann_file='kitti_infos_val.pkl',
modality=modality,
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='LiDAR'))
# Training settings
optimizer = dict(weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
evaluation = dict(interval=1, pipeline=eval_pipeline)
optim_wrapper = dict(
optimizer=dict(weight_decay=0.01),
clip_grad=dict(max_norm=35, norm_type=2),
)
val_evaluator = dict(
type='KittiMetric', ann_file='data/kitti/kitti_infos_val.pkl')
test_evaluator = val_evaluator
# You may need to download the model first if the network is unstable
load_from = 'https://download.openmmlab.com/mmdetection3d/pretrain_models/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth' # noqa
# Copyright (c) OpenMMLab. All rights reserved.
from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
from .assigners import AssignResult, BaseAssigner, Max3DIoUAssigner
# from .bbox_target import bbox_target
from .builder import build_assigner, build_bbox_coder, build_sampler
from .coders import DeltaXYZWLHRBBoxCoder
......@@ -18,7 +18,7 @@ from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
__all__ = [
'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner',
'BaseSampler', 'AssignResult', 'BaseAssigner', 'Max3DIoUAssigner',
'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',
'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',
'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D',
......
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.core.bbox import AssignResult, BaseAssigner
from .max_3d_iou_assigner import MaxIoUAssigner
from .max_3d_iou_assigner import Max3DIoUAssigner
__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
__all__ = ['BaseAssigner', 'Max3DIoUAssigner', 'AssignResult']
......@@ -35,6 +35,8 @@ class Det3DDataset(BaseDataset):
- use_camera: bool
- use_lidar: bool
Defaults to `dict(use_lidar=True, use_camera=False)`
default_cam_key (str, optional): The default camera name adopted.
Defaults to None.
box_type_3d (str, optional): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
to its original format then converted them to `box_type_3d`.
......@@ -65,6 +67,7 @@ class Det3DDataset(BaseDataset):
data_prefix: dict = dict(pts='velodyne', img=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
default_cam_key: str = None,
box_type_3d: dict = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
......@@ -84,6 +87,7 @@ class Det3DDataset(BaseDataset):
if key not in modality:
modality[key] = False
self.modality = modality
self.default_cam_key = default_cam_key
assert self.modality['use_lidar'] or self.modality['use_camera'], (
'Please specify the `modality` (`use_lidar` '
f', `use_camera`) for {self.__class__.__name__}')
......@@ -233,6 +237,20 @@ class Det3DDataset(BaseDataset):
cam_prefix = self.data_prefix.get('img', '')
img_info['img_path'] = osp.join(cam_prefix,
img_info['img_path'])
if self.default_cam_key is not None:
info['img_path'] = info['images'][
self.default_cam_key]['img_path']
if 'lidar2cam' in info['images'][self.default_cam_key]:
info['lidar2cam'] = np.array(
info['images'][self.default_cam_key]['lidar2cam'])
if 'cam2img' in info['images'][self.default_cam_key]:
info['cam2img'] = np.array(
info['images'][self.default_cam_key]['cam2img'])
if 'lidar2img' in info['images'][self.default_cam_key]:
info['lidar2img'] = np.array(
info['images'][self.default_cam_key]['lidar2img'])
else:
info['lidar2img'] = info['cam2img'] @ info['lidar2cam']
if not self.test_mode:
# used in training
......
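The `default_cam_key` branch above composes `lidar2img` from the camera extrinsics and intrinsics when it is not stored in the info file. The underlying math is plain composition of homogeneous transforms; a small sketch with hypothetical calibration values:

# Sketch of the lidar2img composition (assuming 4x4 homogeneous matrices
# as stored in the info files; a 3x3 cam2img would first be padded to 4x4).
import numpy as np

lidar2cam = np.eye(4)  # extrinsics: lidar frame -> camera frame
lidar2cam[:3, 3] = [0.0, -0.1, -0.3]  # hypothetical translation
cam2img = np.eye(4)  # intrinsics lifted to 4x4
cam2img[0, 0] = cam2img[1, 1] = 721.5  # hypothetical focal length
cam2img[0, 2], cam2img[1, 2] = 609.5, 172.8  # principal point

lidar2img = cam2img @ lidar2cam  # same composition as in the dataset code

pt_lidar = np.array([10.0, 1.0, -0.5, 1.0])  # homogeneous lidar point
pt_img = lidar2img @ pt_lidar
u, v = pt_img[0] / pt_img[2], pt_img[1] / pt_img[2]  # pixel coordinates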
......@@ -49,6 +49,7 @@ class KittiDataset(Det3DDataset):
ann_file: str,
pipeline: List[Union[dict, Callable]] = [],
modality: Optional[dict] = dict(use_lidar=True),
default_cam_key='CAM2',
box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
......@@ -61,6 +62,7 @@ class KittiDataset(Det3DDataset):
ann_file=ann_file,
pipeline=pipeline,
modality=modality,
default_cam_key=default_cam_key,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
test_mode=test_mode,
......
......@@ -111,9 +111,9 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
Args:
x (tuple[Tensor]): Features from FPN.
batch_data_samples (list[:obj:`DetDataSample`]): Each item contains
the meta information of each image and corresponding
annotations.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each image and
corresponding annotations.
proposal_cfg (ConfigDict, optional): Test / postprocessing
configuration, if None, test_cfg would be used.
Defaults to None.
......
# Copyright (c) OpenMMLab. All rights reserved.
import copy
from typing import Dict, List, Optional, Tuple, Union
import torch
from mmcv.cnn import ConvModule, build_conv_layer
from mmcv.runner import BaseModule, force_fp32
from torch import nn
from mmengine import InstanceData
from torch import Tensor, nn
from mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,
xywhr2xyxyr)
from mmdet3d.core import (Det3DDataSample, circle_nms, draw_heatmap_gaussian,
gaussian_radius, xywhr2xyxyr)
from mmdet3d.core.post_processing import nms_bev
from mmdet3d.models import builder
from mmdet3d.models.builder import build_loss
from mmdet3d.models.utils import clip_sigmoid
from mmdet3d.registry import MODELS
from mmdet.core import build_bbox_coder, multi_apply
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet.core import multi_apply
@MODELS.register_module()
......@@ -53,7 +54,6 @@ class SeparateHead(BaseModule):
self.init_bias = init_bias
for head in self.heads:
classes, num_conv = self.heads[head]
conv_layers = []
c_in = in_channels
for i in range(num_conv - 1):
......@@ -250,8 +250,6 @@ class CenterHead(BaseModule):
feature map. Default: [128].
tasks (list[dict], optional): Task information including class number
and class names. Default: None.
train_cfg (dict, optional): Train-time configs. Default: None.
test_cfg (dict, optional): Test-time configs. Default: None.
bbox_coder (dict, optional): Bbox coder configs. Default: None.
common_heads (dict, optional): Conv information for common heads.
Default: dict().
......@@ -269,32 +267,45 @@ class CenterHead(BaseModule):
Default: dict(type='Conv2d')
norm_cfg (dict, optional): Config of norm layer.
Default: dict(type='BN2d').
bias (str, optional): Type of bias. Default: 'auto'.
bias (str): Type of bias. Default: 'auto'.
norm_bbox (bool): Whether normalize the bbox predictions.
Defaults to True.
train_cfg (dict, optional): Train-time configs. Default: None.
test_cfg (dict, optional): Test-time configs. Default: None.
init_cfg (dict, optional): Config for initialization.
"""
def __init__(self,
in_channels=[128],
tasks=None,
train_cfg=None,
test_cfg=None,
bbox_coder=None,
common_heads=dict(),
loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
loss_bbox=dict(
type='L1Loss', reduction='none', loss_weight=0.25),
separate_head=dict(
type='SeparateHead', init_bias=-2.19, final_kernel=3),
share_conv_channel=64,
num_heatmap_convs=2,
conv_cfg=dict(type='Conv2d'),
norm_cfg=dict(type='BN2d'),
bias='auto',
norm_bbox=True,
init_cfg=None):
in_channels: Union[List[int], int] = [128],
tasks: Optional[List[dict]] = None,
bbox_coder: Optional[dict] = None,
common_heads: dict = dict(),
loss_cls: dict = dict(
type='mmdet.GaussianFocalLoss', reduction='mean'),
loss_bbox: dict = dict(
type='mmdet.L1Loss', reduction='none', loss_weight=0.25),
separate_head: dict = dict(
type='mmdet.SeparateHead',
init_bias=-2.19,
final_kernel=3),
share_conv_channel: int = 64,
num_heatmap_convs: int = 2,
conv_cfg: dict = dict(type='Conv2d'),
norm_cfg: dict = dict(type='BN2d'),
bias: str = 'auto',
norm_bbox: bool = True,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None,
**kwargs):
assert init_cfg is None, 'To prevent abnormal initialization ' \
'behavior, init_cfg is not allowed to be set'
super(CenterHead, self).__init__(init_cfg=init_cfg)
super(CenterHead, self).__init__(init_cfg=init_cfg, **kwargs)
# TODO we should rename this variable,
# for example num_classes_per_task ?
# {'num_class': 2, 'class_names': ['pedestrian', 'traffic_cone']}]
# TODO seems num_classes is useless
num_classes = [len(t['class_names']) for t in tasks]
self.class_names = [t['class_names'] for t in tasks]
self.train_cfg = train_cfg
......@@ -303,9 +314,9 @@ class CenterHead(BaseModule):
self.num_classes = num_classes
self.norm_bbox = norm_bbox
self.loss_cls = build_loss(loss_cls)
self.loss_bbox = build_loss(loss_bbox)
self.bbox_coder = build_bbox_coder(bbox_coder)
self.loss_cls = MODELS.build(loss_cls)
self.loss_bbox = MODELS.build(loss_bbox)
self.bbox_coder = TASK_UTILS.build(bbox_coder)
self.num_anchor_per_locs = [n for n in num_classes]
self.fp16_enabled = False
......@@ -328,7 +339,7 @@ class CenterHead(BaseModule):
in_channels=share_conv_channel, heads=heads, num_cls=num_cls)
self.task_heads.append(builder.build_head(separate_head))
def forward_single(self, x):
def forward_single(self, x: Tensor) -> dict:
"""Forward function for CenterPoint.
Args:
......@@ -347,7 +358,7 @@ class CenterHead(BaseModule):
return ret_dicts
def forward(self, feats):
def forward(self, feats: List[Tensor]) -> Tuple[List[Tensor]]:
"""Forward pass.
Args:
......@@ -384,7 +395,10 @@ class CenterHead(BaseModule):
feat = feat.view(-1, dim)
return feat
def get_targets(self, gt_bboxes_3d, gt_labels_3d):
def get_targets(
self,
batch_gt_instances_3d: List[InstanceData],
) -> Tuple[List[Tensor]]:
"""Generate targets.
How each output is transformed:
......@@ -399,24 +413,24 @@ class CenterHead(BaseModule):
[ tensor0, tensor1, tensor2, ... ]
Args:
gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground
truth gt boxes.
gt_labels_3d (list[torch.Tensor]): Labels of boxes.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
Returns:
tuple[list[torch.Tensor]]: Tuple of target including
the following results in order.
- list[torch.Tensor]: Heatmap scores.
- list[torch.Tensor]: Ground truth boxes.
- list[torch.Tensor]: Indexes indicating the
position of the valid boxes.
- list[torch.Tensor]: Masks indicating which
boxes are valid.
"""
heatmaps, anno_boxes, inds, masks = multi_apply(
self.get_targets_single, gt_bboxes_3d, gt_labels_3d)
self.get_targets_single, batch_gt_instances_3d)
# Transpose heatmaps
heatmaps = list(map(list, zip(*heatmaps)))
heatmaps = [torch.stack(hms_) for hms_ in heatmaps]
......@@ -431,12 +445,14 @@ class CenterHead(BaseModule):
masks = [torch.stack(masks_) for masks_ in masks]
return heatmaps, anno_boxes, inds, masks
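`multi_apply` (imported from mmdet) maps `get_targets_single` over the per-sample gt instances and transposes the per-sample result tuples into per-field lists. Its core is essentially the following (a sketch consistent with mmdet's helper):

# The essence of multi_apply: map a function over per-sample inputs,
# then transpose the tuple-of-tuples into one list per returned field.
from functools import partial

def multi_apply(func, *args, **kwargs):
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))

def get_targets_single_toy(gt):
    heatmap, boxes, inds, masks = gt, gt * 2, gt * 3, gt * 4
    return heatmap, boxes, inds, masks

heatmaps, anno_boxes, inds, masks = multi_apply(
    get_targets_single_toy, [1, 2, 3])
assert heatmaps == [1, 2, 3] and masks == [4, 8, 12]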
def get_targets_single(self, gt_bboxes_3d, gt_labels_3d):
def get_targets_single(self,
gt_instances_3d: InstanceData) -> Tuple[Tensor]:
"""Generate training targets for a single sample.
Args:
gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.
gt_labels_3d (torch.Tensor): Labels of boxes.
gt_instances_3d (:obj:`InstanceData`): Gt_instances of
single data sample. It usually includes
``bboxes_3d`` and ``labels_3d`` attributes.
Returns:
tuple[list[torch.Tensor]]: Tuple of target including
......@@ -449,6 +465,8 @@ class CenterHead(BaseModule):
- list[torch.Tensor]: Masks indicating which boxes
are valid.
"""
gt_labels_3d = gt_instances_3d.labels_3d
gt_bboxes_3d = gt_instances_3d.bboxes_3d
device = gt_labels_3d.device
gt_bboxes_3d = torch.cat(
(gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]),
......@@ -569,21 +587,48 @@ class CenterHead(BaseModule):
inds.append(ind)
return heatmaps, anno_boxes, inds, masks
def loss(self, pts_feats: List[Tensor],
batch_data_samples: List[Det3DDataSample], *args,
**kwargs) -> Dict[str, Tensor]:
"""Forward function for point cloud branch.
Args:
pts_feats (list[torch.Tensor]): Features of point cloud branch
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
``gt_instances_3d``.
Returns:
dict: Losses of each branch.
"""
outs = self(pts_feats)
batch_gt_instance_3d = []
for data_sample in batch_data_samples:
batch_gt_instance_3d.append(data_sample.gt_instances_3d)
losses = self.loss_by_feat(outs, batch_gt_instance_3d)
return losses
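This `loss()` illustrates the new head entry-point convention: a thin wrapper unpacks the data samples, then delegates to a `*_by_feat` method that works on plain tensors. The same split appears below for `predict()`/`predict_by_feat()`. A toy sketch of the pattern (dict-based stand-ins for `Det3DDataSample`):

# Sketch of the loss()/loss_by_feat() and predict()/predict_by_feat()
# split: entry points unpack data samples, *_by_feat does the real work.
class ToyHead:
    def __call__(self, feats):
        return [dict(heatmap=f) for f in feats]  # stand-in predictions

    def loss(self, feats, batch_data_samples):
        outs = self(feats)
        batch_gt_instances_3d = [
            s['gt_instances_3d'] for s in batch_data_samples]
        return self.loss_by_feat(outs, batch_gt_instances_3d)

    def loss_by_feat(self, outs, batch_gt_instances_3d):
        return dict(loss_heatmap=0.0)  # the real loss is computed here

    def predict(self, feats, batch_data_samples):
        outs = self(feats)
        batch_input_metas = [s['metainfo'] for s in batch_data_samples]
        return self.predict_by_feat(outs, batch_input_metas)

    def predict_by_feat(self, outs, batch_input_metas):
        return [dict(bboxes_3d=[], scores_3d=[], labels_3d=[])
                for _ in batch_input_metas]

head = ToyHead()
losses = head.loss([1.0], [dict(gt_instances_3d='gt', metainfo={})])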
@force_fp32(apply_to=('preds_dicts'))
def loss(self, gt_bboxes_3d, gt_labels_3d, preds_dicts, **kwargs):
def loss_by_feat(self, preds_dicts: Tuple[List[dict]],
batch_gt_instances_3d: List[InstanceData], *args,
**kwargs):
"""Loss function for CenterHead.
Args:
gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground
truth gt boxes.
gt_labels_3d (list[torch.Tensor]): Labels of boxes.
preds_dicts (dict): Output of forward function.
preds_dicts (tuple[list[dict]]): Prediction results of
multiple tasks. The outer tuple indicates different
task heads, and the inner list indicates different
FPN levels.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes_3d`` and
``labels_3d`` attributes.
Returns:
dict[str:torch.Tensor]: Loss of heatmap and bbox of each task.
dict[str,torch.Tensor]: Loss of heatmap and bbox of each task.
"""
heatmaps, anno_boxes, inds, masks = self.get_targets(
gt_bboxes_3d, gt_labels_3d)
batch_gt_instances_3d)
loss_dict = dict()
for task_id, preds_dict in enumerate(preds_dicts):
# heatmap focal loss
......@@ -619,15 +664,62 @@ class CenterHead(BaseModule):
loss_dict[f'task{task_id}.loss_bbox'] = loss_bbox
return loss_dict
def get_bboxes(self, preds_dicts, img_metas, img=None, rescale=False):
def predict(self,
pts_feats: Dict[str, torch.Tensor],
batch_data_samples: List[Det3DDataSample],
rescale=True,
**kwargs) -> List[InstanceData]:
"""
Args:
pts_feats (dict): Point features.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes meta information of data.
rescale (bool): Whether to rescale the results to
the original scale.
Returns:
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData contains 3d Bounding boxes and corresponding
scores and labels.
"""
preds_dict = self(pts_feats)
batch_size = len(batch_data_samples)
batch_input_metas = []
for batch_index in range(batch_size):
metainfo = batch_data_samples[batch_index].metainfo
batch_input_metas.append(metainfo)
results_list = self.predict_by_feat(
preds_dict, batch_input_metas, rescale=rescale, **kwargs)
return results_list
def predict_by_feat(self, preds_dicts: Tuple[List[dict]],
batch_input_metas: List[dict], *args,
**kwargs) -> List[InstanceData]:
"""Generate bboxes from bbox head predictions.
Args:
preds_dicts (tuple[list[dict]]): Prediction results.
img_metas (list[dict]): Point cloud and image's meta info.
preds_dicts (tuple[list[dict]]): Prediction results of
multiple tasks. The outer tuple indicates different
task heads, and the inner list indicates different
FPN levels.
batch_input_metas (list[dict]): Meta info of multiple
inputs.
Returns:
list[dict]: Decoded bbox, scores and labels after nms.
list[:obj:`InstanceData`]: Instance prediction
results of each sample after the post process.
Each item usually contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`LiDARInstance3DBoxes`): Prediction
of bboxes, contains a tensor with shape
(num_instances, 7) or (num_instances, 9); when the
shape is (num_instances, 9), the last two
dimensions are velocity.
"""
rets = []
for task_id, preds_dict in enumerate(preds_dicts):
......@@ -689,18 +781,20 @@ class CenterHead(BaseModule):
rets.append(
self.get_task_detections(num_class_with_bg,
batch_cls_preds, batch_reg_preds,
batch_cls_labels, img_metas))
batch_cls_labels,
batch_input_metas))
# Merge branches results
num_samples = len(rets[0])
ret_list = []
for i in range(num_samples):
temp_instances = InstanceData()
for k in rets[0][i].keys():
if k == 'bboxes':
bboxes = torch.cat([ret[i][k] for ret in rets])
bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5
bboxes = img_metas[i]['box_type_3d'](
bboxes = batch_input_metas[i]['box_type_3d'](
bboxes, self.bbox_coder.code_size)
elif k == 'scores':
scores = torch.cat([ret[i][k] for ret in rets])
......@@ -710,7 +804,10 @@ class CenterHead(BaseModule):
rets[j][i][k] += flag
flag += num_class
labels = torch.cat([ret[i][k].int() for ret in rets])
ret_list.append([bboxes, scores, labels])
temp_instances.bboxes_3d = bboxes
temp_instances.scores_3d = scores
temp_instances.labels_3d = labels
ret_list.append(temp_instances)
return ret_list
def get_task_detections(self, num_class_with_bg, batch_cls_preds,
......
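With this change the head returns `InstanceData` objects instead of `[bboxes, scores, labels]` triplets. `InstanceData` (from mmengine, as imported elsewhere in this commit) keeps per-instance fields aligned and supports boolean indexing, so downstream filtering becomes a one-liner. A small sketch, assuming mmengine is installed:

# Sketch of consuming the InstanceData results (fields as set above:
# bboxes_3d, scores_3d, labels_3d). Boolean indexing filters every field
# together, replacing manual slicing of parallel lists.
import torch
from mmengine import InstanceData

result = InstanceData()
result.bboxes_3d = torch.rand(5, 9)  # stand-in for LiDARInstance3DBoxes
result.scores_3d = torch.tensor([0.9, 0.2, 0.7, 0.1, 0.6])
result.labels_3d = torch.tensor([0, 1, 2, 0, 1])

keep = result.scores_3d > 0.5
top = result[keep]             # all fields filtered in lockstep
print(len(top), top.labels_3d)  # 3 tensor([0, 2, 1])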
......@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Union
import numpy as np
import torch
from mmcv.ops import furthest_point_sample
from mmcv.runner import BaseModule, force_fp32
from mmcv.runner import BaseModule
from mmengine import ConfigDict, InstanceData
from torch.nn import functional as F
......@@ -308,7 +308,6 @@ class VoteHead(BaseModule):
results.update(decode_res)
return results
@force_fp32(apply_to=('bbox_preds', ))
def loss_by_feat(
self,
points: List[torch.Tensor],
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
from mmengine import InstanceData
from mmdet3d.core import Det3DDataSample
from mmdet3d.core.utils import (ForwardResults, InstanceList, OptConfigType,
OptMultiConfig, OptSampleList, SampleList)
......@@ -38,7 +40,7 @@ class Base3DDetector(BaseDetector):
- "tensor": Forward the whole network and return tensor or tuple of
tensor without any post-processing, same as a common nn.Module.
- "predict": Forward and return the predictions, which are fully
processed to a list of :obj:`DetDataSample`.
processed to a list of :obj:`Det3DDataSample`.
- "loss": Forward and return a dict of losses according to the given
inputs and data samples.
......@@ -53,8 +55,8 @@ class Base3DDetector(BaseDetector):
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor): Image tensor has shape (B, C, H, W).
data_samples (list[:obj:`DetDataSample`],
list[list[:obj:`DetDataSample`]], optional): The
data_samples (list[:obj:`Det3DDataSample`],
list[list[:obj:`Det3DDataSample`]], optional): The
annotation data of every sample. When it is a list[list], the
outer list indicates the test time augmentation, and the
inner list indicates the batch. Otherwise, the list simply
......@@ -65,7 +67,7 @@ class Base3DDetector(BaseDetector):
The return type depends on ``mode``.
- If ``mode="tensor"``, return a tensor or a tuple of tensor.
- If ``mode="predict"``, return a list of :obj:`DetDataSample`.
- If ``mode="predict"``, return a list of :obj:`Det3DDataSample`.
- If ``mode="loss"``, return a dict of tensor.
"""
if mode == 'loss':
......@@ -87,7 +89,11 @@ class Base3DDetector(BaseDetector):
raise RuntimeError(f'Invalid mode "{mode}". '
'Only supports loss, predict and tensor mode')
def convert_to_datasample(self, results_list: InstanceList) -> SampleList:
def convert_to_datasample(
self,
results_list_3d: InstanceList,
results_list_2d: InstanceList = None,
) -> SampleList:
"""Convert results list to `Det3DDataSample`.
Subclasses could override it to be compatible with some multi-modality
......@@ -100,19 +106,35 @@ class Base3DDetector(BaseDetector):
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the
input. Each Det3DDataSample usually contains
'pred_instances_3d'. And the ``pred_instances_3d`` usually
'pred_instances_3d'. And the ``pred_instances_3d`` normally
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of 3D bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
When the model also produces image predictions, the result
contains ``pred_instances``, and the ``pred_instances`` normally
contains following keys.
- scores (Tensor): Classification scores of the image
branch, has a shape (num_instance, )
- labels (Tensor): Predicted labels of 2D bboxes, has a shape
(num_instances, ).
- bboxes (Tensor): Contains a tensor with shape
(num_instances, 4).
"""
out_results_list = []
for i in range(len(results_list)):
data_sample_list = []
if results_list_2d is None:
results_list_2d = [
InstanceData() for _ in range(len(results_list_3d))
]
for i in range(len(results_list_3d)):
result = Det3DDataSample()
result.pred_instances_3d = results_list[i]
out_results_list.append(result)
return out_results_list
result.pred_instances_3d = results_list_3d[i]
result.pred_instances = results_list_2d[i]
data_sample_list.append(result)
return data_sample_list
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Optional
import torch
from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
from mmdet3d.core import merge_aug_bboxes_3d
from mmdet3d.registry import MODELS
from .mvx_two_stage import MVXTwoStageDetector
@MODELS.register_module()
class CenterPoint(MVXTwoStageDetector):
"""Base class of Multi-modality VoxelNet."""
"""Base class of Multi-modality VoxelNet.
Args:
pts_voxel_layer (dict, optional): Point cloud voxelization
layer. Defaults to None.
pts_voxel_encoder (dict, optional): Point voxelization
encoder layer. Defaults to None.
pts_middle_encoder (dict, optional): Middle encoder layer
of the point cloud modality. Defaults to None.
pts_fusion_layer (dict, optional): Fusion layer.
Defaults to None.
img_backbone (dict, optional): Backbone for extracting
image features. Defaults to None.
pts_backbone (dict, optional): Backbone for extracting
point features. Defaults to None.
img_neck (dict, optional): Neck for extracting
image features. Defaults to None.
pts_neck (dict, optional): Neck for extracting
point features. Defaults to None.
pts_bbox_head (dict, optional): Bbox head of the
point cloud modality. Defaults to None.
img_roi_head (dict, optional): RoI head of the image
modality. Defaults to None.
img_rpn_head (dict, optional): RPN head of the image
modality. Defaults to None.
train_cfg (dict, optional): Training config of the model.
Defaults to None.
test_cfg (dict, optional): Testing config of the model.
Defaults to None.
init_cfg (dict, optional): Initialization config of the
model. Defaults to None.
data_preprocessor (dict or ConfigDict, optional): The pre-process
config of :class:`Det3DDataPreprocessor`. Defaults to None.
"""
def __init__(self,
pts_voxel_layer=None,
pts_voxel_encoder=None,
pts_middle_encoder=None,
pts_fusion_layer=None,
img_backbone=None,
pts_backbone=None,
img_neck=None,
pts_neck=None,
pts_bbox_head=None,
img_roi_head=None,
img_rpn_head=None,
train_cfg=None,
test_cfg=None,
pretrained=None,
init_cfg=None):
pts_voxel_layer: Optional[dict] = None,
pts_voxel_encoder: Optional[dict] = None,
pts_middle_encoder: Optional[dict] = None,
pts_fusion_layer: Optional[dict] = None,
img_backbone: Optional[dict] = None,
pts_backbone: Optional[dict] = None,
img_neck: Optional[dict] = None,
pts_neck: Optional[dict] = None,
pts_bbox_head: Optional[dict] = None,
img_roi_head: Optional[dict] = None,
img_rpn_head: Optional[dict] = None,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None,
data_preprocessor: Optional[dict] = None,
**kwargs):
super(CenterPoint,
self).__init__(pts_voxel_layer, pts_voxel_encoder,
pts_middle_encoder, pts_fusion_layer,
img_backbone, pts_backbone, img_neck, pts_neck,
pts_bbox_head, img_roi_head, img_rpn_head,
train_cfg, test_cfg, pretrained, init_cfg)
def extract_pts_feat(self, pts, img_feats, img_metas):
"""Extract features of points."""
if not self.with_pts_bbox:
return None
voxels, num_points, coors = self.voxelize(pts)
voxel_features = self.pts_voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0] + 1
x = self.pts_middle_encoder(voxel_features, coors, batch_size)
x = self.pts_backbone(x)
if self.with_pts_neck:
x = self.pts_neck(x)
return x
def forward_pts_train(self,
pts_feats,
gt_bboxes_3d,
gt_labels_3d,
img_metas,
gt_bboxes_ignore=None):
"""Forward function for point cloud branch.
Args:
pts_feats (list[torch.Tensor]): Features of point cloud branch
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
boxes for each sample.
gt_labels_3d (list[torch.Tensor]): Ground truth labels for
boxes of each sample.
img_metas (list[dict]): Meta information of samples.
gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
boxes to be ignored. Defaults to None.
Returns:
dict: Losses of each branch.
"""
outs = self.pts_bbox_head(pts_feats)
loss_inputs = [gt_bboxes_3d, gt_labels_3d, outs]
losses = self.pts_bbox_head.loss(*loss_inputs)
return losses
def simple_test_pts(self, x, img_metas, rescale=False):
"""Test function of point cloud branch."""
outs = self.pts_bbox_head(x)
bbox_list = self.pts_bbox_head.get_bboxes(
outs, img_metas, rescale=rescale)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
train_cfg, test_cfg, init_cfg, data_preprocessor,
**kwargs)
# TODO support this
def aug_test_pts(self, feats, img_metas, rescale=False):
"""Test function of point cloud branch with augmentaiton.
......@@ -107,6 +95,7 @@ class CenterPoint(MVXTwoStageDetector):
- scores_3d (torch.Tensor): Scores of predicted boxes.
- labels_3d (torch.Tensor): Labels of predicted boxes.
"""
raise NotImplementedError
# only support aug_test for one sample
outs_list = []
for x, img_meta in zip(feats, img_metas):
......@@ -186,7 +175,9 @@ class CenterPoint(MVXTwoStageDetector):
bbox_list[0][key] = bbox_list[0][key].to('cpu')
return bbox_list[0]
# TODO support this
def aug_test(self, points, img_metas, imgs=None, rescale=False):
raise NotImplementedError
"""Test function with augmentaiton."""
img_feats, pts_feats = self.extract_feats(points, img_metas, imgs)
bbox_list = dict()
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional, Sequence
import torch
from mmcv.runner import force_fp32
from torch import Tensor
from torch.nn import functional as F
from mmdet3d.registry import MODELS
......@@ -23,7 +25,6 @@ class DynamicMVXFasterRCNN(MVXTwoStageDetector):
super(DynamicMVXFasterRCNN, self).__init__(**kwargs)
@torch.no_grad()
@force_fp32()
def voxelize(self, points):
"""Apply dynamic voxelization to points.
......@@ -46,13 +47,30 @@ class DynamicMVXFasterRCNN(MVXTwoStageDetector):
coors_batch = torch.cat(coors_batch, dim=0)
return points, coors_batch
def extract_pts_feat(self, points, img_feats, img_metas):
"""Extract point features."""
def extract_pts_feat(
self,
points: List[Tensor],
img_feats: Optional[Sequence[Tensor]] = None,
batch_input_metas: Optional[List[dict]] = None
) -> Sequence[Tensor]:
"""Extract features of points.
Args:
points (List[Tensor]): Point cloud of multiple inputs.
img_feats (list[Tensor] or tuple[Tensor], optional): Features from
the image backbone.
batch_input_metas (list[dict], optional): The meta information
of multiple samples. Defaults to None.
Returns:
Sequence[Tensor]: Point features of multiple inputs
from backbone or neck.
"""
if not self.with_pts_bbox:
return None
voxels, coors = self.voxelize(points)
voxel_features, feature_coors = self.pts_voxel_encoder(
voxels, coors, points, img_feats, img_metas)
voxels, coors, points, img_feats, batch_input_metas)
batch_size = coors[-1, 0] + 1
x = self.pts_middle_encoder(voxel_features, feature_coors, batch_size)
x = self.pts_backbone(x)
......
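For context on `DynamicMVXFasterRCNN.voxelize`: dynamic voxelization keeps every point and only computes each point's integer voxel coordinate, with no `max_num_points`/`max_voxels` caps as in hard voxelization. The coordinate math is essentially the following sketch (assumed convention: `coors` are `(batch_idx, z, y, x)` indices; out-of-range points would be filtered in the real op):

# Sketch of dynamic voxelization coordinates.
import torch

point_cloud_range = torch.tensor([0, -40, -3, 70.4, 40, 1])
voxel_size = torch.tensor([0.05, 0.05, 0.1])

points = torch.tensor([[10.0, 0.5, -1.0, 0.3],
                       [10.01, 0.52, -1.0, 0.1]])  # x, y, z, intensity
coors = ((points[:, :3] - point_cloud_range[:3]) / voxel_size).long()
coors = coors.flip(dims=[1])  # (x, y, z) -> (z, y, x)
batch_idx = torch.zeros(len(points), 1, dtype=torch.long)
coors = torch.cat([batch_idx, coors], dim=1)
# Both points fall in the same voxel; a mean-style VFE then
# scatter-averages point features per unique voxel instead of padding
# to max_num_points.
print(coors)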
This diff is collapsed.
......@@ -3,7 +3,7 @@ import torch
from mmcv.cnn import build_norm_layer
from mmcv.ops import DynamicScatter
from mmcv.runner import force_fp32
from torch import nn
from torch import Tensor, nn
from mmdet3d.registry import MODELS
from .. import builder
......@@ -20,13 +20,14 @@ class HardSimpleVFE(nn.Module):
num_features (int, optional): Number of features to use. Default: 4.
"""
def __init__(self, num_features=4):
def __init__(self, num_features: int = 4) -> None:
super(HardSimpleVFE, self).__init__()
self.num_features = num_features
self.fp16_enabled = False
@force_fp32(out_fp16=True)
def forward(self, features, num_points, coors):
def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
*args, **kwargs) -> Tensor:
"""Forward function.
Args:
......@@ -66,7 +67,7 @@ class DynamicSimpleVFE(nn.Module):
@torch.no_grad()
@force_fp32(out_fp16=True)
def forward(self, features, coors):
def forward(self, features, coors, *args, **kwargs):
"""Forward function.
Args:
......@@ -218,13 +219,14 @@ class DynamicVFE(nn.Module):
center_per_point = voxel_mean[voxel_inds, ...]
return center_per_point
@force_fp32(out_fp16=True)
def forward(self,
features,
coors,
points=None,
img_feats=None,
img_metas=None):
img_metas=None,
*args,
**kwargs):
"""Forward functions.
Args:
......@@ -390,7 +392,9 @@ class HardVFE(nn.Module):
num_points,
coors,
img_feats=None,
img_metas=None):
img_metas=None,
*args,
**kwargs):
"""Forward functions.
Args:
......
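A note on the signature changes above: adding `*args, **kwargs` lets every voxel encoder be called with one superset of arguments, so callers no longer need per-class branching. A toy sketch of the idea (stand-in classes, not the real VFEs):

# Each encoder simply ignores the arguments it does not use.
import torch

class ToySimpleVFE:
    def forward(self, features, num_points, coors, *args, **kwargs):
        # sum the padded point slots, divide by the true point count;
        # any img_feats/img_metas passed in are ignored
        return features.sum(dim=1) / num_points.clamp(min=1).view(-1, 1)

class ToyFusionVFE:
    def forward(self, features, num_points, coors, img_feats=None,
                img_metas=None, *args, **kwargs):
        feats = features.sum(dim=1) / num_points.clamp(min=1).view(-1, 1)
        return feats  # a real fusion VFE would also mix in img_feats

features = torch.rand(6, 10, 4)  # (num_voxels, max_points, point_dim)
num_points = torch.randint(1, 10, (6,))
coors = torch.zeros(6, 4, dtype=torch.long)
for vfe in (ToySimpleVFE(), ToyFusionVFE()):
    out = vfe.forward(features, num_points, coors,
                      img_feats=None, img_metas=None)
    assert out.shape == (6, 4)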
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from mmengine import InstanceData
from mmdet3d.core.bbox.assigners import MaxIoUAssigner
from mmdet3d.core.bbox.assigners import Max3DIoUAssigner
from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
def test_iou_piecewise_sampler():
if not torch.cuda.is_available():
pytest.skip()
assigner = MaxIoUAssigner(
assigner = Max3DIoUAssigner(
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
......@@ -27,7 +28,13 @@ def test_iou_piecewise_sampler():
[[0, 0, 0, 10, 10, 9, 0.2], [5, 10, 10, 20, 20, 15, 0.6]],
dtype=torch.float32).cuda()
gt_labels = torch.tensor([1, 1], dtype=torch.int64).cuda()
assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
gt_instances = InstanceData()
gt_instances.bboxes_3d = gt_bboxes
gt_instances.labels_3d = gt_labels
pred_instances = InstanceData()
pred_instances.priors = bboxes
assign_result = assigner.assign(pred_instances, gt_instances)
sampler = IoUNegPiecewiseSampler(
num=10,
......