Commit eb1107e4 authored by raojy's avatar raojy
Browse files

fix_mmdetection

parent 7aa442d5
Pipeline #3461 canceled with stages
_base_ = ['./centerpoint_voxel01_second_secfpn_8xb4-cyclic-20e_nus-3d.py']
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
voxel_size = [0.075, 0.075, 0.2]
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
# Using calibration info convert the Lidar-coordinate point cloud range to the
# ego-coordinate point cloud range could bring a little promotion in nuScenes.
# point_cloud_range = [-54, -54.8, -5.0, 54, 53.2, 3.0]
# For nuScenes we usually do 10-class detection
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix = dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP')
model = dict(
data_preprocessor=dict(
voxel_layer=dict(
voxel_size=voxel_size, point_cloud_range=point_cloud_range)),
pts_middle_encoder=dict(sparse_shape=[41, 1440, 1440]),
pts_bbox_head=dict(
bbox_coder=dict(
voxel_size=voxel_size[:2], pc_range=point_cloud_range[:2])),
train_cfg=dict(
pts=dict(
grid_size=[1440, 1440, 40],
voxel_size=voxel_size,
point_cloud_range=point_cloud_range)),
test_cfg=dict(
pts=dict(voxel_size=voxel_size[:2], pc_range=point_cloud_range[:2])))
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
car=5,
truck=5,
bus=5,
trailer=5,
construction_vehicle=5,
traffic_cone=5,
barrier=5,
motorcycle=5,
bicycle=5,
pedestrian=5)),
classes=class_names,
sample_groups=dict(
car=2,
truck=3,
construction_vehicle=7,
bus=4,
trailer=6,
barrier=2,
motorcycle=6,
bicycle=6,
pedestrian=2,
traffic_cone=2),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
backend_args=backend_args),
backend_args=backend_args)
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
dataset=dict(
dataset=dict(
pipeline=train_pipeline, metainfo=dict(classes=class_names))))
test_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
_base_ = ['./centerpoint_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py']
model = dict(test_cfg=dict(pts=dict(nms_type='circle')))
_base_ = ['./centerpoint_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py']
model = dict(
pts_bbox_head=dict(
separate_head=dict(
type='DCNSeparateHead',
dcn_config=dict(
type='DCN',
in_channels=64,
out_channels=64,
kernel_size=3,
padding=1,
groups=4),
init_bias=-2.19,
final_kernel=3)),
test_cfg=dict(pts=dict(nms_type='circle')))
_base_ = './centerpoint_voxel0075_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py' # noqa: E501
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
# Using calibration info convert the Lidar-coordinate point cloud range to the
# ego-coordinate point cloud range could bring a little promotion in nuScenes.
# point_cloud_range = [-54, -54.8, -5.0, 54, 53.2, 3.0]
backend_args = None
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
# Add double-flip augmentation
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=True,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', sync_2d=False),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range),
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))
_base_ = ['./centerpoint_voxel0075_second_secfpn_8xb4-cyclic-20e_nus-3d.py']
model = dict(
pts_bbox_head=dict(
separate_head=dict(
type='DCNSeparateHead',
dcn_config=dict(
type='DCN',
in_channels=64,
out_channels=64,
kernel_size=3,
padding=1,
groups=4),
init_bias=-2.19,
final_kernel=3)))
_base_ = \
'./centerpoint_voxel0075_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py'
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
# Using calibration info convert the Lidar-coordinate point cloud range to the
# ego-coordinate point cloud range could bring a little promotion in nuScenes.
# point_cloud_range = [-54, -54.8, -5.0, 54, 53.2, 3.0]
backend_args = None
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
# Add double-flip augmentation
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=True,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', sync_2d=False),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))
_base_ = \
'./centerpoint_voxel0075_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py'
model = dict(test_cfg=dict(pts=dict(use_rotate_nms=True, max_num=500)))
point_cloud_range = [-54, -54, -5.0, 54, 54, 3.0]
# Using calibration info convert the Lidar-coordinate point cloud range to the
# ego-coordinate point cloud range could bring a little promotion in nuScenes.
# point_cloud_range = [-54, -54.8, -5.0, 54, 53.2, 3.0]
backend_args = None
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=[0.95, 1.0, 1.05],
# Add double-flip augmentation
flip=True,
pcd_horizontal_flip=True,
pcd_vertical_flip=True,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D', sync_2d=False),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))
_base_ = [
'../_base_/datasets/nus-3d.py',
'../_base_/models/centerpoint_voxel01_second_secfpn_nus.py',
'../_base_/schedules/cyclic-20e.py', '../_base_/default_runtime.py'
]
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
# Using calibration info convert the Lidar-coordinate point cloud range to the
# ego-coordinate point cloud range could bring a little promotion in nuScenes.
# point_cloud_range = [-51.2, -52, -5.0, 51.2, 50.4, 3.0]
# For nuScenes we usually do 10-class detection
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix = dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP')
model = dict(
data_preprocessor=dict(
voxel_layer=dict(point_cloud_range=point_cloud_range)),
pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),
# model training and testing settings
train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),
test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2])))
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
car=5,
truck=5,
bus=5,
trailer=5,
construction_vehicle=5,
traffic_cone=5,
barrier=5,
motorcycle=5,
bicycle=5,
pedestrian=5)),
classes=class_names,
sample_groups=dict(
car=2,
truck=3,
construction_vehicle=7,
bus=4,
trailer=6,
barrier=2,
motorcycle=6,
bicycle=6,
pedestrian=2,
traffic_cone=2),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
backend_args=backend_args),
backend_args=backend_args)
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectNameFilter', classes=class_names),
dict(type='PointShuffle'),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
_delete_=True,
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='CBGSDataset',
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='nuscenes_infos_train.pkl',
pipeline=train_pipeline,
metainfo=dict(classes=class_names),
test_mode=False,
data_prefix=data_prefix,
use_valid_flag=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='LiDAR',
backend_args=backend_args)))
test_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
train_cfg = dict(val_interval=20)
_base_ = ['./centerpoint_voxel01_second_secfpn_8xb4-cyclic-20e_nus-3d.py']
model = dict(test_cfg=dict(pts=dict(nms_type='circle')))
_base_ = ['./centerpoint_voxel01_second_secfpn_8xb4-cyclic-20e_nus-3d.py']
model = dict(
pts_bbox_head=dict(
separate_head=dict(
type='DCNSeparateHead',
dcn_config=dict(
type='DCN',
in_channels=64,
out_channels=64,
kernel_size=3,
padding=1,
groups=4),
init_bias=-2.19,
final_kernel=3)),
test_cfg=dict(pts=dict(nms_type='circle')))
_base_ = ['./centerpoint_voxel01_second_secfpn_8xb4-cyclic-20e_nus-3d.py']
model = dict(
pts_bbox_head=dict(
separate_head=dict(
type='DCNSeparateHead',
dcn_config=dict(
type='DCN',
in_channels=64,
out_channels=64,
kernel_size=3,
padding=1,
groups=4),
init_bias=-2.19,
final_kernel=3)))
Collections:
- Name: CenterPoint
Metadata:
Training Data: nuScenes
Training Techniques:
- AdamW
Training Resources: 8x V100 GPUs
Architecture:
- Hard Voxelization
Paper:
URL: https://arxiv.org/abs/2006.11275
Title: 'Center-based 3D Object Detection and Tracking'
README: configs/centerpoint/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/centerpoint.py#L10
Version: v0.6.0
Models:
- Name: centerpoint_voxel01_second_secfpn_head-circlenms_8xb4-cyclic-20e_nus-3d
In Collection: CenterPoint
Config: configs/centerpoint/centerpoint_voxel01_second_secfpn_head-circlenms_8xb4-cyclic-20e_nus-3d.py
metadata:
Training Memory (GB): 5.2
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 56.11
NDS: 64.61
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/centerpoint/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_01voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20220810_030004-9061688e.pth
- Name: centerpoint_voxel01_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d
In Collection: CenterPoint
Config: configs/centerpoint/centerpoint_voxel01_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
Metadata:
Training Memory (GB): 5.5
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 56.10
NDS: 64.69
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/centerpoint/centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_01voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20220810_052355-a6928835.pth
- Name: centerpoint_voxel0075_second_secfpn_head-circlenms_8xb4-cyclic-20e_nus-3d
In Collection: CenterPoint
Config: configs/centerpoint/centerpoint_voxel0075_second_secfpn_head-circlenms_8xb4-cyclic-20e_nus-3d.py
Metadata:
Training Memory (GB): 8.2
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 56.54
NDS: 65.17
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_circlenms_4x8_cyclic_20e_nus_20220810_011659-04cb3a3b.pth
- Name: centerpoint_voxel0075_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d
In Collection: CenterPoint
Config: configs/centerpoint/centerpoint_voxel0075_second_secfpn_head-dcn-circlenms_8xb4-cyclic-20e_nus-3d.py
Metadata:
Training Memory (GB): 8.7
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 56.92
NDS: 65.27
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/centerpoint/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus/centerpoint_0075voxel_second_secfpn_dcn_circlenms_4x8_cyclic_20e_nus_20220810_025930-657f67e0.pth
- Name: centerpoint_pillar02_second_secfpn_head-circlenms_8xb4-cyclic-20e_nus-3d
In Collection: CenterPoint
Config: configs/centerpoint/centerpoint_pillar02_second_secfpn_head-circlenms_8xb4-cyclic-20e_nus-3d.py
Metadata:
Training Memory (GB): 4.6
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 48.70
NDS: 59.62
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/centerpoint/centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_circlenms_4x8_cyclic_20e_nus_20220811_031844-191a3822.pth
- Name: centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d
In Collection: CenterPoint
Config: configs/centerpoint/centerpoint_pillar02_second_secfpn_head-dcn_8xb4-cyclic-20e_nus-3d.py
Metadata:
Training Memory (GB): 4.9
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 48.38
NDS: 59.79
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/centerpoint/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus/centerpoint_02pillar_second_secfpn_dcn_4x8_cyclic_20e_nus_20220811_045458-808e69ad.pth
# Cylindrical and Asymmetrical 3D Convolution Networks for LiDAR Segmentation
> [Cylindrical and Asymmetrical 3D Convolution Networks for LiDAR Segmentation](https://arxiv.org/abs/2011.10033)
<!-- [ALGORITHM] -->
## Abstract
State-of-the-art methods for large-scale driving-scene LiDAR segmentation often project the point clouds to 2D space and then process them via 2D convolution. Although this corporation shows the competitiveness in the point cloud, it inevitably alters and abandons the 3D topology and geometric relations. A natural remedy is to utilize the3D voxelization and 3D convolution network. However, we found that in the outdoor point cloud, the improvement obtained in this way is quite limited. An important reason is the property of the outdoor point cloud, namely sparsity and varying density. Motivated by this investigation, we propose a new framework for the outdoor LiDAR segmentation, where cylindrical partition and asymmetrical 3D convolution networks are designed to explore the 3D geometric pat-tern while maintaining these inherent properties. Moreover, a point-wise refinement module is introduced to alleviate the interference of lossy voxel-based label encoding. We evaluate the proposed model on two large-scale datasets, i.e., SemanticKITTI and nuScenes. Our method achieves the 1st place in the leaderboard of SemanticKITTI and outperforms existing methods on nuScenes with a noticeable margin, about 4%. Furthermore, the proposed 3D framework also generalizes well to LiDAR panoptic segmentation and LiDAR 3D detection.
![overview](https://user-images.githubusercontent.com/45515569/228523861-2923082c-37d9-4d4f-aa59-746a8d9284c2.png)
## Introduction
We implement Cylinder3D and provide the result and checkpoints on Semantickitti datasets.
## Results and models
### SemanticKITTI
| Method | Lr schd | Laser-Polar Mix | Mem (GB) | mIoU | Download |
| :-----------------------------------------------------------------: | :-----: | :-------------: | :------: | :------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [Cylinder3D](./cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py) | 3x | ✗ | 10.2 | 63.1±0.5 | [model](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/cylinder3d/cylinder3d_4xb4_3x_semantickitti/cylinder3d_4xb4_3x_semantickitti_20230318_191107-822a8c31.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/cylinder3d/cylinder3d_4xb4_3x_semantickitti/cylinder3d_4xb4_3x_semantickitti_20230318_191107.json) |
| [Cylinder3D](./cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py) | 3x | ✔ | 12.8 | 67.0 | [model](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/cylinder3d/cylinder3d_8xb2-amp-laser-polar-mix-3x_semantickitti_20230425_144950-372cdf69.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/cylinder3d/cylinder3d_8xb2-amp-laser-polar-mix-3x_semantickitti_20230425_144950.log) |
Note: We reproduce the performance comparable with its [official repo](https://github.com/xinge008/Cylinder3D). It's slightly lower than the performance (65.9 mIOU) reported in the paper due to the lack of point-wise refinement and shorter training time.
## Citation
```latex
@inproceedings{zhu2021cylindrical,
title={Cylindrical and asymmetrical 3d convolution networks for lidar segmentation},
author={Zhu, Xinge and Zhou, Hui and Wang, Tai and Hong, Fangzhou and Ma, Yuexin and Li, Wei and Li, Hongsheng and Lin, Dahua},
booktitle={Proceedings of the IEEE/CVF conference on computer vision and pattern recognition},
pages={9939--9948},
year={2021}
}
```
_base_ = [
'../_base_/datasets/semantickitti.py', '../_base_/models/cylinder3d.py',
'../_base_/default_runtime.py'
]
# optimizer
lr = 0.001
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.01))
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=36, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# learning rate
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0,
end=1000),
dict(
type='MultiStepLR',
begin=0,
end=36,
by_epoch=True,
milestones=[30],
gamma=0.1)
]
train_dataloader = dict(batch_size=4, )
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (4 samples per GPU).
# auto_scale_lr = dict(enable=False, base_batch_size=32)
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=5))
_base_ = [
'../_base_/datasets/semantickitti.py', '../_base_/models/cylinder3d.py',
'../_base_/schedules/schedule-3x.py', '../_base_/default_runtime.py'
]
train_pipeline = [
dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type='PointSegClassMapping'),
dict(
type='RandomChoice',
transforms=[
[
dict(
type='LaserMix',
num_areas=[3, 4, 5, 6],
pitch_angles=[-25, 3],
pre_transform=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type='PointSegClassMapping')
],
prob=1)
],
[
dict(
type='PolarMix',
instance_classes=[0, 1, 2, 3, 4, 5, 6, 7],
swap_ratio=0.5,
rotate_paste_ratio=1.0,
pre_transform=[
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(
type='LoadAnnotations3D',
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type='PointSegClassMapping')
],
prob=1)
],
],
prob=[0.5, 0.5]),
dict(
type='GlobalRotScaleTrans',
rot_range=[0., 6.28318531],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0],
),
dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
default_hooks = dict(checkpoint=dict(type='CheckpointHook', interval=1))
Collections:
- Name: Cylinder3D
Metadata:
Training Techniques:
- AdamW
Training Resources: 4x A100 GPUs
Architecture:
- Cylinder3D
Paper:
URL: https://arxiv.org/abs/2011.10033
Title: 'Cylindrical and Asymmetrical 3D Convolution Networks for LiDAR Segmentation'
README: configs/cylinder3d/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection3d/blob/dev-1.x/mmdet3d/models/segmentors/cylinder3d.py#L13
Version: v1.1.0
Models:
- Name: cylinder3d_4xb4-3x_semantickitti
In Collection: Cylinder3D
Config: configs/cylinder3d/cylinder3d_4xb4_3x_semantickitti.py
Metadata:
Training Data: SemanticKITTI
Training Memory (GB): 10.2
Results:
- Task: 3D Semantic Segmentation
Dataset: SemanticKITTI
Metrics:
mIoU: 63.1
Weights: https://download.openmmlab.com/mmdetection3d/v1.1.0_models/cylinder3d/cylinder3d_4xb4_3x_semantickitti/cylinder3d_4xb4_3x_semantickitti_20230318_191107-822a8c31.pth
- Name: cylinder3d_8xb2-laser-polar-mix-3x_semantickitti
In Collection: Cylinder3D
Config: configs/cylinder3d/cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py
Metadata:
Training Data: SemanticKITTI
Training Memory (GB): 12.8
Results:
- Task: 3D Semantic Segmentation
Dataset: SemanticKITTI
Metrics:
mIoU: 67.0
Weights: https://download.openmmlab.com/mmdetection3d/v1.1.0_models/cylinder3d/cylinder3d_4xb4_3x_semantickitti/cylinder3d_4xb4_3x_semantickitti_20230318_191107-822a8c31.pth
# Dynamic Graph CNN for Learning on Point Clouds
> [Dynamic Graph CNN for Learning on Point Clouds](https://arxiv.org/abs/1801.07829)
<!-- [ALGORITHM] -->
## Abstract
Point clouds provide a flexible geometric representation suitable for countless applications in computer graphics; they also comprise the raw output of most 3D data acquisition devices. While hand-designed features on point clouds have long been proposed in graphics and vision, however, the recent overwhelming success of convolutional neural networks (CNNs) for image analysis suggests the value of adapting insight from CNN to the point cloud world. Point clouds inherently lack topological information so designing a model to recover topology can enrich the representation power of point clouds. To this end, we propose a new neural network module dubbed EdgeConv suitable for CNN-based high-level tasks on point clouds including classification and segmentation. EdgeConv acts on graphs dynamically computed in each layer of the network. It is differentiable and can be plugged into existing architectures. Compared to existing modules operating in extrinsic space or treating each point independently, EdgeConv has several appealing properties: It incorporates local neighborhood information; it can be stacked applied to learn global shape properties; and in multi-layer systems affinity in feature space captures semantic characteristics over potentially long distances in the original embedding. We show the performance of our model on standard benchmarks including ModelNet40, ShapeNetPart, and S3DIS.
<div align=center>
<img src="https://user-images.githubusercontent.com/30491025/143855852-3d7888ed-2cfc-416c-9ec8-57621edeaa34.png" width="800"/>
</div>
## Introduction
We implement DGCNN and provide the results and checkpoints on S3DIS dataset.
**Notice**: We follow the implementations in the original DGCNN paper and a PyTorch implementation of DGCNN [code](https://github.com/AnTao97/dgcnn.pytorch).
## Results and models
### S3DIS
| Method | Split | Lr schd | Mem (GB) | Inf time (fps) | mIoU (Val set) | Download |
| :--------------------------------------------------------: | :----: | :---------: | :------: | :------------: | :------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [DGCNN](./dgcnn_4xb32-cosine-100e_s3dis-seg_test-area1.py) | Area_1 | cosine 100e | 13.1 | | 68.33 | [model](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area1/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210731_000734-39658f14.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area1/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210731_000734.log.json) |
| [DGCNN](./dgcnn_4xb32-cosine-100e_s3dis-seg_test-area2.py) | Area_2 | cosine 100e | 13.1 | | 40.68 | [model](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area2/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210731_144648-aea9ecb6.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area2/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210731_144648.log.json) |
| [DGCNN](./dgcnn_4xb32-cosine-100e_s3dis-seg_test-area3.py) | Area_3 | cosine 100e | 13.1 | | 69.38 | [model](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area3/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210801_154629-2ff50ee0.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area3/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210801_154629.log.json) |
| [DGCNN](./dgcnn_4xb32-cosine-100e_s3dis-seg_test-area4.py) | Area_4 | cosine 100e | 13.1 | | 50.07 | [model](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area4/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210802_073551-dffab9cd.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area4/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210802_073551.log.json) |
| [DGCNN](./dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py) | Area_5 | cosine 100e | 13.1 | | 50.59 | [model](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area5/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210730_235824-f277e0c5.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area5/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210730_235824.log.json) |
| [DGCNN](./dgcnn_4xb32-cosine-100e_s3dis-seg_test-area6.py) | Area_6 | cosine 100e | 13.1 | | 77.94 | [model](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area6/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210802_154317-e3511b32.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.17.0_models/dgcnn/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class/area6/dgcnn_32x4_cosine_100e_s3dis_seg-3d-13class_20210802_154317.log.json) |
| DGCNN | 6-fold | | | | 59.43 | |
**Notes:**
- We use XYZ+Color+Normalized_XYZ as input in all the experiments on S3DIS datasets.
- `Area_5` Split means training the model on Area_1, 2, 3, 4, 6 and testing on Area_5.
- `6-fold` Split means the overall result of 6 different splits (Area_1, Area_2, Area_3, Area_4, Area_5 and Area_6 Splits).
- Users need to modify `train_area` and `test_area` in the S3DIS dataset's [config](./configs/_base_/datasets/s3dis_seg-3d-13class.py) to set the training and testing areas, respectively.
## Indeterminism
Since DGCNN testing adopts sliding patch inference which involves random point sampling, and the test script uses fixed random seeds while the random seeds of validation in training are not fixed, the test results may be slightly different from the results reported above.
## Citation
```latex
@article{dgcnn,
title={Dynamic Graph CNN for Learning on Point Clouds},
author={Wang, Yue and Sun, Yongbin and Liu, Ziwei and Sarma, Sanjay E. and Bronstein, Michael M. and Solomon, Justin M.},
journal={ACM Transactions on Graphics (TOG)},
year={2019}
}
```
_base_ = './dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py'
# data settings
train_area = [2, 3, 4, 5, 6]
test_area = 1
train_dataloader = dict(
batch_size=32,
dataset=dict(
ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
scene_idxs=[
f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
]))
test_dataloader = dict(
dataset=dict(
ann_files=f's3dis_infos_Area_{test_area}.pkl',
scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'))
val_dataloader = test_dataloader
_base_ = './dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py'
# data settings
train_area = [1, 3, 4, 5, 6]
test_area = 2
train_dataloader = dict(
batch_size=32,
dataset=dict(
ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
scene_idxs=[
f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
]))
test_dataloader = dict(
dataset=dict(
ann_files=f's3dis_infos_Area_{test_area}.pkl',
scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'))
val_dataloader = test_dataloader
_base_ = './dgcnn_4xb32-cosine-100e_s3dis-seg_test-area5.py'
# data settings
train_area = [1, 2, 4, 5, 6]
test_area = 3
train_dataloader = dict(
batch_size=32,
dataset=dict(
ann_files=[f's3dis_infos_Area_{i}.pkl' for i in train_area],
scene_idxs=[
f'seg_info/Area_{i}_resampled_scene_idxs.npy' for i in train_area
]))
test_dataloader = dict(
dataset=dict(
ann_files=f's3dis_infos_Area_{test_area}.pkl',
scene_idxs=f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'))
val_dataloader = test_dataloader
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment