Commit eb1107e4 authored by raojy

fix_mmdetection

parent 7aa442d5
_base_ = './ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py'
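# Note: this file is a delta on the `_base_` config above. `_delete_=True`
# below tells the MMEngine config merger to replace the base `pts_backbone`
# dict entirely rather than merging new keys into it.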
# model settings
model = dict(
type='MVXFasterRCNN',
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://regnetx_400mf'),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
stem_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(in_channels=[64, 160, 384]))
# dataset settings
train_dataloader = dict(batch_size=1, num_workers=2)
_base_ = './ssn_hv_secfpn_sbn-all_16xb2-2x_nus-3d.py'
# model settings
model = dict(
type='MVXFasterRCNN',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://regnetx_400mf'),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
stem_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(in_channels=[64, 160, 384]))
_base_ = [
'../_base_/models/pointpillars_hv_fpn_lyft.py',
'../_base_/datasets/lyft-3d.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
point_cloud_range = [-100, -100, -5, 100, 100, 3]
# Note that the order of class names should be consistent with
# the following anchors' order
class_names = [
'bicycle', 'motorcycle', 'pedestrian', 'animal', 'car',
'emergency_vehicle', 'bus', 'other_vehicle', 'truck'
]
backend_args = None
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
batch_size=2, num_workers=4, dataset=dict(pipeline=train_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
# model settings
model = dict(
data_preprocessor=dict(
voxel_layer=dict(point_cloud_range=[-100, -100, -5, 100, 100, 3])),
pts_voxel_encoder=dict(
feat_channels=[32, 64],
point_cloud_range=[-100, -100, -5, 100, 100, 3]),
pts_middle_encoder=dict(output_shape=[800, 800]),
pts_neck=dict(
_delete_=True,
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
_delete_=True,
type='ShapeAwareHead',
num_classes=9,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGeneratorPerCls',
ranges=[[-100, -100, -1.0709302, 100, 100, -1.0709302],
[-100, -100, -1.3220503, 100, 100, -1.3220503],
[-100, -100, -0.9122268, 100, 100, -0.9122268],
[-100, -100, -1.8012227, 100, 100, -1.8012227],
[-100, -100, -1.0715024, 100, 100, -1.0715024],
[-100, -100, -0.8871424, 100, 100, -0.8871424],
[-100, -100, -0.3519405, 100, 100, -0.3519405],
[-100, -100, -0.6276341, 100, 100, -0.6276341],
[-100, -100, -0.3033737, 100, 100, -0.3033737]],
sizes=[
[1.76, 0.63, 1.44], # bicycle
[2.35, 0.96, 1.59], # motorcycle
[0.80, 0.76, 1.76], # pedestrian
[0.73, 0.35, 0.50], # animal
[4.75, 1.92, 1.71], # car
[6.52, 2.42, 2.34], # emergency vehicle
[12.70, 2.92, 3.42], # bus
[8.17, 2.75, 3.20], # other vehicle
[10.24, 2.84, 3.44] # truck
],
custom_values=[],
rotations=[0, 1.57],
reshape_out=False),
tasks=[
dict(
num_class=2,
class_names=['bicycle', 'motorcycle'],
shared_conv_channels=(64, 64),
shared_conv_strides=(1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
dict(
num_class=2,
class_names=['pedestrian', 'animal'],
shared_conv_channels=(64, 64),
shared_conv_strides=(1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
dict(
num_class=2,
class_names=['car', 'emergency_vehicle'],
shared_conv_channels=(64, 64, 64),
shared_conv_strides=(2, 1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
dict(
num_class=3,
class_names=['bus', 'other_vehicle', 'truck'],
shared_conv_channels=(64, 64, 64),
shared_conv_strides=(2, 1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01))
],
assign_per_class=True,
diff_rad_by_sin=True,
dir_offset=-0.7854, # -pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
_delete_=True,
pts=dict(
assigner=[
dict( # bicycle
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # motorcycle
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # animal
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # emergency vehicle
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # bus
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # other vehicle
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # truck
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1)
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False)))
# Default setting for scaling LR automatically
# - `enable` means whether to enable scaling LR automatically by default.
# - `base_batch_size` = (16 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
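# A minimal sketch of the linear scaling rule applied when `enable=True`
# (an illustration of the assumed behaviour, not the runner's exact code):
#   scaled_lr = base_lr * actual_total_batch_size / base_batch_size
# e.g. 8 GPUs x 2 samples per GPU (total 16) against base_batch_size=32
# would halve the configured learning rate.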
_base_ = [
'../_base_/models/pointpillars_hv_fpn_nus.py',
'../_base_/datasets/nus-3d.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# Note that the order of class names should be consistent with
# the following anchors' order
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier', 'car',
'truck', 'trailer', 'bus', 'construction_vehicle'
]
backend_args = None
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
train_dataloader = dict(
batch_size=2,
num_workers=4,
dataset=dict(pipeline=train_pipeline, metainfo=dict(classes=class_names)))
test_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
val_dataloader = dict(
dataset=dict(pipeline=test_pipeline, metainfo=dict(classes=class_names)))
# model settings
model = dict(
data_preprocessor=dict(voxel_layer=dict(max_num_points=20)),
pts_voxel_encoder=dict(feat_channels=[64, 64]),
pts_neck=dict(
_delete_=True,
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
_delete_=True,
type='ShapeAwareHead',
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='AlignedAnchor3DRangeGeneratorPerCls',
ranges=[[-50, -50, -1.67339111, 50, 50, -1.67339111],
[-50, -50, -1.71396371, 50, 50, -1.71396371],
[-50, -50, -1.61785072, 50, 50, -1.61785072],
[-50, -50, -1.80984986, 50, 50, -1.80984986],
[-50, -50, -1.76396500, 50, 50, -1.76396500],
[-50, -50, -1.80032795, 50, 50, -1.80032795],
[-50, -50, -1.74440365, 50, 50, -1.74440365],
[-50, -50, -1.68526504, 50, 50, -1.68526504],
[-50, -50, -1.80673031, 50, 50, -1.80673031],
[-50, -50, -1.64824291, 50, 50, -1.64824291]],
sizes=[
[1.68452161, 0.60058911, 1.27192197], # bicycle
[2.09973778, 0.76279481, 1.44403034], # motorcycle
[0.72564370, 0.66344886, 1.75748069], # pedestrian
[0.40359262, 0.39694519, 1.06232151], # traffic cone
[0.48578221, 2.49008838, 0.98297065], # barrier
[4.60718145, 1.95017717, 1.72270761], # car
[6.73778078, 2.45609390, 2.73004906], # truck
[12.01320693, 2.87427237, 3.81509561], # trailer
[11.1885991, 2.94046906, 3.47030982], # bus
[6.38352896, 2.73050468, 3.13312415] # construction vehicle
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=False),
tasks=[
dict(
num_class=2,
class_names=['bicycle', 'motorcycle'],
shared_conv_channels=(64, 64),
shared_conv_strides=(1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
dict(
num_class=1,
class_names=['pedestrian'],
shared_conv_channels=(64, 64),
shared_conv_strides=(1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
dict(
num_class=2,
class_names=['traffic_cone', 'barrier'],
shared_conv_channels=(64, 64),
shared_conv_strides=(1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
dict(
num_class=1,
class_names=['car'],
shared_conv_channels=(64, 64, 64),
shared_conv_strides=(2, 1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
dict(
num_class=4,
class_names=[
'truck', 'trailer', 'bus', 'construction_vehicle'
],
shared_conv_channels=(64, 64, 64),
shared_conv_strides=(2, 1, 1),
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01))
],
assign_per_class=True,
diff_rad_by_sin=True,
dir_offset=-0.7854, # -pi/4
dir_limit_offset=0,
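# code_size=9: the 7 box parameters (x, y, z, w, l, h, yaw) plus the two
# velocity components introduced by `custom_values=[0, 0]` above; the
# matching `code_weight` in `train_cfg` down-weights these velocity
# terms to 0.2.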
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
_delete_=True,
pts=dict(
assigner=[
dict( # bicycle
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # motorcycle
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
dict( # pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # traffic cone
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # barrier
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
dict( # truck
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # trailer
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # bus
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
dict( # construction vehicle
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1)
],
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False)))
# Deep Hough Voting for 3D Object Detection in Point Clouds
> [Deep Hough Voting for 3D Object Detection in Point Clouds](https://arxiv.org/abs/1904.09664)
<!-- [ALGORITHM] -->
## Abstract
Current 3D object detection methods are heavily influenced by 2D detectors. In order to leverage architectures in 2D detectors, they often convert 3D point clouds to regular grids (i.e., to voxel grids or to bird's eye view images), or rely on detection in 2D images to propose 3D boxes. Few works have attempted to directly detect objects in point clouds. In this work, we return to first principles to construct a 3D detection pipeline for point cloud data that is as generic as possible. However, due to the sparse nature of the data -- samples from 2D manifolds in 3D space -- we face a major challenge when directly predicting bounding box parameters from scene points: a 3D object centroid can be far from any surface point and is thus hard to regress accurately in one step. To address the challenge, we propose VoteNet, an end-to-end 3D object detection network based on a synergy of deep point set networks and Hough voting. Our model achieves state-of-the-art 3D detection on two large datasets of real 3D scans, ScanNet and SUN RGB-D, with a simple design, compact model size and high efficiency. Remarkably, VoteNet outperforms previous methods by using purely geometric information without relying on color images.
<div align=center>
<img src="https://user-images.githubusercontent.com/79644370/143888295-af7435b4-9f75-4669-b5f8-a19ae24a051c.png" width="800"/>
</div>
## Introduction
We implement VoteNet and provide the results and checkpoints on the ScanNet and SUN RGB-D datasets.
## Results and models
### ScanNet
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 | AP@0.5 | Download |
| :----------------------------------------: | :-----: | :------: | :------------: | :-----: | :----: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PointNet++](./votenet_8xb8_scannet-3d.py) | 3x | 4.1 | | 62.34 | 40.82 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20210823_234503-cf8134fa.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20210823_234503.log.json) |
### SUNRGBD
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 | AP@0.5 | Download |
| :-----------------------------------------: | :-----: | :------: | :------------: | :-----: | :----: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [PointNet++](./votenet_8xb16_sunrgbd-3d.py) | 3x | 8.1 | | 59.78 | 35.77 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_16x8_sunrgbd-3d-10class/votenet_16x8_sunrgbd-3d-10class_20210820_162823-bf11f014.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_16x8_sunrgbd-3d-10class/votenet_16x8_sunrgbd-3d-10class_20210820_162823.log.json) |
**Notice**: If your current mmdetection3d version >= 0.6.0 and you are using checkpoints downloaded from the above links or trained with mmdetection3d version \< 0.6.0, the checkpoints must first be converted via [tools/model_converters/convert_votenet_checkpoints.py](../../tools/model_converters/convert_votenet_checkpoints.py):
```shell
python ./tools/model_converters/convert_votenet_checkpoints.py ${ORIGINAL_CHECKPOINT_PATH} --out=${NEW_CHECKPOINT_PATH}
```
Then you can use the converted checkpoints following [get_started.md](../../docs/en/get_started.md).
## Indeterminism
Since test data preparation randomly downsamples the points, and the test script uses fixed random seeds while the random seeds of validation during training are not fixed, the test results may differ slightly from those reported above.
## IoU loss
Adding an IoU loss term (simply 1 - IoU) boosts VoteNet's performance. To use it, add the following loss term to the config file:
```python
iou_loss=dict(type='AxisAlignedIoULoss', reduction='sum', loss_weight=10.0 / 3.0)
```
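The provided [votenet_head-iouloss_8xb8_scannet-3d.py](./votenet_head-iouloss_8xb8_scannet-3d.py) delta config adds exactly this term under `model.bbox_head`.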
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | AP@0.25 | AP@0.5 | Download |
| :-----------------------------------------------------: | :-----: | :------: | :------------: | :-----: | :----: | :------: |
| [PointNet++](./votenet_head-iouloss_8xb8_scannet-3d.py) | 3x | 4.1 | | 63.81 | 44.21 | / |
For now, we only support calculating IoU loss for axis-aligned bounding boxes, since the CUDA op for general 3D IoU calculation does not implement the backward method; therefore, the IoU loss can currently only be used on the ScanNet dataset.
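For intuition, here is a minimal sketch of such an axis-aligned IoU loss (an illustration only, not the library's `AxisAlignedIoULoss` implementation; the corner-pair box format `(x1, y1, z1, x2, y2, z2)` is an assumption):

```python
import torch


def axis_aligned_iou_loss(pred: torch.Tensor, target: torch.Tensor,
                          eps: float = 1e-6) -> torch.Tensor:
    """1 - IoU for axis-aligned 3D boxes of shape (..., 6): min/max corners."""
    # Overlap extents along x, y, z; clamped to zero when boxes are disjoint.
    lt = torch.max(pred[..., :3], target[..., :3])
    rb = torch.min(pred[..., 3:], target[..., 3:])
    inter = (rb - lt).clamp(min=0).prod(dim=-1)
    vol_pred = (pred[..., 3:] - pred[..., :3]).prod(dim=-1)
    vol_target = (target[..., 3:] - target[..., :3]).prod(dim=-1)
    iou = inter / (vol_pred + vol_target - inter).clamp(min=eps)
    return (1.0 - iou).sum()  # matches reduction='sum' in the config above
```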
## Citation
```latex
@inproceedings{qi2019deep,
author = {Qi, Charles R and Litany, Or and He, Kaiming and Guibas, Leonidas J},
title = {Deep Hough Voting for 3D Object Detection in Point Clouds},
booktitle = {Proceedings of the IEEE International Conference on Computer Vision},
year = {2019}
}
```
Collections:
- Name: VoteNet
Metadata:
Training Techniques:
- AdamW
Training Resources: 8x V100 GPUs
Architecture:
- PointNet++
Paper:
URL: https://arxiv.org/abs/1904.09664
Title: 'Deep Hough Voting for 3D Object Detection in Point Clouds'
README: configs/votenet/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/models/detectors/votenet.py#L10
Version: v0.5.0
Models:
- Name: votenet_8xb16_sunrgbd-3d.py
In Collection: VoteNet
Config: configs/votenet/votenet_8xb16_sunrgbd-3d.py
Metadata:
Training Data: SUNRGBD
Training Memory (GB): 8.1
Results:
- Task: 3D Object Detection
Dataset: SUNRGBD
Metrics:
AP@0.25: 59.78
AP@0.5: 35.77
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_16x8_sunrgbd-3d-10class/votenet_16x8_sunrgbd-3d-10class_20210820_162823-bf11f014.pth
- Name: votenet_8xb8_scannet-3d.py
In Collection: VoteNet
Config: configs/votenet/votenet_8xb8_scannet-3d.py
Metadata:
Training Data: ScanNet
Training Memory (GB): 4.1
Results:
- Task: 3D Object Detection
Dataset: ScanNet
Metrics:
AP@0.25: 62.34
AP@0.5: 40.82
Weights: https://download.openmmlab.com/mmdetection3d/v1.0.0_models/votenet/votenet_8x8_scannet-3d-18class/votenet_8x8_scannet-3d-18class_20210823_234503-cf8134fa.pth
- Name: votenet_iouloss_8x8_scannet-3d-18class
In Collection: VoteNet
Config: configs/votenet/votenet_head-iouloss_8xb8_scannet-3d.py
Metadata:
Training Data: ScanNet
Training Memory (GB): 4.1
Architecture:
- IoU Loss
Results:
- Task: 3D Object Detection
Dataset: ScanNet
Metrics:
AP@0.25: 63.81
AP@0.5: 44.21
# TODO refactor the config of sunrgbd
_base_ = [
'../_base_/datasets/sunrgbd-3d.py', '../_base_/models/votenet.py',
'../_base_/schedules/schedule-3x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
bbox_head=dict(
num_classes=10,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=10,
num_dir_bins=12,
with_rot=True,
mean_sizes=[
[2.114256, 1.620300, 0.927272], [0.791118, 1.279516, 0.718182],
[0.923508, 1.867419, 0.845495], [0.591958, 0.552978, 0.827272],
[0.699104, 0.454178, 0.75625], [0.69519, 1.346299, 0.736364],
[0.528526, 1.002642, 1.172878], [0.500618, 0.632163, 0.683424],
[0.404671, 1.071108, 1.688889], [0.76584, 1.398258, 0.472728]
]),
))
# Default setting for scaling LR automatically
# - `enable` means whether to enable scaling LR automatically by default.
# - `base_batch_size` = (8 GPUs) x (16 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=128)
_base_ = [
'../_base_/datasets/scannet-3d.py', '../_base_/models/votenet.py',
'../_base_/schedules/schedule-3x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
bbox_head=dict(
num_classes=18,
bbox_coder=dict(
type='PartialBinBasedBBoxCoder',
num_sizes=18,
num_dir_bins=1,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]])))
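# ScanNet ground-truth boxes are axis-aligned, hence `with_rot=False` and a
# single direction bin; `mean_sizes` lists the 18 per-class average box
# dimensions that the head regresses size residuals against.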
default_hooks = dict(logger=dict(type='LoggerHook', interval=30))
# Default setting for scaling LR automatically
# - `enable` means whether to enable scaling LR automatically by default.
# - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=64)
_base_ = ['./votenet_8xb8_scannet-3d.py']
# model settings, add iou loss
model = dict(
bbox_head=dict(
iou_loss=dict(
type='AxisAlignedIoULoss', reduction='sum', loss_weight=10.0 /
3.0)))
kitti:
# The name of the dataset on OpenDataLab; see
# https://opendatalab.com/KITTI_Object/cli. You can also download it
# independently by running `odl get ${dataset}`.
dataset: KITTI_Object
download_root: data
data_root: data/kitti
# Scripts for unzipping datasets
script: tools/dataset_converters/kitti_unzip.sh
nuscenes:
# The name of the dataset on OpenDataLab; see
# https://opendatalab.com/nuScenes/cli. You can also download it
# independently by running `odl get ${dataset}`.
dataset: nuScenes
download_root: data
data_root: data/nuscenes
# Scripts for unzipping datasets
script: tools/dataset_converters/nuscenes_unzip.sh
semantickitti:
# The name of the dataset on OpenDataLab; see
# https://opendatalab.com/SemanticKITTI/cli. You can also download it
# independently by running `odl get ${dataset}`.
dataset: SemanticKITTI
download_root: data
data_root: data/semantickitti
# Scripts for unzipping datasets
script: tools/dataset_converters/semantickitti_unzip.sh
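# Example (assumed workflow): after installing and logging in to the
# OpenDataLab CLI, fetch a dataset listed above, e.g.
#   odl get SemanticKITTI
# and then extract it with the matching unzip script, e.g.
#   bash tools/dataset_converters/semantickitti_unzip.sh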
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
from argparse import ArgumentParser
from mmengine.logging import print_log
from mmdet3d.apis import MonoDet3DInferencer
def parse_args():
parser = ArgumentParser()
parser.add_argument('img', help='Image file')
parser.add_argument('infos', help='Infos file with annotations')
parser.add_argument('model', help='Config file')
parser.add_argument('weights', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--cam-type',
type=str,
default='CAM_BACK',
help='Choose the camera type for inference')
parser.add_argument(
'--pred-score-thr',
type=float,
default=0.3,
help='bbox score threshold')
parser.add_argument(
'--out-dir',
type=str,
default='outputs',
help='Output directory of prediction and visualization results.')
parser.add_argument(
'--show',
action='store_true',
help='Show online visualization results')
parser.add_argument(
'--wait-time',
type=float,
default=-1,
help='The interval of show (s). Demo will be blocked in showing '
'results, if wait_time is -1. Defaults to -1.')
parser.add_argument(
'--no-save-vis',
action='store_true',
help='Do not save detection visualization results')
parser.add_argument(
'--no-save-pred',
action='store_true',
help='Do not save detection prediction results')
parser.add_argument(
'--print-result',
action='store_true',
help='Whether to print the results.')
call_args = vars(parser.parse_args())
call_args['inputs'] = dict(
img=call_args.pop('img'), infos=call_args.pop('infos'))
call_args.pop('cam_type')
if call_args['no_save_vis'] and call_args['no_save_pred']:
call_args['out_dir'] = ''
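# Split the parsed CLI arguments into constructor arguments for the
# inferencer (model/weights/device) and per-call arguments (inputs,
# score threshold and output options).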
init_kws = ['model', 'weights', 'device']
init_args = {}
for init_kw in init_kws:
init_args[init_kw] = call_args.pop(init_kw)
# NOTE: If your operating environment does not have a display device
# (e.g. a remote server), you can save the predictions and visualize
# them on a local device.
if os.environ.get('DISPLAY') is None and call_args['show']:
print_log(
'Display device not found. `--show` is forced to False',
logger='current',
level=logging.WARNING)
call_args['show'] = False
return init_args, call_args
def main():
# TODO: Support inference of point cloud numpy file.
init_args, call_args = parse_args()
inferencer = MonoDet3DInferencer(**init_args)
inferencer(**call_args)
if call_args['out_dir'] != '' and not (call_args['no_save_vis']
and call_args['no_save_pred']):
print_log(
f'results have been saved at {call_args["out_dir"]}',
logger='current')
if __name__ == '__main__':
main()
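# Example invocation (the script path and all file names below are
# placeholders):
#   python demo/mono_det_demo.py IMG.jpg INFOS.pkl CONFIG.py CHECKPOINT.pth \
#       --cam-type CAM_BACK --pred-score-thr 0.3 --show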
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
from argparse import ArgumentParser
from mmengine.logging import print_log
from mmdet3d.apis import MultiModalityDet3DInferencer
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('img', help='Image file')
parser.add_argument('infos', help='Infos file with annotations')
parser.add_argument('model', help='Config file')
parser.add_argument('weights', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--cam-type',
type=str,
default='CAM_FRONT',
help='Choose the camera type for inference')
parser.add_argument(
'--pred-score-thr',
type=float,
default=0.3,
help='bbox score threshold')
parser.add_argument(
'--out-dir',
type=str,
default='outputs',
help='Output directory of prediction and visualization results.')
parser.add_argument(
'--show',
action='store_true',
help='Show online visualization results')
parser.add_argument(
'--wait-time',
type=float,
default=-1,
help='The interval of show (s). Demo will be blocked in showing '
'results, if wait_time is -1. Defaults to -1.')
parser.add_argument(
'--no-save-vis',
action='store_true',
help='Do not save detection visualization results')
parser.add_argument(
'--no-save-pred',
action='store_true',
help='Do not save detection prediction results')
parser.add_argument(
'--print-result',
action='store_true',
help='Whether to print the results.')
call_args = vars(parser.parse_args())
call_args['inputs'] = dict(
points=call_args.pop('pcd'),
img=call_args.pop('img'),
infos=call_args.pop('infos'))
call_args.pop('cam_type')
if call_args['no_save_vis'] and call_args['no_save_pred']:
call_args['out_dir'] = ''
init_kws = ['model', 'weights', 'device']
init_args = {}
for init_kw in init_kws:
init_args[init_kw] = call_args.pop(init_kw)
# NOTE: If your operating environment does not have a display device
# (e.g. a remote server), you can save the predictions and visualize
# them on a local device.
if os.environ.get('DISPLAY') is None and call_args['show']:
print_log(
'Display device not found. `--show` is forced to False',
logger='current',
level=logging.WARNING)
call_args['show'] = False
return init_args, call_args
def main():
# TODO: Support inference of point cloud numpy file.
init_args, call_args = parse_args()
inferencer = MultiModalityDet3DInferencer(**init_args)
inferencer(**call_args)
if call_args['out_dir'] != '' and not (call_args['no_save_vis']
and call_args['no_save_pred']):
print_log(
f'results have been saved at {call_args["out_dir"]}',
logger='current')
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
from argparse import ArgumentParser
from mmengine.logging import print_log
from mmdet3d.apis import LidarDet3DInferencer
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('model', help='Config file')
parser.add_argument('weights', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--pred-score-thr',
type=float,
default=0.3,
help='bbox score threshold')
parser.add_argument(
'--out-dir',
type=str,
default='outputs',
help='Output directory of prediction and visualization results.')
parser.add_argument(
'--show',
action='store_true',
help='Show online visualization results')
parser.add_argument(
'--wait-time',
type=float,
default=-1,
help='The interval of show (s). Demo will be blocked in showing '
'results, if wait_time is -1. Defaults to -1.')
parser.add_argument(
'--no-save-vis',
action='store_true',
help='Do not save detection visualization results')
parser.add_argument(
'--no-save-pred',
action='store_true',
help='Do not save detection prediction results')
parser.add_argument(
'--print-result',
action='store_true',
help='Whether to print the results.')
call_args = vars(parser.parse_args())
call_args['inputs'] = dict(points=call_args.pop('pcd'))
if call_args['no_save_vis'] and call_args['no_save_pred']:
call_args['out_dir'] = ''
init_kws = ['model', 'weights', 'device']
init_args = {}
for init_kw in init_kws:
init_args[init_kw] = call_args.pop(init_kw)
# NOTE: If your operating environment does not have a display device
# (e.g. a remote server), you can save the predictions and visualize
# them on a local device.
if os.environ.get('DISPLAY') is None and call_args['show']:
print_log(
'Display device not found. `--show` is forced to False',
logger='current',
level=logging.WARNING)
call_args['show'] = False
return init_args, call_args
def main():
# TODO: Support inference of point cloud numpy file.
init_args, call_args = parse_args()
inferencer = LidarDet3DInferencer(**init_args)
inferencer(**call_args)
if call_args['out_dir'] != '' and not (call_args['no_save_vis']
and call_args['no_save_pred']):
print_log(
f'results have been saved at {call_args["out_dir"]}',
logger='current')
if __name__ == '__main__':
main()
# Copyright (c) OpenMMLab. All rights reserved.
import logging
import os
from argparse import ArgumentParser
from mmengine.logging import print_log
from mmdet3d.apis import LidarSeg3DInferencer
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('model', help='Config file')
parser.add_argument('weights', help='Checkpoint file')
parser.add_argument(
'--device', default='cuda:0', help='Device used for inference')
parser.add_argument(
'--out-dir',
type=str,
default='outputs',
help='Output directory of prediction and visualization results.')
parser.add_argument(
'--show',
action='store_true',
help='Show online visualization results')
parser.add_argument(
'--wait-time',
type=float,
default=-1,
help='The interval of show (s). Demo will be blocked in showing '
'results, if wait_time is -1. Defaults to -1.')
parser.add_argument(
'--no-save-vis',
action='store_true',
help='Do not save detection visualization results')
parser.add_argument(
'--no-save-pred',
action='store_true',
help='Do not save detection prediction results')
parser.add_argument(
'--print-result',
action='store_true',
help='Whether to print the results.')
call_args = vars(parser.parse_args())
call_args['inputs'] = dict(points=call_args.pop('pcd'))
if call_args['no_save_vis'] and call_args['no_save_pred']:
call_args['out_dir'] = ''
init_kws = ['model', 'weights', 'device']
init_args = {}
for init_kw in init_kws:
init_args[init_kw] = call_args.pop(init_kw)
# NOTE: If your operating environment does not have a display device
# (e.g. a remote server), you can save the predictions and visualize
# them on a local device.
if os.environ.get('DISPLAY') is None and call_args['show']:
print_log(
'Display device not found. `--show` is forced to False',
logger='current',
level=logging.WARNING)
call_args['show'] = False
return init_args, call_args
def main():
# TODO: Support inference of point cloud numpy file.
init_args, call_args = parse_args()
inferencer = LidarSeg3DInferencer(**init_args)
inferencer(**call_args)
if call_args['out_dir'] != '' and not (call_args['no_save_vis']
and call_args['no_save_pred']):
print_log(
f'results have been saved at {call_args["out_dir"]}',
logger='current')
if __name__ == '__main__':
main()
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ENV TORCH_CUDA_ARCH_LIST="6.0 6.1 7.0 7.5 8.0 8.6+PTX" \
TORCH_NVCC_FLAGS="-Xfatbin -compress-all" \
CMAKE_PREFIX_PATH="$(dirname $(which conda))/../" \
FORCE_CUDA="1"
# Avoid Public GPG key error
# https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-key del 7fa2af80 \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list && \
# pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
# Install the required packages
RUN apt-get update \
&& apt-get install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Install MMEngine, MMCV and MMDetection
RUN pip install openmim && \
mim install "mmengine" "mmcv>=2.0.0rc4" "mmdet>=3.0.0"
# Install MMDetection3D
RUN conda clean --all \
&& git clone https://github.com/open-mmlab/mmdetection3d.git -b dev-1.x /mmdetection3d \
&& cd /mmdetection3d \
&& pip install --no-cache-dir -e .
WORKDIR /mmdetection3d
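# Example build/run (image tag and data path are placeholders):
#   docker build -t mmdetection3d docker/
#   docker run --gpus all -it -v {DATA_DIR}:/mmdetection3d/data mmdetection3d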
ARG PYTORCH="1.9.0"
ARG CUDA="11.1"
ARG CUDNN="8"
FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
ARG MMCV="2.0.0rc4"
ARG MMDET="3.3.0"
ARG MMDET3D="1.4.0"
ENV PYTHONUNBUFFERED=TRUE
# Avoid Public GPG key error
# https://github.com/NVIDIA/nvidia-docker/issues/1631
RUN rm /etc/apt/sources.list.d/cuda.list \
&& rm /etc/apt/sources.list.d/nvidia-ml.list \
&& apt-key del 7fa2af80 \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub \
&& apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
# (Optional, use Mirror to speed up downloads)
# RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list
# Install the required packages
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
ca-certificates \
g++ \
openjdk-11-jre-headless \
# MMDet3D Requirements
ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev \
&& rm -rf /var/lib/apt/lists/*
ENV PATH="/opt/conda/bin:$PATH" \
FORCE_CUDA="1"
# TORCHSERVE
RUN pip install torchserve torch-model-archiver
# MMLAB
ARG PYTORCH
ARG CUDA
RUN pip install openmim
RUN mim install mmengine
RUN mim install mmcv==${MMCV}
RUN mim install mmdet==${MMDET}
RUN mim install mmdet3d==${MMDET3D}
RUN useradd -m model-server \
&& mkdir -p /home/model-server/tmp
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh \
&& chown -R model-server /home/model-server
COPY config.properties /home/model-server/config.properties
RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
EXPOSE 8080 8081 8082
USER model-server
WORKDIR /home/model-server
ENV TEMP=/home/model-server/tmp
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
CMD ["serve"]
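# Example run (image tag is a placeholder); the three published ports are the
# TorchServe inference/management/metrics endpoints set in config.properties:
#   docker run --gpus all -p 8080:8080 -p 8081:8081 -p 8082:8082 mmdet3d-serve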
inference_address=http://0.0.0.0:8080
management_address=http://0.0.0.0:8081
metrics_address=http://0.0.0.0:8082
model_store=/home/model-server/model-store
load_models=all
#!/bin/bash
set -e
if [[ "$1" = "serve" ]]; then
shift 1
torchserve --start --ts-config /home/model-server/config.properties
else
eval "$@"
fi
# prevent docker exit
tail -f /dev/null
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
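# Example: `make html` builds the HTML docs into "_build/html" via Sphinx's
# make-mode (-M) interface.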
.header-logo {
background-image: url("../image/mmdet3d-logo.png");
background-size: 182.5px 40px;
height: 40px;
width: 182.5px;
}