Commit eb1107e4 authored by raojy's avatar raojy
Browse files

fix_mmdetection

parent 7aa442d5
Pipeline #3461 canceled with stages
_base_ = [
'../_base_/models/pointpillars_hv_secfpn_waymo.py',
'../_base_/datasets/waymoD5-3d-3class.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# data settings
train_dataloader = dict(dataset=dict(dataset=dict(load_interval=1)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (16 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
_base_ = [
'../_base_/models/pointpillars_hv_secfpn_waymo.py',
'../_base_/datasets/waymoD5-3d-car.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# data settings
train_dataloader = dict(dataset=dict(dataset=dict(load_interval=1)))
# model settings
model = dict(
type='MVXFasterRCNN',
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=1,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345]],
sizes=[[4.73, 2.08, 1.77]],
rotations=[0, 1.57],
reshape_out=True)),
# model training and testing settings
train_cfg=dict(
_delete_=True,
pts=dict(
assigner=dict(
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (16 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
_base_ = [
'../_base_/models/pointpillars_hv_secfpn_waymo.py',
'../_base_/datasets/waymoD5-3d-3class.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
_base_ = [
'../_base_/models/pointpillars_hv_secfpn_waymo.py',
'../_base_/datasets/waymoD5-3d-car.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='MVXFasterRCNN',
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=1,
anchor_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345]],
sizes=[[4.73, 2.08, 1.77]],
rotations=[0, 1.57],
reshape_out=True)),
# model training and testing settings
train_cfg=dict(
_delete_=True,
pts=dict(
assigner=dict(
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.55,
neg_iou_thr=0.4,
min_pos_iou=0.4,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
pos_weight=-1,
debug=False)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (16 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
_base_ = [
'../_base_/models/pointpillars_hv_fpn_range100_lyft.py',
'../_base_/datasets/lyft-3d-range100.py',
'../_base_/schedules/schedule-2x.py', '../_base_/default_runtime.py'
]
# model settings
model = dict(
pts_neck=dict(
_delete_=True,
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
in_channels=384,
feat_channels=384,
anchor_generator=dict(
_delete_=True,
type='AlignedAnchor3DRangeGenerator',
ranges=[[-100, -100, -1.0715024, 100, 100, -1.0715024],
[-100, -100, -0.3033737, 100, 100, -0.3033737],
[-100, -100, -0.3519405, 100, 100, -0.3519405],
[-100, -100, -0.8871424, 100, 100, -0.8871424],
[-100, -100, -0.6276341, 100, 100, -0.6276341],
[-100, -100, -1.3220503, 100, 100, -1.3220503],
[-100, -100, -1.0709302, 100, 100, -1.0709302],
[-100, -100, -0.9122268, 100, 100, -0.9122268],
[-100, -100, -1.8012227, 100, 100, -1.8012227]],
sizes=[
[4.75, 1.92, 1.71], # car
[10.24, 2.84, 3.44], # truck
[12.70, 2.92, 3.42], # bus
[6.52, 2.42, 2.34], # emergency vehicle
[8.17, 2.75, 3.20], # other vehicle
[2.35, 0.96, 1.59], # motorcycle
[1.76, 0.63, 1.44], # bicycle
[0.80, 0.76, 1.76], # pedestrian
[0.73, 0.35, 0.50] # animal
],
rotations=[0, 1.57],
reshape_out=True)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
_base_ = [
'../_base_/models/pointpillars_hv_fpn_lyft.py',
'../_base_/datasets/lyft-3d.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
pts_neck=dict(
_delete_=True,
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
in_channels=384,
feat_channels=384,
anchor_generator=dict(
_delete_=True,
type='AlignedAnchor3DRangeGenerator',
ranges=[[-80, -80, -1.0715024, 80, 80, -1.0715024],
[-80, -80, -0.3033737, 80, 80, -0.3033737],
[-80, -80, -0.3519405, 80, 80, -0.3519405],
[-80, -80, -0.8871424, 80, 80, -0.8871424],
[-80, -80, -0.6276341, 80, 80, -0.6276341],
[-80, -80, -1.3220503, 80, 80, -1.3220503],
[-80, -80, -1.0709302, 80, 80, -1.0709302],
[-80, -80, -0.9122268, 80, 80, -0.9122268],
[-80, -80, -1.8012227, 80, 80, -1.8012227]],
sizes=[
[4.75, 1.92, 1.71], # car
[10.24, 2.84, 3.44], # truck
[12.70, 2.92, 3.42], # bus
[6.52, 2.42, 2.34], # emergency vehicle
[8.17, 2.75, 3.20], # other vehicle
[2.35, 0.96, 1.59], # motorcycle
[1.76, 0.63, 1.44], # bicycle
[0.80, 0.76, 1.76], # pedestrian
[0.73, 0.35, 0.50] # animal
],
rotations=[0, 1.57],
reshape_out=True)))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
_base_ = './pointpillars_hv_secfpn_sbn-all_8xb4-2x_nus-3d.py'
train_dataloader = dict(batch_size=2, num_workers=2)
# schedule settings
optim_wrapper = dict(type='AmpOptimWrapper', loss_scale=4096.)
_base_ = [
'../_base_/models/pointpillars_hv_fpn_nus.py',
'../_base_/datasets/nus-3d.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
pts_neck=dict(
_delete_=True,
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
in_channels=384,
feat_channels=384,
anchor_generator=dict(
_delete_=True,
type='AlignedAnchor3DRangeGenerator',
ranges=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
sizes=[
[4.60718145, 1.95017717, 1.72270761], # car
[6.73778078, 2.4560939, 2.73004906], # truck
[12.01320693, 2.87427237, 3.81509561], # trailer
[1.68452161, 0.60058911, 1.27192197], # bicycle
[0.7256437, 0.66344886, 1.75748069], # pedestrian
[0.40359262, 0.39694519, 1.06232151], # traffic_cone
[0.48578221, 2.49008838, 0.98297065], # barrier
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True)))
# For nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 24. Please change the interval accordingly if you do not
# use a default schedule.
train_cfg = dict(val_interval=24)
# PV-RCNN: Point-Voxel Feature Set Abstraction for 3D Object Detection
> [PV-RCNN: Point-Voxel Feature Set Abstraction for 3D Object Detection](https://arxiv.org/abs/1912.13192)
<!-- [ALGORITHM] -->
## Abstract
3D object detection has been receiving increasing attention from both industry and academia thanks to its wide applications in various fields such as autonomous driving and robotics. LiDAR sensors are widely adopted in autonomous driving vehicles and robots for capturing 3D scene information as sparse and irregular point clouds, which provide vital cues for 3D scene perception and understanding. In this paper, we propose to achieve high performance 3D object detection by designing novel point-voxel integrated networks to learn better 3D features from irregular point clouds.
<div align=center>
<img src="https://user-images.githubusercontent.com/88368822/202114244-ccf52f56-b8c9-4f1b-9cc2-80c7a9952c99.png" width="800"/>
</div>
## Results and models
### KITTI
| Backbone | Class | Lr schd | Mem (GB) | Inf time (fps) | mAP | Download |
| :---------------------------------------------: | :-----: | :--------: | :------: | :------------: | :---: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [SECFPN](./pv_rcnn_8xb2-80e_kitti-3d-3class.py) | 3 Class | cyclic 80e | 5.4 | | 72.28 | [model](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth) \\ [log](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428.json) |
Note: mAP represents AP11 results on 3 Class under the moderate setting.
Detailed performance on KITTI 3D detection (3D) is as follows, evaluated by AP11 metric:
| | Easy | Moderate | Hard |
| ---------- | :---: | :------: | :---: |
| Car | 89.20 | 83.72 | 78.79 |
| Pedestrian | 66.64 | 59.84 | 55.33 |
| Cyclist | 87.25 | 73.27 | 69.61 |
## Citation
```latex
@article{ShaoshuaiShi2020PVRCNNPF,
title={PV-RCNN: Point-Voxel Feature Set Abstraction for 3D Object Detection},
author={Shaoshuai Shi and Chaoxu Guo and Li Jiang and Zhe Wang and Jianping Shi and Xiaogang Wang and Hongsheng Li},
journal={computer vision and pattern recognition},
year={2020}
}
```
Collections:
- Name: PV-RCNN
Metadata:
Training Data: KITTI
Training Techniques:
- AdamW
Training Resources: 8x A100 GPUs
Architecture:
- Feature Pyramid Network
Paper:
URL: https://arxiv.org/abs/1912.13192
Title: 'PV-RCNN: Point-Voxel Feature Set Abstraction for 3D Object Detection'
README: configs/pv_rcnn/README.md
Code:
URL: https://github.com/open-mmlab/mmdetection3d/blob/dev-1.x/mmdet3d/models/detectors/pv_rcnn.py#L12
Version: v1.1.0rc2
Models:
- Name: pv_rcnn_8xb2-80e_kitti-3d-3class
In Collection: PV-RCNN
Config: configs/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class.py
Metadata:
Training Memory (GB): 5.4
Results:
- Task: 3D Object Detection
Dataset: KITTI
Metrics:
mAP: 72.28
Weights: https://download.openmmlab.com/mmdetection3d/v1.1.0_models/pv_rcnn/pv_rcnn_8xb2-80e_kitti-3d-3class/pv_rcnn_8xb2-80e_kitti-3d-3class_20221117_234428-b384d22f.pth
_base_ = [
'../_base_/datasets/kitti-3d-3class.py',
'../_base_/schedules/cyclic-40e.py', '../_base_/default_runtime.py'
]
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(CLASSES=class_names)
backend_args = None
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=5, Cyclist=5)),
classes=class_names,
sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
points_loader=dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
backend_args=backend_args)
train_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(type='ObjectSample', db_sampler=db_sampler, use_ground_plane=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='GlobalRotScaleTrans',
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type='RandomFlip3D'),
dict(
type='PointsRangeFilter', point_cloud_range=point_cloud_range)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
model = dict(
type='PointVoxelRCNN',
data_preprocessor=dict(
type='Det3DDataPreprocessor',
voxel=True,
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(16000, 40000))),
voxel_encoder=dict(type='HardSimpleVFE'),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act'),
encoder_paddings=((0, 0, 0), ((1, 1, 1), 0, 0), ((1, 1, 1), 0, 0),
((0, 1, 1), 0, 0)),
return_middle_feats=True),
points_encoder=dict(
type='VoxelSetAbstraction',
num_keypoints=2048,
fused_out_channel=128,
voxel_size=voxel_size,
point_cloud_range=point_cloud_range,
voxel_sa_cfgs_list=[
dict(
type='StackedSAModuleMSG',
in_channels=16,
scale_factor=1,
radius=(0.4, 0.8),
sample_nums=(16, 16),
mlp_channels=((16, 16), (16, 16)),
use_xyz=True),
dict(
type='StackedSAModuleMSG',
in_channels=32,
scale_factor=2,
radius=(0.8, 1.2),
sample_nums=(16, 32),
mlp_channels=((32, 32), (32, 32)),
use_xyz=True),
dict(
type='StackedSAModuleMSG',
in_channels=64,
scale_factor=4,
radius=(1.2, 2.4),
sample_nums=(16, 32),
mlp_channels=((64, 64), (64, 64)),
use_xyz=True),
dict(
type='StackedSAModuleMSG',
in_channels=64,
scale_factor=8,
radius=(2.4, 4.8),
sample_nums=(16, 32),
mlp_channels=((64, 64), (64, 64)),
use_xyz=True)
],
rawpoints_sa_cfgs=dict(
type='StackedSAModuleMSG',
in_channels=1,
radius=(0.4, 0.8),
sample_nums=(16, 16),
mlp_channels=((16, 16), (16, 16)),
use_xyz=True),
bev_feat_channel=256,
bev_scale_factor=8),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
dir_offset=0.78539,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
roi_head=dict(
type='PVRCNNRoiHead',
num_classes=3,
semantic_head=dict(
type='ForegroundSegmentationHead',
in_channels=640,
extra_width=0.1,
loss_seg=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
activated=True,
loss_weight=1.0)),
bbox_roi_extractor=dict(
type='Batch3DRoIGridExtractor',
grid_size=6,
roi_layer=dict(
type='StackedSAModuleMSG',
in_channels=128,
radius=(0.8, 1.6),
sample_nums=(16, 16),
mlp_channels=((64, 64), (64, 64)),
use_xyz=True,
pool_mod='max'),
),
bbox_head=dict(
type='PVRCNNBBoxHead',
in_channels=128,
grid_size=6,
num_classes=3,
class_agnostic=True,
shared_fc_channels=(256, 256),
reg_channels=(256, 256),
cls_channels=(256, 256),
dropout_ratio=0.3,
with_corner_loss=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_bbox=dict(
type='mmdet.SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))),
# model training and testing settings
train_cfg=dict(
rpn=dict(
assigner=[
dict( # for Pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Cyclist
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.5,
neg_iou_thr=0.35,
min_pos_iou=0.35,
ignore_iof_thr=-1),
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1)
],
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
max_num=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=True),
rcnn=dict(
assigner=[
dict( # for Pedestrian
type='Max3DIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Cyclist
type='Max3DIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Car
type='Max3DIoUAssigner',
iou_calculator=dict(
type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1)
],
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.5,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.75,
cls_neg_thr=0.25)),
test_cfg=dict(
rpn=dict(
nms_pre=1024,
nms_post=100,
max_num=100,
nms_thr=0.7,
score_thr=0,
use_rotate_nms=True),
rcnn=dict(
use_rotate_nms=True,
use_raw_score=True,
nms_thr=0.1,
score_thr=0.1)))
train_dataloader = dict(
batch_size=2,
num_workers=2,
dataset=dict(dataset=dict(pipeline=train_pipeline, metainfo=metainfo)))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo))
eval_dataloader = dict(dataset=dict(pipeline=test_pipeline, metainfo=metainfo))
lr = 0.001
optim_wrapper = dict(optimizer=dict(lr=lr))
param_scheduler = [
# learning rate scheduler
# During the first 16 epochs, learning rate increases from 0 to lr * 10
# during the next 24 epochs, learning rate decreases from lr * 10 to
# lr * 1e-4
dict(
type='CosineAnnealingLR',
T_max=15,
eta_min=lr * 10,
begin=0,
end=15,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingLR',
T_max=25,
eta_min=lr * 1e-4,
begin=15,
end=40,
by_epoch=True,
convert_to_iter_based=True),
# momentum scheduler
# During the first 16 epochs, momentum increases from 0 to 0.85 / 0.95
# during the next 24 epochs, momentum increases from 0.85 / 0.95 to 1
dict(
type='CosineAnnealingMomentum',
T_max=15,
eta_min=0.85 / 0.95,
begin=0,
end=15,
by_epoch=True,
convert_to_iter_based=True),
dict(
type='CosineAnnealingMomentum',
T_max=25,
eta_min=1,
begin=15,
end=40,
by_epoch=True,
convert_to_iter_based=True)
]
# Designing Network Design Spaces
> [Designing Network Design Spaces](https://arxiv.org/abs/2003.13678)
<!-- [BACKBONE] -->
## Abstract
In this work, we present a new network design paradigm. Our goal is to help advance the understanding of network design and discover design principles that generalize across settings. Instead of focusing on designing individual network instances, we design network design spaces that parametrize populations of networks. The overall process is analogous to classic manual design of networks, but elevated to the design space level. Using our methodology we explore the structure aspect of network design and arrive at a low-dimensional design space consisting of simple, regular networks that we call RegNet. The core insight of the RegNet parametrization is surprisingly simple: widths and depths of good networks can be explained by a quantized linear function. We analyze the RegNet design space and arrive at interesting findings that do not match the current practice of network design. The RegNet design space provides simple and fast networks that work well across a wide range of flop regimes. Under comparable training settings and flops, the RegNet models outperform the popular EfficientNet models while being up to 5x faster on GPUs.
<div align=center>
<img src="https://user-images.githubusercontent.com/79644370/144025148-b73002cb-3c82-42e4-8da4-65df97aead9c.png" width="800"/>
</div>
## Introduction
We implement RegNetX models in 3D detection systems and provide their first results with PointPillars on nuScenes and Lyft dataset.
The pre-trained modles are converted from [model zoo of pycls](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md) and maintained in [mmcv](https://github.com/open-mmlab/mmcv).
## Usage
To use a regnet model, there are two steps to do:
1. Convert the model to ResNet-style supported by MMDetection
2. Modify backbone and neck in config accordingly
### Convert model
We already prepare models of FLOPs from 800M to 12G in our model zoo.
For more general usage, we also provide script `regnet2mmdet.py` in the tools directory to convert the key of models pretrained by [pycls](https://github.com/facebookresearch/pycls/) to
ResNet-style checkpoints used in MMDetection.
```bash
python -u tools/model_converters/regnet2mmdet.py ${PRETRAIN_PATH} ${STORE_PATH}
```
This script convert model from `PRETRAIN_PATH` and store the converted model in `STORE_PATH`.
### Modify config
The users can modify the config's `depth` of backbone and corresponding keys in `arch` according to the configs in the [pycls model zoo](https://github.com/facebookresearch/pycls/blob/master/MODEL_ZOO.md).
The parameter `in_channels` in FPN can be found in the Figure 15 & 16 of the paper (`wi` in the legend).
This directory already provides some configs with their performance, using RegNetX from 800MF to 12GF level.
For other pre-trained models or self-implemented regnet models, the users are responsible to check these parameters by themselves.
**Note**: Although Fig. 15 & 16 also provide `w0`, `wa`, `wm`, `group_w`, and `bot_mul` for `arch`, they are quantized thus inaccurate, using them sometimes produces different backbone that does not match the key in the pre-trained model.
## Results and models
### nuScenes
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | mAP | NDS | Download |
| :-------------------------------------------------------------------------------------: | :-----: | :------: | :------------: | :---: | :--: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [SECFPN](../pointpillars/pointpillars_hv_secfpn_sbn-all_8xb4-2x_nus-3d.py) | 2x | 16.4 | | 35.17 | 49.7 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725-0817d270.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230725.log.json) |
| [RegNetX-400MF-SECFPN](./pointpillars_hv_regnet-400mf_secfpn_sbn-all_8xb4-2x_nus-3d.py) | 2x | 16.4 | | 41.2 | 55.2 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334-53044f32.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334.log.json) |
| [FPN](../pointpillars/pointpillars_hv_fpn_sbn-all_8xb4-2x_nus-3d.py) | 2x | 17.1 | | 40.0 | 53.3 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405-2fa62f3d.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d_20200620_230405.log.json) |
| [RegNetX-400MF-FPN](./pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb4-2x_nus-3d.py) | 2x | 17.3 | | 44.8 | 56.4 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239-c694dce7.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239.log.json) |
| [RegNetX-1.6gF-FPN](./pointpillars_hv_regnet-1.6gf_fpn_sbn-all_8xb4-2x_nus-3d.py) | 2x | 24.0 | | 48.2 | 59.3 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d_20200629_050311-dcd4e090.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d_20200629_050311.log.json) |
### Lyft
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | Private Score | Public Score | Download |
| :-------------------------------------------------------------------------------------: | :-----: | :------: | :------------: | :-----------: | :----------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [SECFPN](../pointpillars/pointpillars_hv_secfpn_sbn-all_8xb2-2x_lyft-3d.py) | 2x | 12.2 | | 13.9 | 14.1 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d_20210517_204807-2518e3de.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_secfpn_sbn-all_2x8_2x_lyft-3d_20210517_204807.log.json) |
| [RegNetX-400MF-SECFPN](./hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_lyft-3d.py) | 2x | 15.9 | | 14.9 | 15.1 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d_20210524_092151-42513826.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d_20210524_092151.log.json) |
| [FPN](../pointpillars/pointpillars_hv_fpn_sbn-all_8xb2-2x_lyft-3d.py) | 2x | 9.2 | | 14.9 | 15.1 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d_20210517_202818-fc6904c3.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/pointpillars/hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_fpn_sbn-all_2x8_2x_lyft-3d_20210517_202818.log.json) |
| [RegNetX-400MF-FPN](./hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_lyft-3d.py) | 2x | 13.0 | | 16.0 | 16.1 | [model](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d_20210521_115618-823dcf18.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d_20210521_115618.log.json) |
## Citation
```latex
@article{radosavovic2020designing,
title={Designing Network Design Spaces},
author={Ilija Radosavovic and Raj Prateek Kosaraju and Ross Girshick and Kaiming He and Piotr Dollár},
year={2020},
eprint={2003.13678},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
```
Models:
- Name: pointpillars_hv_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d
In Collection: PointPillars
Config: configs/regnet/pointpillars_hv_regnet-400mf_secfpn_sbn-all_8xb4-2x_nus-3d.py
Metadata:
Training Data: nuScenes
Training Memory (GB): 16.4
Architecture:
- RegNetX
- Hard Voxelization
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 41.2
NDS: 55.2
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d_20200620_230334-53044f32.pth
- Name: pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb4-2x_nus-3d
In Collection: PointPillars
Config: configs/regnet/pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb4-2x_nus-3d.py
Metadata:
Training Data: nuScenes
Training Memory (GB): 17.3
Architecture:
- RegNetX
- Hard Voxelization
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 44.8
NDS: 56.4
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d_20200620_230239-c694dce7.pth
- Name: pointpillars_hv_regnet-1.6gf_fpn_sbn-all_8xb4-2x_nus-3d
In Collection: PointPillars
Config: configs/regnet/pointpillars_hv_regnet-1.6gf_fpn_sbn-all_8xb4-2x_nus-3d.py
Metadata:
Training Data: nuScenes
Training Memory (GB): 24.0
Architecture:
- RegNetX
- Hard Voxelization
Results:
- Task: 3D Object Detection
Dataset: nuScenes
Metrics:
mAP: 48.2
NDS: 59.3
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d/hv_pointpillars_regnet-1.6gf_fpn_sbn-all_4x8_2x_nus-3d_20200629_050311-dcd4e090.pth
- Name: pointpillars_hv_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d
In Collection: PointPillars
Config: configs/regnet/pointpillars_hv_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d.py
Metadata:
Training Data: Lyft
Training Memory (GB): 15.9
Architecture:
- RegNetX
- Hard Voxelization
Results:
- Task: 3D Object Detection
Dataset: Lyft
Metrics:
Private Score: 14.9
Public Score: 15.1
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_regnet-400mf_secfpn_sbn-all_2x8_2x_lyft-3d_20210524_092151-42513826.pth
- Name: pointpillars_hv_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d
In Collection: PointPillars
Config: configs/regnet/pointpillars_hv_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d.py
Metadata:
Training Data: Lyft
Training Memory (GB): 13.0
Architecture:
- RegNetX
- Hard Voxelization
Results:
- Task: 3D Object Detection
Dataset: Lyft
Metrics:
Private Score: 16.0
Public Score: 16.1
Weights: https://download.openmmlab.com/mmdetection3d/v0.1.0_models/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d/hv_pointpillars_regnet-400mf_fpn_sbn-all_2x8_2x_lyft-3d_20210521_115618-823dcf18.pth
_base_ = [
'../_base_/models/pointpillars_hv_fpn_nus.py',
'../_base_/datasets/nus-3d.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='MVXFasterRCNN',
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
arch='regnetx_1.6gf',
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf'),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
stem_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(in_channels=[168, 408, 912]))
_base_ = [
'../_base_/models/pointpillars_hv_fpn_lyft.py',
'../_base_/datasets/lyft-3d.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='MVXFasterRCNN',
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://regnetx_400mf'),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
stem_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(in_channels=[64, 160, 384]))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
_base_ = [
'../_base_/models/pointpillars_hv_fpn_nus.py',
'../_base_/datasets/nus-3d.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='MVXFasterRCNN',
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://regnetx_400mf'),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
stem_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(in_channels=[64, 160, 384]))
_base_ = [
'../_base_/models/pointpillars_hv_fpn_range100_lyft.py',
'../_base_/datasets/lyft-3d-range100.py',
'../_base_/schedules/schedule-2x.py',
'../_base_/default_runtime.py',
]
# model settings
model = dict(
type='MVXFasterRCNN',
pts_backbone=dict(
_delete_=True,
type='NoStemRegNet',
arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
init_cfg=dict(
type='Pretrained', checkpoint='open-mmlab://regnetx_400mf'),
out_indices=(1, 2, 3),
frozen_stages=-1,
strides=(1, 2, 2, 2),
base_channels=64,
stem_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
norm_eval=False,
style='pytorch'),
pts_neck=dict(in_channels=[64, 160, 384]))
# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
# or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
_base_ = './pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb2-2x_lyft-3d.py'
# model settings
model = dict(
pts_neck=dict(
type='SECONDFPN',
_delete_=True,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 160, 384],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
type='Anchor3DHead',
in_channels=384,
feat_channels=384,
anchor_generator=dict(
_delete_=True,
type='AlignedAnchor3DRangeGenerator',
ranges=[[-80, -80, -1.0715024, 80, 80, -1.0715024],
[-80, -80, -0.3033737, 80, 80, -0.3033737],
[-80, -80, -0.3519405, 80, 80, -0.3519405],
[-80, -80, -0.8871424, 80, 80, -0.8871424],
[-80, -80, -0.6276341, 80, 80, -0.6276341],
[-80, -80, -1.3220503, 80, 80, -1.3220503],
[-80, -80, -1.0709302, 80, 80, -1.0709302],
[-80, -80, -0.9122268, 80, 80, -0.9122268],
[-80, -80, -1.8012227, 80, 80, -1.8012227]],
sizes=[
[4.75, 1.92, 1.71], # car
[10.24, 2.84, 3.44], # truck
[12.70, 2.92, 3.42], # bus
[6.52, 2.42, 2.34], # emergency vehicle
[8.17, 2.75, 3.20], # other vehicle
[2.35, 0.96, 1.59], # motorcycle
[1.76, 0.63, 1.44], # bicycle
[0.80, 0.76, 1.76], # pedestrian
[0.73, 0.35, 0.50] # animal
],
rotations=[0, 1.57],
reshape_out=True)))
_base_ = './pointpillars_hv_regnet-400mf_fpn_sbn-all_8xb4-2x_nus-3d.py'
# model settings
model = dict(
pts_neck=dict(
type='SECONDFPN',
_delete_=True,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 160, 384],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
type='Anchor3DHead',
in_channels=384,
feat_channels=384,
anchor_generator=dict(
_delete_=True,
type='AlignedAnchor3DRangeGenerator',
ranges=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
sizes=[
[4.60718145, 1.95017717, 1.72270761], # car
[6.73778078, 2.4560939, 2.73004906], # truck
[12.01320693, 2.87427237, 3.81509561], # trailer
[1.68452161, 0.60058911, 1.27192197], # bicycle
[0.7256437, 0.66344886, 1.75748069], # pedestrian
[0.40359262, 0.39694519, 1.06232151], # traffic_cone
[0.48578221, 2.49008838, 0.98297065], # barrier
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True)))
_base_ = \
'./pointpillars_hv_regnet-400mf_fpn_sbn-all_range100_8xb2-2x_lyft-3d.py'
# model settings
model = dict(
pts_neck=dict(
type='SECONDFPN',
_delete_=True,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 160, 384],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128]),
pts_bbox_head=dict(
type='Anchor3DHead',
in_channels=384,
feat_channels=384,
anchor_generator=dict(
_delete_=True,
type='AlignedAnchor3DRangeGenerator',
ranges=[[-100, -100, -1.0715024, 100, 100, -1.0715024],
[-100, -100, -0.3033737, 100, 100, -0.3033737],
[-100, -100, -0.3519405, 100, 100, -0.3519405],
[-100, -100, -0.8871424, 100, 100, -0.8871424],
[-100, -100, -0.6276341, 100, 100, -0.6276341],
[-100, -100, -1.3220503, 100, 100, -1.3220503],
[-100, -100, -1.0709302, 100, 100, -1.0709302],
[-100, -100, -0.9122268, 100, 100, -0.9122268],
[-100, -100, -1.8012227, 100, 100, -1.8012227]],
sizes=[
[4.75, 1.92, 1.71], # car
[10.24, 2.84, 3.44], # truck
[12.70, 2.92, 3.42], # bus
[6.52, 2.42, 2.34], # emergency vehicle
[8.17, 2.75, 3.20], # other vehicle
[2.35, 0.96, 1.59], # motorcycle
[1.76, 0.63, 1.44], # bicycle
[0.80, 0.76, 1.76], # pedestrian
[0.73, 0.35, 0.50] # animal
],
rotations=[0, 1.57],
reshape_out=True)))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment