Commit d2b71343 authored by 雍大凯's avatar 雍大凯
Browse files

add code

parent 69e57885
# Base config files (mmcv-style `_base_` inheritance): the nuScenes dataset
# defaults and the default runtime settings from mmdetection3d.
_base_ = [
    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
    '../../../mmdetection3d/configs/_base_/default_runtime.py',
]

# Plugin switch and plugin directory.
# NOTE(review): presumably read by the train/test scripts to import the
# project modules - confirm against the launcher.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# Referenced by the model config (bbox coder and train_cfg).
point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
]

# Camera list, image sizes and image-space augmentation settings; handed to
# the PrepareImageInputs pipeline step.
data_config = dict(
    cams=[
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
    ],
    Ncams=6,
    input_size=(256, 704),
    src_size=(900, 1600),
    # Augmentation
    resize=(-0.06, 0.11),
    rot=(-5.4, 5.4),
    flip=True,
    crop_h=(0.0, 0.0),
    resize_test=0.00,
)

# Grid definition shared by the view transformer and PointToMultiViewDepth.
# NOTE(review): each entry is presumably [lower, upper, interval] - confirm.
grid_config = dict(
    x=[-40, 40, 0.4],
    y=[-40, 40, 0.4],
    z=[-1, 5.4, 6.4],
    depth=[1.0, 45.0, 0.5],
)

voxel_size = [0.1, 0.1, 0.2]

# Channel count produced by the view transformer; the BEV encoder widths are
# expressed as multiples of it.
numC_Trans = 80
# Model config: a 'BEVDepthPano' detector (single-frame) made of an image
# backbone and neck, a view transformer, a BEV encoder (backbone + neck), an
# auxiliary centerness head and an occupancy head.
model = dict(
    type='BEVDepthPano',  # single-frame
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
        pretrained='torchvision://resnet50',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=256,
        num_outs=1,
        start_level=0,
        out_ids=[0],
    ),
    img_view_transformer=dict(
        type='LSSViewTransformerBEVDepth',
        grid_config=grid_config,
        input_size=data_config['input_size'],
        # Same value as img_neck.out_channels above.
        in_channels=256,
        out_channels=numC_Trans,
        loss_depth_weight=1,
        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
        downsample=16,
    ),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        numC_input=numC_Trans,
        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8],
    ),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        # Sum of the last and the first entry of the backbone's num_channels.
        in_channels=numC_Trans * 8 + numC_Trans * 2,
        out_channels=256,
    ),
    aux_centerness_head=dict(
        type='Centerness_Head',
        task_specific_weight=[1, 1, 0, 0, 0],
        in_channels=256,
        tasks=[
            # A single task holding the same ten names as `class_names`.
            dict(
                num_class=10,
                class_names=[
                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                    'traffic_cone',
                ],
            ),
        ],
        common_heads=dict(
            reg=(2, 2),
            height=(1, 2),
            dim=(3, 2),
            rot=(2, 2),
            vel=(2, 2),
        ),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            pc_range=point_cloud_range[:2],
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.3,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9,
        ),
        separate_head=dict(
            type='SeparateHead',
            init_bias=-2.19,
            final_kernel=3,
        ),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True,
    ),
    occ_head=dict(
        type='BEVOCCHead2D_V2',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=False,
        num_classes=18,
        use_predicter=True,
        class_balance=True,
        loss_occ=dict(
            type='CustomFocalLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            point_cloud_range=point_cloud_range,
            grid_size=[800, 800, 40],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[
                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2,
            ],
        ),
    ),
    test_cfg=dict(
        pts=dict(
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            pre_max_size=1000,
            post_max_size=500,
            # Scale-NMS
            nms_type=['rotate'],
            nms_thr=[0.2],
            nms_rescale_factor=[
                [1.0, 0.7, 0.7, 0.4, 0.55, 1.1, 1.0, 1.0, 1.5, 3.5],
            ],
        ),
    ),
)
# Data
# NOTE(review): 'Occpancy' is kept exactly as spelled; it presumably has to
# match the dataset class name registered by the plugin - confirm before
# renaming.
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')

# Augmentation settings handed to LoadAnnotationsBEVDepth in both pipelines.
bda_aug_conf = dict(
    rot_lim=(-0., 0.),
    scale_lim=(1., 1.),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5,
)

# Training pipeline; the final Collect3D step keeps the image inputs, the
# depth ground truth and the occupancy labels/masks.
train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=False),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=True),
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
            'mask_camera',
        ]),
]

# Test/validation pipeline; it is also reused by the `evaluation` setting.
test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=False),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points', 'img_inputs']),
        ]),
]

input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)

# Settings merged into every dataset split by the loop at the end of this
# section.
share_data_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet',
)

test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        data_root=data_root,
        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        use_valid_flag=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    # val and test reference the very same dict object.
    val=test_data_config,
    test=test_data_config)

# Merge the shared settings into each split. The body has to be indented
# under the `for`; without the indent the file does not parse. Because val
# and test alias one dict, that dict is updated twice with identical values.
for key in ['val', 'train', 'test']:
    data[key].update(share_data_config)
# Optimizer
optimizer = {'type': 'AdamW', 'lr': 1e-4, 'weight_decay': 1e-2}
optimizer_config = {'grad_clip': {'max_norm': 5, 'norm_type': 2}}

# Step learning-rate policy with a linear warmup.
# NOTE(review): the only step (24) equals max_epochs below, so presumably no
# decay takes effect during training - confirm this is intended.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 200,
    'warmup_ratio': 0.001,
    'step': [24],
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 24}

custom_hooks = [
    {
        'type': 'MEGVIIEMAHook',
        'init_updates': 10560,
        'priority': 'NORMAL',
    },
]

# Checkpoint used to initialise the model weights.
load_from = 'ckpts/bevdet-r50-4d-depth-cbgs.pth'
# fp16 = dict(loss_scale='dynamic')
# Evaluation reuses the test pipeline; checkpoints are written every epoch
# with at most five kept.
evaluation = {'interval': 1, 'start': 20, 'pipeline': test_pipeline}
checkpoint_config = {'interval': 1, 'max_keep_ckpts': 5}
# use_mask = False
# +----------------------+----------+----------+----------+
# | Class Names | RayIoU@1 | RayIoU@2 | RayIoU@4 |
# +----------------------+----------+----------+----------+
# | others | 0.090 | 0.102 | 0.105 |
# | barrier | 0.387 | 0.442 | 0.465 |
# | bicycle | 0.218 | 0.257 | 0.265 |
# | bus | 0.514 | 0.613 | 0.669 |
# | car | 0.487 | 0.564 | 0.592 |
# | construction_vehicle | 0.176 | 0.254 | 0.288 |
# | motorcycle | 0.203 | 0.292 | 0.310 |
# | pedestrian | 0.301 | 0.349 | 0.366 |
# | traffic_cone | 0.280 | 0.313 | 0.321 |
# | trailer | 0.227 | 0.313 | 0.390 |
# | truck | 0.395 | 0.493 | 0.537 |
# | driveable_surface | 0.534 | 0.618 | 0.708 |
# | other_flat | 0.289 | 0.326 | 0.356 |
# | sidewalk | 0.234 | 0.280 | 0.329 |
# | terrain | 0.222 | 0.291 | 0.356 |
# | manmade | 0.280 | 0.351 | 0.401 |
# | vegetation | 0.176 | 0.273 | 0.359 |
# +----------------------+----------+----------+----------+
# | MEAN | 0.295 | 0.361 | 0.401 |
# +----------------------+----------+----------+----------+
# +----------------------+---------+---------+---------+
# | Class Names | RayPQ@1 | RayPQ@2 | RayPQ@4 |
# +----------------------+---------+---------+---------+
# | others | 0.017 | 0.025 | 0.026 |
# | barrier | 0.125 | 0.182 | 0.218 |
# | bicycle | 0.051 | 0.072 | 0.076 |
# | bus | 0.275 | 0.366 | 0.422 |
# | car | 0.242 | 0.332 | 0.356 |
# | construction_vehicle | 0.016 | 0.058 | 0.092 |
# | motorcycle | 0.071 | 0.124 | 0.137 |
# | pedestrian | 0.017 | 0.022 | 0.023 |
# | traffic_cone | 0.032 | 0.040 | 0.044 |
# | trailer | 0.035 | 0.055 | 0.063 |
# | truck | 0.145 | 0.232 | 0.282 |
# | driveable_surface | 0.410 | 0.537 | 0.665 |
# | other_flat | 0.062 | 0.087 | 0.109 |
# | sidewalk | 0.008 | 0.030 | 0.064 |
# | terrain | 0.010 | 0.026 | 0.047 |
# | manmade | 0.054 | 0.091 | 0.134 |
# | vegetation | 0.003 | 0.022 | 0.092 |
# +----------------------+---------+---------+---------+
# | MEAN | 0.092 | 0.135 | 0.168 |
# +----------------------+---------+---------+---------+
# {'RayIoU': 0.35223182059688496, 'RayIoU@1': 0.29499743138394385, 'RayIoU@2': 0.3607063492639709, 'RayIoU@4': 0.4009916811427401, 'RayPQ': 0.13182524545677765, 'RayPQ@1': 0.09247682620339576, 'RayPQ@2': 0.1354024129684159, 'RayPQ@4': 0.16759649719852124}
# Base config files (mmcv-style `_base_` inheritance): the nuScenes dataset
# defaults and the default runtime settings from mmdetection3d.
_base_ = [
    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
    '../../../mmdetection3d/configs/_base_/default_runtime.py',
]

# Plugin switch and plugin directory.
# NOTE(review): presumably read by the train/test scripts to import the
# project modules - confirm against the launcher.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# Referenced by the model config (bbox coder and train_cfg).
# point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
]

# Camera list, image sizes and image-space augmentation settings; handed to
# the PrepareImageInputs pipeline step.
data_config = dict(
    cams=[
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
    ],
    Ncams=6,
    input_size=(256, 704),
    src_size=(900, 1600),
    # Augmentation
    resize=(-0.06, 0.11),
    rot=(-5.4, 5.4),
    flip=True,
    crop_h=(0.0, 0.0),
    resize_test=0.00,
)

# Grid definition shared by the view transformer and PointToMultiViewDepth.
# NOTE(review): each entry is presumably [lower, upper, interval] - confirm.
grid_config = dict(
    x=[-40, 40, 0.4],
    y=[-40, 40, 0.4],
    z=[-1, 5.4, 6.4],
    depth=[1.0, 45.0, 0.5],
)

voxel_size = [0.1, 0.1, 0.2]

# Channel count produced by the view transformer; the BEV encoder widths are
# expressed as multiples of it.
numC_Trans = 80

# Unpacked into range(...) by the model config, i.e. range(1, 17, 1): sixteen
# adjacent-frame ids.
multi_adj_frame_id_cfg = (1, 16 + 1, 1)
# Model config: a 'BEVDepth4DPano' detector made of an image backbone and
# neck, a view transformer, a per-frame pre-process network, a BEV encoder
# (backbone + neck), an auxiliary centerness head and an occupancy head.
model = dict(
    type='BEVDepth4DPano',
    # Upper bound of the adjacent-frame range minus one.
    num_adj=multi_adj_frame_id_cfg[1] - 1,
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
        pretrained='torchvision://resnet50',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=512,
        num_outs=1,
        start_level=0,
        out_ids=[0],
    ),
    img_view_transformer=dict(
        type='LSSViewTransformerBEVDepth',
        grid_config=grid_config,
        input_size=data_config['input_size'],
        # Same value as img_neck.out_channels above.
        in_channels=512,
        out_channels=numC_Trans,
        loss_depth_weight=1,
        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
        downsample=16,
    ),
    pre_process=dict(
        type='CustomResNet',
        numC_input=numC_Trans,
        num_layer=[1],
        num_channels=[numC_Trans],
        stride=[1],
        backbone_output_ids=[0],
    ),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        # numC_Trans channels for every adjacent-frame id plus one more.
        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg)) + 1),
        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8],
    ),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        # Sum of the last and the first entry of the backbone's num_channels.
        in_channels=numC_Trans * 8 + numC_Trans * 2,
        out_channels=256,
    ),
    aux_centerness_head=dict(
        type='Centerness_Head',
        task_specific_weight=[1, 1, 0, 0, 0],
        in_channels=256,
        tasks=[
            # A single task holding the same ten names as `class_names`.
            dict(
                num_class=10,
                class_names=[
                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                    'traffic_cone',
                ],
            ),
        ],
        common_heads=dict(
            reg=(2, 2),
            height=(1, 2),
            dim=(3, 2),
            rot=(2, 2),
            vel=(2, 2),
        ),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            pc_range=point_cloud_range[:2],
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9,
        ),
        separate_head=dict(
            type='SeparateHead',
            init_bias=-2.19,
            final_kernel=3,
        ),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True,
    ),
    occ_head=dict(
        type='BEVOCCHead2D_V2',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=False,
        num_classes=18,
        use_predicter=True,
        class_balance=True,
        loss_occ=dict(
            type='CustomFocalLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            point_cloud_range=point_cloud_range,
            grid_size=[800, 800, 40],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[
                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2,
            ],
        ),
    ),
    test_cfg=dict(
        pts=dict(
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            pre_max_size=1000,
            post_max_size=500,
            # Scale-NMS
            nms_type=['rotate'],
            nms_thr=[0.2],
            nms_rescale_factor=[
                [1.0, 0.7, 0.7, 0.4, 0.55, 1.1, 1.0, 1.0, 1.5, 3.5],
            ],
        ),
    ),
)
# Data
# NOTE(review): 'Occpancy' is kept exactly as spelled; it presumably has to
# match the dataset class name registered by the plugin - confirm before
# renaming.
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')

# Augmentation settings handed to LoadAnnotationsBEVDepth in both pipelines.
bda_aug_conf = dict(
    rot_lim=(-0., 0.),
    scale_lim=(1., 1.),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5,
)

# Training pipeline; the final Collect3D step keeps the image inputs, the
# depth ground truth, the occupancy labels/masks and the 3D boxes/labels.
train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=True),
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
            'mask_camera', 'gt_bboxes_3d', 'gt_labels_3d',
        ]),
]

# Test/validation pipeline; it is also reused by the `evaluation` setting.
test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(
                type='Collect3D',
                keys=[
                    'points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d',
                ]),
        ]),
]

input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)

# Settings merged into every dataset split by the loop at the end of this
# section.
share_data_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet4d',
    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
)

test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        data_root=data_root,
        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        use_valid_flag=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    # val and test reference the very same dict object.
    val=test_data_config,
    test=test_data_config)

# Merge the shared settings into each split. The body has to be indented
# under the `for`; without the indent the file does not parse. Because val
# and test alias one dict, that dict is updated twice with identical values.
for key in ['val', 'train', 'test']:
    data[key].update(share_data_config)
# Optimizer
optimizer = {'type': 'AdamW', 'lr': 1e-4, 'weight_decay': 1e-2}
optimizer_config = {'grad_clip': {'max_norm': 5, 'norm_type': 2}}

# Step learning-rate policy with a linear warmup.
# NOTE(review): the only step (24) equals max_epochs below, so presumably no
# decay takes effect during training - confirm this is intended.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 200,
    'warmup_ratio': 0.001,
    'step': [24],
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 24}

custom_hooks = [
    {
        'type': 'MEGVIIEMAHook',
        'init_updates': 10560,
        'priority': 'NORMAL',
    },
]

# Checkpoint used to initialise the model weights.
load_from = 'ckpts/bevdet-r50-4d-depth-cbgs.pth'
# fp16 = dict(loss_scale='dynamic')
# Evaluation reuses the test pipeline; checkpoints are written every epoch
# with at most five kept.
evaluation = {'interval': 1, 'start': 20, 'pipeline': test_pipeline}
checkpoint_config = {'interval': 1, 'max_keep_ckpts': 5}
# Base config files (mmcv-style `_base_` inheritance): the nuScenes dataset
# defaults and the default runtime settings from mmdetection3d.
_base_ = [
    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
    '../../../mmdetection3d/configs/_base_/default_runtime.py',
]

# Plugin switch and plugin directory.
# NOTE(review): presumably read by the train/test scripts to import the
# project modules - confirm against the launcher.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# NOTE(review): this spans +/-51.2 while grid_config below spans +/-40;
# confirm whether the difference is intended.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
]

# Camera list, image sizes and image-space augmentation settings; handed to
# the PrepareImageInputs pipeline step.
data_config = dict(
    cams=[
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
    ],
    Ncams=6,
    input_size=(256, 704),
    src_size=(900, 1600),
    # Augmentation
    resize=(-0.06, 0.11),
    rot=(-5.4, 5.4),
    flip=True,
    crop_h=(0.0, 0.0),
    resize_test=0.00,
)

# Grid definition shared by the view transformer and PointToMultiViewDepth.
# NOTE(review): each entry is presumably [lower, upper, interval] - confirm.
grid_config = dict(
    x=[-40, 40, 0.4],
    y=[-40, 40, 0.4],
    z=[-1, 5.4, 6.4],
    depth=[1.0, 45.0, 0.5],
)

voxel_size = [0.1, 0.1, 0.2]

# Channel count produced by the view transformer; the BEV encoder widths are
# expressed as multiples of it.
numC_Trans = 80

# Unpacked into range(...) by the model config, i.e. range(1, 17, 1): sixteen
# adjacent-frame ids.
multi_adj_frame_id_cfg = (1, 16 + 1, 1)
# Model config: a 'BEVDepth4DOCC' model made of an image backbone and neck, a
# view transformer, a per-frame pre-process network, a BEV encoder (backbone
# + neck) and an occupancy head.
model = dict(
    type='BEVDepth4DOCC',
    # Upper bound of the adjacent-frame range minus one.
    num_adj=multi_adj_frame_id_cfg[1] - 1,
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
        pretrained='torchvision://resnet50',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=512,
        num_outs=1,
        start_level=0,
        out_ids=[0],
    ),
    img_view_transformer=dict(
        type='LSSViewTransformerBEVDepth',
        grid_config=grid_config,
        input_size=data_config['input_size'],
        # Same value as img_neck.out_channels above.
        in_channels=512,
        out_channels=numC_Trans,
        loss_depth_weight=1,
        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
        downsample=16,
    ),
    pre_process=dict(
        type='CustomResNet',
        numC_input=numC_Trans,
        num_layer=[1],
        num_channels=[numC_Trans],
        stride=[1],
        backbone_output_ids=[0],
    ),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        # numC_Trans channels for every adjacent-frame id plus one more.
        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg)) + 1),
        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8],
    ),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        # Sum of the last and the first entry of the backbone's num_channels.
        in_channels=numC_Trans * 8 + numC_Trans * 2,
        out_channels=256,
    ),
    occ_head=dict(
        type='BEVOCCHead2D_V2',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=False,
        num_classes=18,
        use_predicter=True,
        class_balance=True,
        loss_occ=dict(
            type='CustomFocalLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
    ),
)
# Data
# NOTE(review): 'Occpancy' is kept exactly as spelled; it presumably has to
# match the dataset class name registered by the plugin - confirm before
# renaming.
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')

# Augmentation settings handed to LoadAnnotationsBEVDepth in both pipelines.
bda_aug_conf = dict(
    rot_lim=(-0., 0.),
    scale_lim=(1., 1.),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5,
)

# Training pipeline; the final Collect3D step keeps the image inputs, the
# depth ground truth and the occupancy labels/masks.
train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=True),
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
            'mask_camera',
        ]),
]

# Test/validation pipeline; it is also reused by the `evaluation` setting.
test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points', 'img_inputs']),
        ]),
]

input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)

# Settings merged into every dataset split by the loop at the end of this
# section.
share_data_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet4d',
    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
)

test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        data_root=data_root,
        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        use_valid_flag=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    # val and test reference the very same dict object.
    val=test_data_config,
    test=test_data_config)

# Merge the shared settings into each split. The body has to be indented
# under the `for`; without the indent the file does not parse. Because val
# and test alias one dict, that dict is updated twice with identical values.
for key in ['val', 'train', 'test']:
    data[key].update(share_data_config)
# Optimizer
optimizer = {'type': 'AdamW', 'lr': 1e-4, 'weight_decay': 1e-2}
optimizer_config = {'grad_clip': {'max_norm': 5, 'norm_type': 2}}

# Step learning-rate policy with a linear warmup.
# NOTE(review): the only step (24) equals max_epochs below, so presumably no
# decay takes effect during training - confirm this is intended.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 200,
    'warmup_ratio': 0.001,
    'step': [24],
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 24}

custom_hooks = [
    {
        'type': 'MEGVIIEMAHook',
        'init_updates': 10560,
        'priority': 'NORMAL',
    },
]

# Checkpoint used to initialise the model weights.
load_from = 'ckpts/bevdet-r50-4d-depth-cbgs.pth'
# fp16 = dict(loss_scale='dynamic')
# Evaluation reuses the test pipeline; checkpoints are written every epoch
# with at most five kept.
evaluation = {'interval': 1, 'start': 20, 'pipeline': test_pipeline}
checkpoint_config = {'interval': 1, 'max_keep_ckpts': 5}
# use_mask = False
# ===> per class IoU of 6019 samples:
# ===> others - IoU = 11.94
# ===> barrier - IoU = 44.84
# ===> bicycle - IoU = 26.66
# ===> bus - IoU = 41.53
# ===> car - IoU = 44.42
# ===> construction_vehicle - IoU = 20.79
# ===> motorcycle - IoU = 26.96
# ===> pedestrian - IoU = 25.98
# ===> traffic_cone - IoU = 29.25
# ===> trailer - IoU = 24.24
# ===> truck - IoU = 32.28
# ===> driveable_surface - IoU = 60.5
# ===> other_flat - IoU = 33.07
# ===> sidewalk - IoU = 37.01
# ===> terrain - IoU = 33.54
# ===> manmade - IoU = 21.75
# ===> vegetation - IoU = 21.58
# ===> mIoU of 6019 samples: 31.55
# {'mIoU': array([0.119, 0.448, 0.267, 0.415, 0.444, 0.208, 0.27 , 0.26 , 0.293,
# 0.242, 0.323, 0.605, 0.331, 0.37 , 0.335, 0.217, 0.216, 0.839])}
# +----------------------+----------+----------+----------+
# | Class Names | RayIoU@1 | RayIoU@2 | RayIoU@4 |
# +----------------------+----------+----------+----------+
# | others | 0.110 | 0.118 | 0.119 |
# | barrier | 0.444 | 0.484 | 0.499 |
# | bicycle | 0.278 | 0.311 | 0.319 |
# | bus | 0.537 | 0.635 | 0.691 |
# | car | 0.512 | 0.585 | 0.611 |
# | construction_vehicle | 0.153 | 0.218 | 0.238 |
# | motorcycle | 0.228 | 0.310 | 0.330 |
# | pedestrian | 0.338 | 0.387 | 0.401 |
# | traffic_cone | 0.342 | 0.362 | 0.370 |
# | trailer | 0.209 | 0.293 | 0.368 |
# | truck | 0.422 | 0.511 | 0.555 |
# | driveable_surface | 0.570 | 0.653 | 0.742 |
# | other_flat | 0.301 | 0.340 | 0.375 |
# | sidewalk | 0.266 | 0.319 | 0.370 |
# | terrain | 0.261 | 0.334 | 0.400 |
# | manmade | 0.360 | 0.435 | 0.485 |
# | vegetation | 0.244 | 0.354 | 0.442 |
# +----------------------+----------+----------+----------+
# | MEAN | 0.328 | 0.391 | 0.430 |
# +----------------------+----------+----------+----------+
# {'RayIoU': 0.38313147213727416, 'RayIoU@1': 0.3279517851047602, 'RayIoU@2': 0.3911038935232673, 'RayIoU@4': 0.4303387377837949}
\ No newline at end of file
# Base config files (mmcv-style `_base_` inheritance): the nuScenes dataset
# defaults and the default runtime settings from mmdetection3d.
_base_ = [
    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
    '../../../mmdetection3d/configs/_base_/default_runtime.py',
]

# Plugin switch and plugin directory.
# NOTE(review): presumably read by the train/test scripts to import the
# project modules - confirm against the launcher.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# Referenced by the model config (bbox coder and train_cfg).
# point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone',
]

# Camera list, image sizes and image-space augmentation settings; handed to
# the PrepareImageInputs pipeline step.
data_config = dict(
    cams=[
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT',
        'CAM_BACK_LEFT', 'CAM_BACK', 'CAM_BACK_RIGHT',
    ],
    Ncams=6,
    input_size=(256, 704),
    src_size=(900, 1600),
    # Augmentation
    resize=(-0.06, 0.11),
    rot=(-5.4, 5.4),
    flip=True,
    crop_h=(0.0, 0.0),
    resize_test=0.00,
)

# Grid definition shared by the view transformer and PointToMultiViewDepth.
# NOTE(review): each entry is presumably [lower, upper, interval] - confirm.
grid_config = dict(
    x=[-40, 40, 0.4],
    y=[-40, 40, 0.4],
    z=[-1, 5.4, 6.4],
    depth=[1.0, 45.0, 0.5],
)

voxel_size = [0.1, 0.1, 0.2]

# Channel count produced by the view transformer; the BEV encoder widths are
# expressed as multiples of it.
numC_Trans = 80

# Unpacked into range(...) by the model config, i.e. range(1, 9, 1): eight
# adjacent-frame ids.
multi_adj_frame_id_cfg = (1, 8 + 1, 1)
# Model config: a 'BEVDepth4DPano' detector made of an image backbone and
# neck, a view transformer, a per-frame pre-process network, a BEV encoder
# (backbone + neck), an auxiliary centerness head and an occupancy head.
model = dict(
    type='BEVDepth4DPano',
    # Upper bound of the adjacent-frame range minus one.
    num_adj=multi_adj_frame_id_cfg[1] - 1,
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
        pretrained='torchvision://resnet50',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=512,
        num_outs=1,
        start_level=0,
        out_ids=[0],
    ),
    img_view_transformer=dict(
        type='LSSViewTransformerBEVDepth',
        grid_config=grid_config,
        input_size=data_config['input_size'],
        # Same value as img_neck.out_channels above.
        in_channels=512,
        out_channels=numC_Trans,
        loss_depth_weight=1,
        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
        downsample=16,
    ),
    pre_process=dict(
        type='CustomResNet',
        numC_input=numC_Trans,
        num_layer=[1],
        num_channels=[numC_Trans],
        stride=[1],
        backbone_output_ids=[0],
    ),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        # numC_Trans channels for every adjacent-frame id plus one more.
        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg)) + 1),
        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8],
    ),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        # Sum of the last and the first entry of the backbone's num_channels.
        in_channels=numC_Trans * 8 + numC_Trans * 2,
        out_channels=256,
    ),
    aux_centerness_head=dict(
        type='Centerness_Head',
        task_specific_weight=[1, 1, 0, 0, 0],
        in_channels=256,
        tasks=[
            # A single task holding the same ten names as `class_names`.
            dict(
                num_class=10,
                class_names=[
                    'car', 'truck', 'construction_vehicle', 'bus', 'trailer',
                    'barrier', 'motorcycle', 'bicycle', 'pedestrian',
                    'traffic_cone',
                ],
            ),
        ],
        common_heads=dict(
            reg=(2, 2),
            height=(1, 2),
            dim=(3, 2),
            rot=(2, 2),
            vel=(2, 2),
        ),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            pc_range=point_cloud_range[:2],
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.3,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9,
        ),
        separate_head=dict(
            type='SeparateHead',
            init_bias=-2.19,
            final_kernel=3,
        ),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True,
    ),
    occ_head=dict(
        type='BEVOCCHead2D_V2',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=False,
        num_classes=18,
        use_predicter=True,
        class_balance=True,
        loss_occ=dict(
            type='CustomFocalLoss',
            use_sigmoid=True,
            loss_weight=1.0,
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            point_cloud_range=point_cloud_range,
            grid_size=[800, 800, 40],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[
                1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2,
            ],
        ),
    ),
    test_cfg=dict(
        pts=dict(
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            pre_max_size=1000,
            post_max_size=500,
            # Scale-NMS
            nms_type=['rotate'],
            nms_thr=[0.2],
            nms_rescale_factor=[
                [1.0, 0.7, 0.7, 0.4, 0.55, 1.1, 1.0, 1.0, 1.5, 3.5],
            ],
        ),
    ),
)
# Data
# NOTE(review): 'Occpancy' is kept exactly as spelled; it presumably has to
# match the dataset class name registered by the plugin - confirm before
# renaming.
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')

# Augmentation settings handed to LoadAnnotationsBEVDepth in both pipelines.
bda_aug_conf = dict(
    rot_lim=(-0., 0.),
    scale_lim=(1., 1.),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5,
)

# Training pipeline; the final Collect3D step keeps the image inputs, the
# depth ground truth, the occupancy labels/masks and the 3D boxes/labels.
train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=True),
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'img_inputs', 'gt_depth', 'voxel_semantics', 'mask_lidar',
            'mask_camera', 'gt_bboxes_3d', 'gt_labels_3d',
        ]),
]

# Test/validation pipeline; it is also reused by the `evaluation` setting.
test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(
                type='Collect3D',
                keys=[
                    'points', 'img_inputs', 'gt_bboxes_3d', 'gt_labels_3d',
                ]),
        ]),
]

input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)

# Settings merged into every dataset split by the loop at the end of this
# section.
share_data_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet4d',
    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
)

test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        data_root=data_root,
        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        use_valid_flag=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    # val and test reference the very same dict object.
    val=test_data_config,
    test=test_data_config)

# Merge the shared settings into each split. The body has to be indented
# under the `for`; without the indent the file does not parse. Because val
# and test alias one dict, that dict is updated twice with identical values.
for key in ['val', 'train', 'test']:
    data[key].update(share_data_config)
# Optimizer
optimizer = {'type': 'AdamW', 'lr': 1e-4, 'weight_decay': 1e-2}
optimizer_config = {'grad_clip': {'max_norm': 5, 'norm_type': 2}}

# Step learning-rate policy with a linear warmup.
# NOTE(review): the only step (24) equals max_epochs below, so presumably no
# decay takes effect during training - confirm this is intended.
lr_config = {
    'policy': 'step',
    'warmup': 'linear',
    'warmup_iters': 200,
    'warmup_ratio': 0.001,
    'step': [24],
}
runner = {'type': 'EpochBasedRunner', 'max_epochs': 24}

custom_hooks = [
    {
        'type': 'MEGVIIEMAHook',
        'init_updates': 10560,
        'priority': 'NORMAL',
    },
]

# Checkpoint used to initialise the model weights.
load_from = 'ckpts/bevdet-r50-4d-depth-cbgs.pth'
# fp16 = dict(loss_scale='dynamic')
# Evaluation reuses the test pipeline; checkpoints are written every epoch
# with at most five kept.
evaluation = {'interval': 1, 'start': 20, 'pipeline': test_pipeline}
checkpoint_config = {'interval': 1, 'max_keep_ckpts': 5}
# use_mask = False
# ===> per class IoU of 6019 samples:
# ===> others - IoU = 11.51
# ===> barrier - IoU = 45.87
# ===> bicycle - IoU = 24.65
# ===> bus - IoU = 41.75
# ===> car - IoU = 46.15
# ===> construction_vehicle - IoU = 20.96
# ===> motorcycle - IoU = 26.82
# ===> pedestrian - IoU = 26.77
# ===> traffic_cone - IoU = 29.66
# ===> trailer - IoU = 24.65
# ===> truck - IoU = 32.75
# ===> driveable_surface - IoU = 60.39
# ===> other_flat - IoU = 32.87
# ===> sidewalk - IoU = 36.49
# ===> terrain - IoU = 33.16
# ===> manmade - IoU = 21.3
# ===> vegetation - IoU = 20.92
# ===> mIoU of 6019 samples: 31.57
# {'mIoU': array([0.115, 0.459, 0.247, 0.418, 0.461, 0.21 , 0.268, 0.268, 0.297,
# 0.247, 0.328, 0.604, 0.329, 0.365, 0.332, 0.213, 0.209, 0.839])}
# +----------------------+----------+----------+----------+
# | Class Names | RayIoU@1 | RayIoU@2 | RayIoU@4 |
# +----------------------+----------+----------+----------+
# | others | 0.101 | 0.108 | 0.110 |
# | barrier | 0.439 | 0.480 | 0.497 |
# | bicycle | 0.258 | 0.286 | 0.293 |
# | bus | 0.540 | 0.649 | 0.700 |
# | car | 0.531 | 0.603 | 0.629 |
# | construction_vehicle | 0.180 | 0.252 | 0.282 |
# | motorcycle | 0.247 | 0.328 | 0.343 |
# | pedestrian | 0.347 | 0.393 | 0.409 |
# | traffic_cone | 0.346 | 0.371 | 0.378 |
# | trailer | 0.209 | 0.292 | 0.384 |
# | truck | 0.452 | 0.544 | 0.587 |
# | driveable_surface | 0.562 | 0.646 | 0.734 |
# | other_flat | 0.290 | 0.328 | 0.363 |
# | sidewalk | 0.261 | 0.313 | 0.363 |
# | terrain | 0.260 | 0.330 | 0.394 |
# | manmade | 0.345 | 0.421 | 0.471 |
# | vegetation | 0.229 | 0.337 | 0.423 |
# +----------------------+----------+----------+----------+
# | MEAN | 0.329 | 0.393 | 0.433 |
# +----------------------+----------+----------+----------+
# 6019it [10:36, 9.46it/s]
# +----------------------+---------+---------+---------+
# | Class Names | RayPQ@1 | RayPQ@2 | RayPQ@4 |
# +----------------------+---------+---------+---------+
# | others | 0.026 | 0.032 | 0.033 |
# | barrier | 0.184 | 0.232 | 0.253 |
# | bicycle | 0.088 | 0.103 | 0.108 |
# | bus | 0.311 | 0.406 | 0.458 |
# | car | 0.300 | 0.380 | 0.403 |
# | construction_vehicle | 0.032 | 0.057 | 0.081 |
# | motorcycle | 0.114 | 0.156 | 0.169 |
# | pedestrian | 0.025 | 0.030 | 0.031 |
# | traffic_cone | 0.071 | 0.081 | 0.085 |
# | trailer | 0.049 | 0.077 | 0.088 |
# | truck | 0.182 | 0.274 | 0.314 |
# | driveable_surface | 0.457 | 0.574 | 0.702 |
# | other_flat | 0.062 | 0.086 | 0.106 |
# | sidewalk | 0.018 | 0.042 | 0.091 |
# | terrain | 0.017 | 0.039 | 0.074 |
# | manmade | 0.077 | 0.144 | 0.194 |
# | vegetation | 0.002 | 0.061 | 0.162 |
# +----------------------+---------+---------+---------+
# | MEAN | 0.119 | 0.163 | 0.197 |
# +----------------------+---------+---------+---------+
# {'RayIoU': 0.3850202377154096, 'RayIoU@1': 0.3291477679560127, 'RayIoU@2': 0.39307010079658805, 'RayIoU@4': 0.4328428443936281,
# 'RayPQ': 0.15961266397677248, 'RayPQ@1': 0.11850092407498894, 'RayPQ@2': 0.1631862461686837, 'RayPQ@4': 0.19715082168664483}
# Occupancy-prediction config: a `BEVDepth4DOCC` model with a ResNet-50 image
# backbone and a `BEVOCCHead2D_V2` head (18 classes), trained on the
# `NuScenesDatasetOccpancy` dataset. Dataset and runtime defaults are
# inherited from the files listed in `_base_`.
#
# NOTE: every non-dunder module-level name in this file is part of the
# resulting config namespace, so do not add helper variables casually.
_base_ = [
    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
    '../../../mmdetection3d/configs/_base_/default_runtime.py',
]

# Load the project plugin package so its custom modules are available.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# NOTE(review): this range is wider than the +/-40 x/y extent of
# `grid_config` below and is not referenced again in this file; presumably it
# only overrides the value from the base dataset config -- confirm.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]

# Image settings handed to `PrepareImageInputs` in both pipelines.
data_config = {
    'cams': [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
        'CAM_BACK', 'CAM_BACK_RIGHT'
    ],
    'Ncams': 6,
    # presumably (height, width) of the network input / the raw image
    'input_size': (256, 704),
    'src_size': (900, 1600),

    # Augmentation
    'resize': (-0.06, 0.11),
    'rot': (-5.4, 5.4),
    'flip': True,
    'crop_h': (0.0, 0.0),
    'resize_test': 0.00,
}

# Grid used by the view transformer and by `PointToMultiViewDepth`.
# Each entry is presumably [lower bound, upper bound, step] -- confirm.
grid_config = {
    'x': [-40, 40, 0.4],
    'y': [-40, 40, 0.4],
    'z': [-1, 5.4, 6.4],
    'depth': [1.0, 45.0, 0.5],
}

# Defined for parity with sibling configs; not referenced again in this file.
voxel_size = [0.1, 0.1, 0.2]

# Channel width of the view-transformer output (BEV feature).
numC_Trans = 80

# Arguments for `range()`: range(1, 9, 1) yields 8 ids, presumably the
# adjacent (history) frames used next to the current one.
multi_adj_frame_id_cfg = (1, 8+1, 1)

model = dict(
    type='BEVDepth4DOCC',
    # 9 - 1 = 8, i.e. the number of ids produced by the range above.
    num_adj=multi_adj_frame_id_cfg[1] - 1,
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
        pretrained='torchvision://resnet50',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=512,
        num_outs=1,
        start_level=0,
        out_ids=[0]),
    img_view_transformer=dict(
        type='LSSViewTransformerBEVDepth',
        grid_config=grid_config,
        input_size=data_config['input_size'],
        # must match `img_neck.out_channels`
        in_channels=512,
        out_channels=numC_Trans,
        loss_depth_weight=1,
        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
        downsample=16),
    pre_process=dict(
        type='CustomResNet',
        numC_input=numC_Trans,
        num_layer=[1, ],
        num_channels=[numC_Trans, ],
        stride=[1, ],
        backbone_output_ids=[0, ]),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        # 80 * (8 + 1) = 720 input channels: one `numC_Trans` slice per id in
        # the range plus one more (presumably the current frame).
        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg)) + 1),
        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        # 640 + 160 = 800: sum of the last and first backbone channel widths.
        in_channels=numC_Trans * 8 + numC_Trans * 2,
        out_channels=256),
    occ_head=dict(
        type='BEVOCCHead2D_V2',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=False,
        num_classes=18,
        use_predicter=True,
        class_balance=True,
        loss_occ=dict(
            type='CustomFocalLoss',
            use_sigmoid=True,
            loss_weight=1.0
        ),
    )
)

# Data
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')

# BEV-space augmentation: rotation and scaling limits are degenerate
# (0 and 1); only the two flip ratios are non-trivial.
bda_aug_conf = dict(
    rot_lim=(-0., 0.),
    scale_lim=(1., 1.),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5
)

train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=True),
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',
                                'mask_lidar', 'mask_camera'])
]

test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points', 'img_inputs'])
        ])
]

# Only the camera modality is enabled.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)

# Settings merged into every split (train/val/test) by the loop below.
share_data_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet4d',
    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
)

test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        data_root=data_root,
        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        use_valid_flag=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    val=test_data_config,
    test=test_data_config)

# `val` and `test` are the same dict object, so it is updated twice with
# identical values. The loop variable `key` stays defined at module level,
# exactly as in the original file.
for key in ['val', 'train', 'test']:
    data[key].update(share_data_config)

# Optimizer
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=200,
    warmup_ratio=0.001,
    # The single step coincides with `max_epochs` below; presumably this
    # keeps the learning rate constant after warm-up -- confirm.
    step=[24, ])
runner = dict(type='EpochBasedRunner', max_epochs=24)

custom_hooks = [
    dict(
        type='MEGVIIEMAHook',
        init_updates=10560,
        priority='NORMAL',
    ),
]

# Checkpoint the run starts from.
load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"
# fp16 = dict(loss_scale='dynamic')
evaluation = dict(interval=1, start=20, pipeline=test_pipeline)
checkpoint_config = dict(interval=1, max_keep_ckpts=5)
# use_mask = False
# ===> per class IoU of 6019 samples:
# ===> others - IoU = 11.5
# ===> barrier - IoU = 44.1
# ===> bicycle - IoU = 25.89
# ===> bus - IoU = 41.0
# ===> car - IoU = 44.57
# ===> construction_vehicle - IoU = 21.88
# ===> motorcycle - IoU = 27.31
# ===> pedestrian - IoU = 25.95
# ===> traffic_cone - IoU = 29.04
# ===> trailer - IoU = 24.17
# ===> truck - IoU = 31.81
# ===> driveable_surface - IoU = 60.74
# ===> other_flat - IoU = 33.84
# ===> sidewalk - IoU = 36.62
# ===> terrain - IoU = 33.96
# ===> manmade - IoU = 21.54
# ===> vegetation - IoU = 21.36
# ===> mIoU of 6019 samples: 31.49
# {'mIoU': array([0.115, 0.441, 0.259, 0.41 , 0.446, 0.219, 0.273, 0.259, 0.29 ,
# 0.242, 0.318, 0.607, 0.338, 0.366, 0.34 , 0.215, 0.214, 0.839])}
# +----------------------+----------+----------+----------+
# | Class Names | RayIoU@1 | RayIoU@2 | RayIoU@4 |
# +----------------------+----------+----------+----------+
# | others | 0.107 | 0.115 | 0.116 |
# | barrier | 0.442 | 0.485 | 0.501 |
# | bicycle | 0.267 | 0.296 | 0.302 |
# | bus | 0.533 | 0.632 | 0.683 |
# | car | 0.516 | 0.590 | 0.616 |
# | construction_vehicle | 0.170 | 0.251 | 0.282 |
# | motorcycle | 0.231 | 0.325 | 0.350 |
# | pedestrian | 0.340 | 0.386 | 0.400 |
# | traffic_cone | 0.348 | 0.372 | 0.380 |
# | trailer | 0.232 | 0.317 | 0.400 |
# | truck | 0.427 | 0.514 | 0.559 |
# | driveable_surface | 0.566 | 0.649 | 0.736 |
# | other_flat | 0.302 | 0.341 | 0.374 |
# | sidewalk | 0.261 | 0.313 | 0.363 |
# | terrain | 0.258 | 0.333 | 0.399 |
# | manmade | 0.348 | 0.426 | 0.479 |
# | vegetation | 0.234 | 0.342 | 0.430 |
# +----------------------+----------+----------+----------+
# | MEAN | 0.328 | 0.393 | 0.434 |
# +----------------------+----------+----------+----------+
# {'RayIoU': 0.3851476341258822, 'RayIoU@1': 0.3284556495395326, 'RayIoU@2': 0.39334760720480005, 'RayIoU@4': 0.43363964563331386}
\ No newline at end of file
# Panoptic-occupancy config: a `BEVDepth4DPano` model with a ResNet-50 image
# backbone, an auxiliary `Centerness_Head` and a `BEVOCCHead2D_V2` occupancy
# head (18 classes), trained on the `NuScenesDatasetOccpancy` dataset.
# Dataset and runtime defaults are inherited from the files in `_base_`.
#
# NOTE: every non-dunder module-level name in this file is part of the
# resulting config namespace, so do not add helper variables casually.
_base_ = [
    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
    '../../../mmdetection3d/configs/_base_/default_runtime.py',
]

# Load the project plugin package so its custom modules are available.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# The x/y extent matches `grid_config` below; with `voxel_size` it gives the
# `grid_size` of [800, 800, 40] used in `train_cfg`.
# point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
point_cloud_range = [-40.0, -40.0, -5.0, 40.0, 40.0, 3.0]

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]

# Image settings handed to `PrepareImageInputs` in both pipelines.
data_config = {
    'cams': [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
        'CAM_BACK', 'CAM_BACK_RIGHT'
    ],
    'Ncams': 6,
    # presumably (height, width) of the network input / the raw image
    'input_size': (256, 704),
    'src_size': (900, 1600),

    # Augmentation
    'resize': (-0.06, 0.11),
    'rot': (-5.4, 5.4),
    'flip': True,
    'crop_h': (0.0, 0.0),
    'resize_test': 0.00,
}

# Grid used by the view transformer and by `PointToMultiViewDepth`.
# Each entry is presumably [lower bound, upper bound, step] -- confirm.
grid_config = {
    'x': [-40, 40, 0.4],
    'y': [-40, 40, 0.4],
    'z': [-1, 5.4, 6.4],
    'depth': [1.0, 45.0, 0.5],
}

# Used by the centerness head's box coder and by `train_cfg` / `test_cfg`.
voxel_size = [0.1, 0.1, 0.2]

# Channel width of the view-transformer output (BEV feature).
numC_Trans = 80

# Arguments for `range()`: range(1, 2, 1) yields a single id, presumably one
# adjacent (history) frame used next to the current one.
multi_adj_frame_id_cfg = (1, 1+1, 1)

model = dict(
    type='BEVDepth4DPano',
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
        pretrained='torchvision://resnet50',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=512,
        num_outs=1,
        start_level=0,
        out_ids=[0]),
    img_view_transformer=dict(
        type='LSSViewTransformerBEVDepth',
        grid_config=grid_config,
        input_size=data_config['input_size'],
        # must match `img_neck.out_channels`
        in_channels=512,
        out_channels=numC_Trans,
        loss_depth_weight=1,
        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
        downsample=16),
    pre_process=dict(
        type='CustomResNet',
        numC_input=numC_Trans,
        num_layer=[1, ],
        num_channels=[numC_Trans, ],
        stride=[1, ],
        backbone_output_ids=[0, ]),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        # 80 * (1 + 1) = 160 input channels: one `numC_Trans` slice per id in
        # the range plus one more (presumably the current frame).
        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg)) + 1),
        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        # 640 + 160 = 800: sum of the last and first backbone channel widths.
        in_channels=numC_Trans * 8 + numC_Trans * 2,
        out_channels=256),
    aux_centerness_head=dict(
        type='Centerness_Head',
        task_specific_weight=[1, 1, 0, 0, 0],
        in_channels=256,
        # A single task covering all ten classes of `class_names`.
        tasks=[
            dict(num_class=10, class_names=['car', 'truck',
                                            'construction_vehicle',
                                            'bus', 'trailer',
                                            'barrier',
                                            'motorcycle', 'bicycle',
                                            'pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            # first two entries only: [-40.0, -40.0]
            pc_range=point_cloud_range[:2],
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.3,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    occ_head=dict(
        type='BEVOCCHead2D_V2',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=False,
        num_classes=18,
        use_predicter=True,
        class_balance=True,
        loss_occ=dict(
            type='CustomFocalLoss',
            use_sigmoid=True,
            loss_weight=1.0
        ),
    ),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            point_cloud_range=point_cloud_range,
            grid_size=[800, 800, 40],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2])),
    test_cfg=dict(
        pts=dict(
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            pre_max_size=1000,
            post_max_size=500,

            # Scale-NMS
            nms_type=['rotate'],
            nms_thr=[0.2],
            nms_rescale_factor=[[1.0, 0.7, 0.7, 0.4, 0.55,
                                 1.1, 1.0, 1.0, 1.5, 3.5]]
        )
    ),
)

# Data
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')

# BEV-space augmentation: rotation and scaling limits are degenerate
# (0 and 1); only the two flip ratios are non-trivial.
bda_aug_conf = dict(
    rot_lim=(-0., 0.),
    scale_lim=(1., 1.),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5
)

train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=True),
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    # Box annotations are collected as well as the occupancy targets.
    dict(
        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',
                                'mask_lidar', 'mask_camera', 'gt_bboxes_3d',
                                'gt_labels_3d'])
]

test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points', 'img_inputs',
                                         'gt_bboxes_3d', 'gt_labels_3d'])
        ])
]

# Only the camera modality is enabled.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)

# Settings merged into every split (train/val/test) by the loop below.
share_data_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet4d',
    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
)

test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        data_root=data_root,
        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        use_valid_flag=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    val=test_data_config,
    test=test_data_config)

# `val` and `test` are the same dict object, so it is updated twice with
# identical values. The loop variable `key` stays defined at module level,
# exactly as in the original file.
for key in ['val', 'train', 'test']:
    data[key].update(share_data_config)

# Optimizer
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=200,
    warmup_ratio=0.001,
    # The single step coincides with `max_epochs` below; presumably this
    # keeps the learning rate constant after warm-up -- confirm.
    step=[24, ])
runner = dict(type='EpochBasedRunner', max_epochs=24)

custom_hooks = [
    dict(
        type='MEGVIIEMAHook',
        init_updates=10560,
        priority='NORMAL',
    ),
]

# Checkpoint the run starts from.
load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"
# fp16 = dict(loss_scale='dynamic')
evaluation = dict(interval=1, start=20, pipeline=test_pipeline)
checkpoint_config = dict(interval=1, max_keep_ckpts=5)
# ===> per class IoU of 6019 samples:
# ===> others - IoU = 10.89
# ===> barrier - IoU = 43.92
# ===> bicycle - IoU = 24.42
# ===> bus - IoU = 41.91
# ===> car - IoU = 45.18
# ===> construction_vehicle - IoU = 18.73
# ===> motorcycle - IoU = 25.59
# ===> pedestrian - IoU = 25.67
# ===> traffic_cone - IoU = 25.86
# ===> trailer - IoU = 25.29
# ===> truck - IoU = 31.84
# ===> driveable_surface - IoU = 59.03
# ===> other_flat - IoU = 31.53
# ===> sidewalk - IoU = 34.67
# ===> terrain - IoU = 31.49
# ===> manmade - IoU = 19.91
# ===> vegetation - IoU = 19.31
# ===> mIoU of 6019 samples: 30.31
# {'mIoU': array([0.109, 0.439, 0.244, 0.419, 0.452, 0.187, 0.256, 0.257, 0.259,
# 0.253, 0.318, 0.59 , 0.315, 0.347, 0.315, 0.199, 0.193, 0.835])}
# +----------------------+----------+----------+----------+
# | Class Names | RayIoU@1 | RayIoU@2 | RayIoU@4 |
# +----------------------+----------+----------+----------+
# | others | 0.094 | 0.105 | 0.107 |
# | barrier | 0.411 | 0.460 | 0.480 |
# | bicycle | 0.252 | 0.286 | 0.293 |
# | bus | 0.541 | 0.646 | 0.698 |
# | car | 0.520 | 0.594 | 0.621 |
# | construction_vehicle | 0.164 | 0.235 | 0.264 |
# | motorcycle | 0.212 | 0.305 | 0.321 |
# | pedestrian | 0.326 | 0.373 | 0.389 |
# | traffic_cone | 0.312 | 0.341 | 0.348 |
# | trailer | 0.220 | 0.291 | 0.372 |
# | truck | 0.430 | 0.520 | 0.565 |
# | driveable_surface | 0.552 | 0.633 | 0.720 |
# | other_flat | 0.293 | 0.330 | 0.361 |
# | sidewalk | 0.242 | 0.291 | 0.340 |
# | terrain | 0.236 | 0.305 | 0.369 |
# | manmade | 0.303 | 0.378 | 0.429 |
# | vegetation | 0.193 | 0.294 | 0.381 |
# +----------------------+----------+----------+----------+
# | MEAN | 0.312 | 0.376 | 0.415 |
# +----------------------+----------+----------+----------+
# 6019it [09:13, 10.87it/s]
# +----------------------+---------+---------+---------+
# | Class Names | RayPQ@1 | RayPQ@2 | RayPQ@4 |
# +----------------------+---------+---------+---------+
# | others | 0.020 | 0.028 | 0.030 |
# | barrier | 0.155 | 0.211 | 0.235 |
# | bicycle | 0.083 | 0.097 | 0.102 |
# | bus | 0.299 | 0.391 | 0.442 |
# | car | 0.277 | 0.360 | 0.384 |
# | construction_vehicle | 0.011 | 0.062 | 0.077 |
# | motorcycle | 0.098 | 0.149 | 0.166 |
# | pedestrian | 0.021 | 0.026 | 0.027 |
# | traffic_cone | 0.052 | 0.069 | 0.071 |
# | trailer | 0.043 | 0.062 | 0.071 |
# | truck | 0.158 | 0.248 | 0.293 |
# | driveable_surface | 0.440 | 0.559 | 0.680 |
# | other_flat | 0.065 | 0.089 | 0.107 |
# | sidewalk | 0.012 | 0.029 | 0.060 |
# | terrain | 0.009 | 0.028 | 0.053 |
# | manmade | 0.060 | 0.108 | 0.153 |
# | vegetation | 0.001 | 0.029 | 0.111 |
# +----------------------+---------+---------+---------+
# | MEAN | 0.106 | 0.150 | 0.180 |
# +----------------------+---------+---------+---------+
# {'RayIoU': 0.3676099569727112, 'RayIoU@1': 0.3118578145261225, 'RayIoU@2': 0.3757836068619914, 'RayIoU@4': 0.4151884495300196,
# 'RayPQ': 0.14529917059571107, 'RayPQ@1': 0.1061843618020449, 'RayPQ@2': 0.14961373290314467, 'RayPQ@4': 0.18009941708194366}
# Occupancy-prediction config: a `BEVDepth4DOCC` model with a ResNet-50 image
# backbone and a `BEVOCCHead2D_V2` head (18 classes), trained on the
# `NuScenesDatasetOccpancy` dataset. Dataset and runtime defaults are
# inherited from the files listed in `_base_`.
#
# NOTE: every non-dunder module-level name in this file is part of the
# resulting config namespace, so do not add helper variables casually.
_base_ = [
    '../../../mmdetection3d/configs/_base_/datasets/nus-3d.py',
    '../../../mmdetection3d/configs/_base_/default_runtime.py',
]

# Load the project plugin package so its custom modules are available.
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# NOTE(review): this range is wider than the +/-40 x/y extent of
# `grid_config` below and is not referenced again in this file; presumably it
# only overrides the value from the base dataset config -- confirm.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]

# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]

# Image settings handed to `PrepareImageInputs` in both pipelines.
data_config = {
    'cams': [
        'CAM_FRONT_LEFT', 'CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_LEFT',
        'CAM_BACK', 'CAM_BACK_RIGHT'
    ],
    'Ncams': 6,
    # presumably (height, width) of the network input / the raw image
    'input_size': (256, 704),
    'src_size': (900, 1600),

    # Augmentation
    'resize': (-0.06, 0.11),
    'rot': (-5.4, 5.4),
    'flip': True,
    'crop_h': (0.0, 0.0),
    'resize_test': 0.00,
}

# Grid used by the view transformer and by `PointToMultiViewDepth`.
# Each entry is presumably [lower bound, upper bound, step] -- confirm.
grid_config = {
    'x': [-40, 40, 0.4],
    'y': [-40, 40, 0.4],
    'z': [-1, 5.4, 6.4],
    'depth': [1.0, 45.0, 0.5],
}

# Defined for parity with sibling configs; not referenced again in this file.
voxel_size = [0.1, 0.1, 0.2]

# Channel width of the view-transformer output (BEV feature).
numC_Trans = 80

# Arguments for `range()`: range(1, 2, 1) yields a single id, presumably one
# adjacent (history) frame used next to the current one.
multi_adj_frame_id_cfg = (1, 1+1, 1)

model = dict(
    type='BEVDepth4DOCC',
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=False,
        with_cp=True,
        style='pytorch',
        pretrained='torchvision://resnet50',
    ),
    img_neck=dict(
        type='CustomFPN',
        in_channels=[1024, 2048],
        out_channels=512,
        num_outs=1,
        start_level=0,
        out_ids=[0]),
    img_view_transformer=dict(
        type='LSSViewTransformerBEVDepth',
        grid_config=grid_config,
        input_size=data_config['input_size'],
        # must match `img_neck.out_channels`
        in_channels=512,
        out_channels=numC_Trans,
        loss_depth_weight=1,
        depthnet_cfg=dict(use_dcn=False, aspp_mid_channels=96),
        downsample=16),
    pre_process=dict(
        type='CustomResNet',
        numC_input=numC_Trans,
        num_layer=[1, ],
        num_channels=[numC_Trans, ],
        stride=[1, ],
        backbone_output_ids=[0, ]),
    img_bev_encoder_backbone=dict(
        type='CustomResNet',
        # 80 * (1 + 1) = 160 input channels: one `numC_Trans` slice per id in
        # the range plus one more (presumably the current frame).
        numC_input=numC_Trans * (len(range(*multi_adj_frame_id_cfg)) + 1),
        num_channels=[numC_Trans * 2, numC_Trans * 4, numC_Trans * 8]),
    img_bev_encoder_neck=dict(
        type='FPN_LSS',
        # 640 + 160 = 800: sum of the last and first backbone channel widths.
        in_channels=numC_Trans * 8 + numC_Trans * 2,
        out_channels=256),
    occ_head=dict(
        type='BEVOCCHead2D_V2',
        in_dim=256,
        out_dim=256,
        Dz=16,
        use_mask=False,
        num_classes=18,
        use_predicter=True,
        class_balance=True,
        loss_occ=dict(
            type='CustomFocalLoss',
            use_sigmoid=True,
            loss_weight=1.0
        ),
    )
)

# Data
dataset_type = 'NuScenesDatasetOccpancy'
data_root = 'data/nuscenes/'
file_client_args = dict(backend='disk')

# BEV-space augmentation: rotation and scaling limits are degenerate
# (0 and 1); only the two flip ratios are non-trivial.
bda_aug_conf = dict(
    rot_lim=(-0., 0.),
    scale_lim=(1., 1.),
    flip_dx_ratio=0.5,
    flip_dy_ratio=0.5
)

train_pipeline = [
    dict(
        type='PrepareImageInputs',
        is_train=True,
        data_config=data_config,
        sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=True),
    dict(type='LoadOccGTFromFile'),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(type='PointToMultiViewDepth', downsample=1, grid_config=grid_config),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D', keys=['img_inputs', 'gt_depth', 'voxel_semantics',
                                'mask_lidar', 'mask_camera'])
]

test_pipeline = [
    dict(type='PrepareImageInputs', data_config=data_config, sequential=True),
    dict(
        type='LoadAnnotationsBEVDepth',
        bda_aug_conf=bda_aug_conf,
        classes=class_names,
        is_train=False),
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points', 'img_inputs'])
        ])
]

# Only the camera modality is enabled.
input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=False)

# Settings merged into every split (train/val/test) by the loop below.
share_data_config = dict(
    type=dataset_type,
    data_root=data_root,
    classes=class_names,
    modality=input_modality,
    stereo=False,
    filter_empty_gt=False,
    img_info_prototype='bevdet4d',
    multi_adj_frame_id_cfg=multi_adj_frame_id_cfg,
)

test_data_config = dict(
    pipeline=test_pipeline,
    ann_file=data_root + 'bevdetv2-nuscenes_infos_val.pkl')

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        data_root=data_root,
        ann_file=data_root + 'bevdetv2-nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        use_valid_flag=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    val=test_data_config,
    test=test_data_config)

# `val` and `test` are the same dict object, so it is updated twice with
# identical values. The loop variable `key` stays defined at module level,
# exactly as in the original file.
for key in ['val', 'train', 'test']:
    data[key].update(share_data_config)

# Optimizer
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=1e-2)
optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=200,
    warmup_ratio=0.001,
    # The single step coincides with `max_epochs` below; presumably this
    # keeps the learning rate constant after warm-up -- confirm.
    step=[24, ])
runner = dict(type='EpochBasedRunner', max_epochs=24)

custom_hooks = [
    dict(
        type='MEGVIIEMAHook',
        init_updates=10560,
        priority='NORMAL',
    ),
]

# Checkpoint the run starts from.
load_from = "ckpts/bevdet-r50-4d-depth-cbgs.pth"
# fp16 = dict(loss_scale='dynamic')
evaluation = dict(interval=1, start=20, pipeline=test_pipeline)
checkpoint_config = dict(interval=1, max_keep_ckpts=5)
# use_mask = False
# ===> others - IoU = 9.99
# ===> barrier - IoU = 41.3
# ===> bicycle - IoU = 22.84
# ===> bus - IoU = 41.17
# ===> car - IoU = 41.89
# ===> construction_vehicle - IoU = 20.84
# ===> motorcycle - IoU = 25.25
# ===> pedestrian - IoU = 23.98
# ===> traffic_cone - IoU = 24.36
# ===> trailer - IoU = 26.39
# ===> truck - IoU = 30.41
# ===> driveable_surface - IoU = 58.26
# ===> other_flat - IoU = 31.86
# ===> sidewalk - IoU = 34.47
# ===> terrain - IoU = 31.96
# ===> manmade - IoU = 18.87
# ===> vegetation - IoU = 18.95
# ===> mIoU of 6019 samples: 29.57
# {'mIoU': array([0.1 , 0.413, 0.228, 0.412, 0.419, 0.208, 0.253, 0.24 , 0.244,
# 0.264, 0.304, 0.583, 0.319, 0.345, 0.32 , 0.189, 0.189, 0.833])}
# +----------------------+----------+----------+----------+
# | Class Names | RayIoU@1 | RayIoU@2 | RayIoU@4 |
# +----------------------+----------+----------+----------+
# | others | 0.095 | 0.106 | 0.109 |
# | barrier | 0.392 | 0.444 | 0.466 |
# | bicycle | 0.236 | 0.279 | 0.287 |
# | bus | 0.513 | 0.616 | 0.675 |
# | car | 0.492 | 0.567 | 0.596 |
# | construction_vehicle | 0.170 | 0.256 | 0.296 |
# | motorcycle | 0.216 | 0.304 | 0.330 |
# | pedestrian | 0.315 | 0.363 | 0.378 |
# | traffic_cone | 0.280 | 0.315 | 0.323 |
# | trailer | 0.210 | 0.294 | 0.397 |
# | truck | 0.419 | 0.517 | 0.565 |
# | driveable_surface | 0.540 | 0.621 | 0.708 |
# | other_flat | 0.284 | 0.320 | 0.354 |
# | sidewalk | 0.242 | 0.289 | 0.337 |
# | terrain | 0.233 | 0.302 | 0.367 |
# | manmade | 0.291 | 0.370 | 0.422 |
# | vegetation | 0.190 | 0.290 | 0.376 |
# +----------------------+----------+----------+----------+
# | MEAN | 0.301 | 0.368 | 0.411 |
# +----------------------+----------+----------+----------+
# {'RayIoU': 0.3599406945036808, 'RayIoU@1': 0.30094679699387594, 'RayIoU@2': 0.36785252629427645, 'RayIoU@4': 0.4110227602228899}
\ No newline at end of file
Metadata-Version: 2.1
Name: flashocc-plugin
Version: 0.0.0
Summary: OpenMMLab's next-generation platform for general 3D object detection.
Home-page: https://github.com/open-mmlab/mmdetection3d
Author: MMDetection3D Contributors
Author-email: zwwdev@gmail.com
License: Apache License 2.0
Keywords: computer vision,3D object detection
Classifier: Development Status :: 4 - Beta
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Operating System :: OS Independent
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Description-Content-Type: text/markdown
__init__.py
setup.py
configs/bevdet_occ/bevdet-occ-r50-4d-stereo.py
configs/bevdet_occ/bevdet-occ-r50.py
configs/bevdet_occ/bevdet-occ-stbase-4d-stereo-512x1408.py
configs/flashocc/flashocc-r50-4d-stereo.py
configs/flashocc/flashocc-r50-M0-trt.py
configs/flashocc/flashocc-r50-M0.py
configs/flashocc/flashocc-r50-trt.py
configs/flashocc/flashocc-r50.py
configs/flashocc/flashocc-stbase-4d-stereo-512x1408_4x4_1e-2.py
configs/flashocc/flashocc-stbase-4d-stereo-512x1408_4x4_2e-4.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth-pano.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth-tiny-pano-trt.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth-tiny-pano.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth-tiny.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth-trt.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f-pano.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm16f.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f-pano.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-longterm8f.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d-pano.py
configs/panoptic-flashocc/panoptic-flashocc-r50-depth4d.py
flashocc_plugin.egg-info/PKG-INFO
flashocc_plugin.egg-info/SOURCES.txt
flashocc_plugin.egg-info/dependency_links.txt
flashocc_plugin.egg-info/not-zip-safe
flashocc_plugin.egg-info/top_level.txt
mmdet3d_plugin/__init__.py
mmdet3d_plugin/core/__init__.py
mmdet3d_plugin/core/bbox/__init__.py
mmdet3d_plugin/core/bbox/coders/__init__.py
mmdet3d_plugin/core/bbox/coders/centerpoint_bbox_coders.py
mmdet3d_plugin/core/evaluation/__init__.py
mmdet3d_plugin/core/evaluation/occ_metrics.py
mmdet3d_plugin/core/evaluation/ray_metrics.py
mmdet3d_plugin/core/evaluation/ray_pq.py
mmdet3d_plugin/core/hook/__init__.py
mmdet3d_plugin/core/hook/ema.py
mmdet3d_plugin/core/hook/sequentialcontrol.py
mmdet3d_plugin/core/hook/syncbncontrol.py
mmdet3d_plugin/core/hook/utils.py
mmdet3d_plugin/core/post_processing/__init__.py
mmdet3d_plugin/core/post_processing/box3d_nms.py
mmdet3d_plugin/datasets/__init__.py
mmdet3d_plugin/datasets/ego_pose_dataset.py
mmdet3d_plugin/datasets/nuscenes_dataset_bevdet.py
mmdet3d_plugin/datasets/nuscenes_dataset_occ.py
mmdet3d_plugin/datasets/pipelines/__init__.py
mmdet3d_plugin/datasets/pipelines/formating.py
mmdet3d_plugin/datasets/pipelines/loading.py
mmdet3d_plugin/models/__init__.py
mmdet3d_plugin/models/backbones/__init__.py
mmdet3d_plugin/models/backbones/resnet.py
mmdet3d_plugin/models/backbones/swin.py
mmdet3d_plugin/models/dense_heads/__init__.py
mmdet3d_plugin/models/dense_heads/bev_centerpoint_head.py
mmdet3d_plugin/models/dense_heads/bev_occ_head.py
mmdet3d_plugin/models/detectors/__init__.py
mmdet3d_plugin/models/detectors/bevdepth.py
mmdet3d_plugin/models/detectors/bevdepth4d.py
mmdet3d_plugin/models/detectors/bevdet.py
mmdet3d_plugin/models/detectors/bevdet4d.py
mmdet3d_plugin/models/detectors/bevdet_occ.py
mmdet3d_plugin/models/detectors/bevstereo4d.py
mmdet3d_plugin/models/losses/__init__.py
mmdet3d_plugin/models/losses/cross_entropy_loss.py
mmdet3d_plugin/models/losses/focal_loss.py
mmdet3d_plugin/models/losses/lovasz_softmax.py
mmdet3d_plugin/models/losses/semkitti_loss.py
mmdet3d_plugin/models/model_utils/__init__.py
mmdet3d_plugin/models/model_utils/depthnet.py
mmdet3d_plugin/models/necks/__init__.py
mmdet3d_plugin/models/necks/fpn.py
mmdet3d_plugin/models/necks/lss_fpn.py
mmdet3d_plugin/models/necks/view_transformer.py
mmdet3d_plugin/ops/__init__.py
mmdet3d_plugin/ops/bev_pool/__init__.py
mmdet3d_plugin/ops/bev_pool/bev_pool.py
mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.cpp
mmdet3d_plugin/ops/bev_pool/src/bev_max_pool.h
mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_cuda.cu
mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_cuda.hip
mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_hip.cpp
mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp
mmdet3d_plugin/ops/bev_pool/src/bev_pooling_hip.cpp
mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp
mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.h
mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu
mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.hip
mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.cpp
mmdet3d_plugin/ops/bev_pool_v2/__init__.py
mmdet3d_plugin/ops/bev_pool_v2/bev_pool.py
mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp
mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu
mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.hip
mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_hip.cpp
mmdet3d_plugin/ops/nearest_assign/__init__.py
mmdet3d_plugin/ops/nearest_assign/nearest_assign.py
mmdet3d_plugin/ops/nearest_assign/src/nearest_assign.cpp
mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.cu
mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_cuda.hip
mmdet3d_plugin/ops/nearest_assign/src/nearest_assign_hip.cpp
\ No newline at end of file
from .datasets import *
from .core import *
from .models import *
from .centerpoint_bbox_coders import CenterPointBBoxCoder
__all__ = ['CenterPointBBoxCoder']
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment