Unverified Commit 88b86943 authored by Sun Jiahao, committed by GitHub

[Feature] Support PGD and multi-view FCOS3D++ on Waymo (#2835)


Co-authored-by: JingweiZhang12 <zjw18@mails.tsinghua.edu.cn>
Co-authored-by: sjh <sunjiahao1999>
parent 2dad86c2
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)
# Example of using a different file backend
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the path prefix (LMDB and Memcached are not supported yet)
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection3d/',
# 'data/': 's3://openmmlab/datasets/detection3d/'
# }))
backend_args = None
train_pipeline = [
dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=False,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
# base shape (1248, 832), scale (0.95, 1.05)
dict(
type='RandomResize3D',
scale=(1248, 832),
ratio_range=(0.95, 1.05),
# ratio_range=(1., 1.),
interpolation='nearest',
keep_ratio=True,
),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='Pack3DDetInputs',
keys=[
'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
'gt_labels_3d', 'centers_2d', 'depths'
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
dict(
type='RandomResize3D',
scale=(1248, 832),
ratio_range=(1., 1.),
interpolation='nearest',
keep_ratio=True),
dict(
type='Pack3DDetInputs',
keys=['img'],
meta_keys=[
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
]),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
dict(
type='RandomResize3D',
scale=(1248, 832),
ratio_range=(1., 1.),
interpolation='nearest',
keep_ratio=True),
dict(
type='Pack3DDetInputs',
keys=['img'],
meta_keys=[
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
]),
]
train_dataloader = dict(
batch_size=3,
num_workers=3,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='waymo_infos_train.pkl',
data_prefix=dict(
pts='training/velodyne',
CAM_FRONT='training/image_0',
CAM_FRONT_LEFT='training/image_1',
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
metainfo=metainfo,
cam_sync_instances=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Camera',
load_type='fov_image_based',
# load one frame every three frames
load_interval=3,
backend_args=backend_args))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
pts='training/velodyne',
CAM_FRONT='training/image_0',
CAM_FRONT_LEFT='training/image_1',
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
ann_file='waymo_infos_val.pkl',
pipeline=eval_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
cam_sync_instances=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Camera',
load_type='fov_image_based',
load_eval_anns=False,
backend_args=backend_args))
test_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
pts='training/velodyne',
CAM_FRONT='training/image_0',
CAM_FRONT_LEFT='training/image_1',
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
ann_file='waymo_infos_val.pkl',
pipeline=eval_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
cam_sync_instances=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Camera',
load_type='fov_image_based',
backend_args=backend_args))
val_evaluator = dict(
type='WaymoMetric',
waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
metric='LET_mAP',
load_type='fov_image_based',
result_prefix='./pgd_fov_pred')
test_evaluator = val_evaluator
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
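
The file above is the FOV-based monocular Waymo dataset base config added by this PR (referenced through `_base_` as `configs/_base_/datasets/waymoD3-fov-mono3d-3class.py`). As a minimal editorial sketch, assuming mmengine is installed and the repository root is the working directory, it can be loaded and inspected with mmengine's `Config` API:

```python
# Sketch (not part of the PR): load the base dataset config above and
# inspect/override a few of its fields before training.
from mmengine.config import Config

cfg = Config.fromfile('configs/_base_/datasets/waymoD3-fov-mono3d-3class.py')

print(cfg.train_dataloader.dataset.load_interval)  # 3 -> the "D3" fold setting
print(cfg.val_evaluator.waymo_bin_file)  # ./data/waymo/waymo_format/fov_gt.bin

# The same kind of override can be passed to tools/train.py on the command
# line, e.g. --cfg-options train_dataloader.batch_size=2
cfg.train_dataloader.batch_size = 2
```
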
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)
# Example of using a different file backend
# Method 1: simply set the data root and let the file I/O module
# automatically infer the backend from the path prefix (LMDB and Memcached are not supported yet)
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: use backend_args (named file_client_args in versions before 1.1.0)
# backend_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/': 's3://openmmlab/datasets/detection3d/',
# 'data/': 's3://openmmlab/datasets/detection3d/'
# }))
backend_args = None
train_pipeline = [
dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_attr_label=False,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
# base shape (1248, 832), scale (0.95, 1.05)
dict(
type='RandomResize3D',
scale=(1248, 832),
# ratio_range=(1., 1.),
ratio_range=(0.95, 1.05),
interpolation='nearest',
keep_ratio=True,
),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(
type='Pack3DDetInputs',
keys=[
'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
'gt_labels_3d', 'centers_2d', 'depths'
]),
]
test_pipeline = [
dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
dict(
type='Resize3D',
scale_factor=0.65,
interpolation='nearest',
keep_ratio=True),
dict(
type='Pack3DDetInputs',
keys=['img'],
meta_keys=[
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
]),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
dict(
type='Resize3D',
scale_factor=0.65,
interpolation='nearest',
keep_ratio=True),
dict(
type='Pack3DDetInputs',
keys=['img'],
meta_keys=[
'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
]),
]
train_dataloader = dict(
batch_size=3,
num_workers=3,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='waymo_infos_train.pkl',
data_prefix=dict(
pts='training/velodyne',
CAM_FRONT='training/image_0',
CAM_FRONT_LEFT='training/image_1',
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
pipeline=train_pipeline,
modality=input_modality,
test_mode=False,
metainfo=metainfo,
cam_sync_instances=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Camera',
load_type='mv_image_based',
# load one frame every three frames
load_interval=3,
backend_args=backend_args))
val_dataloader = dict(
batch_size=1,
num_workers=0,
persistent_workers=False,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
pts='training/velodyne',
CAM_FRONT='training/image_0',
CAM_FRONT_LEFT='training/image_1',
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
ann_file='waymo_infos_val.pkl',
pipeline=eval_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
cam_sync_instances=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Camera',
load_type='mv_image_based',
# load_eval_anns=False,
backend_args=backend_args))
test_dataloader = dict(
batch_size=1,
num_workers=0,
persistent_workers=False,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
data_prefix=dict(
pts='training/velodyne',
CAM_FRONT='training/image_0',
CAM_FRONT_LEFT='training/image_1',
CAM_FRONT_RIGHT='training/image_2',
CAM_SIDE_LEFT='training/image_3',
CAM_SIDE_RIGHT='training/image_4'),
ann_file='waymo_infos_val.pkl',
pipeline=eval_pipeline,
modality=input_modality,
test_mode=True,
metainfo=metainfo,
cam_sync_instances=True,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Camera',
load_type='mv_image_based',
load_eval_anns=False,
backend_args=backend_args))
val_evaluator = dict(
type='WaymoMetric',
waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
metric='LET_mAP',
load_type='mv_image_based',
result_prefix='./pgd_mv_pred',
nms_cfg=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=500,
nms_thr=0.05,
score_thr=0.001,
min_bbox_size=0,
max_per_frame=100))
test_evaluator = val_evaluator
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
# dataset settings
-# D3 in the config name means the whole dataset is divided into 3 folds
+# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
@@ -19,7 +19,7 @@ data_root = 'data/waymo/kitti_format/'
#     }))
backend_args = None
-class_names = ['Car', 'Pedestrian', 'Cyclist']
+class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]
@@ -30,7 +30,7 @@ train_transforms = [
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True),
-    dict(type='RandomCrop3D', crop_size=(720, 1080)),
+    dict(type='RandomCrop3D', crop_size=(1080, 720)),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5, flip_box3d=False),
]
@@ -70,7 +70,14 @@ test_pipeline = [
        to_float32=True,
        backend_args=backend_args),
    dict(type='MultiViewWrapper', transforms=test_transforms),
-    dict(type='Pack3DDetInputs', keys=['img'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['img'],
+        meta_keys=[
+            'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+            'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+            'num_ref_frames', 'num_views'
+        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
@@ -80,7 +87,14 @@ eval_pipeline = [
        to_float32=True,
        backend_args=backend_args),
    dict(type='MultiViewWrapper', transforms=test_transforms),
-    dict(type='Pack3DDetInputs', keys=['img'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['img'],
+        meta_keys=[
+            'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
+            'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
+            'num_ref_frames', 'num_views'
+        ])
]
metainfo = dict(classes=class_names)
@@ -103,6 +117,7 @@ train_dataloader = dict(
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
+        cam_sync_instances=True,
        metainfo=metainfo,
        box_type_3d='Lidar',
        load_interval=5,
@@ -149,7 +164,7 @@ test_dataloader = dict(
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
-        pipeline=eval_pipeline,
+        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
@@ -157,10 +172,7 @@ test_dataloader = dict(
        backend_args=backend_args))
val_evaluator = dict(
    type='WaymoMetric',
-    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
-    data_root='./data/waymo/waymo_format',
-    metric='LET_mAP',
-    backend_args=backend_args)
+    metric='LET_mAP')
test_evaluator = val_evaluator
@@ -35,7 +35,7 @@ model = dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-35.0, -75.0, -2, 75.0, 75.0, 4]],
            rotations=[.0]),
-    bbox_head=dict(
+    bbox_head_3d=dict(
        type='Anchor3DHead',
        num_classes=3,
        in_channels=256,
@@ -43,13 +43,13 @@ model = dict(
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
-            ranges=[[-35.0, -75.0, -0.0345, 75.0, 75.0, -0.0345],
-                    [-35.0, -75.0, 0, 75.0, 75.0, 0],
-                    [-35.0, -75.0, -0.1188, 75.0, 75.0, -0.1188]],
+            ranges=[[-35.0, -75.0, 0, 75.0, 75.0, 0],
+                    [-35.0, -75.0, -0.1188, 75.0, 75.0, -0.1188],
+                    [-35.0, -75.0, -0.0345, 75.0, 75.0, -0.0345]],
            sizes=[
-                [4.73, 2.08, 1.77],  # car
                [0.91, 0.84, 1.74],  # pedestrian
                [1.81, 0.84, 1.77],  # cyclist
+                [4.73, 2.08, 1.77],  # car
            ],
            rotations=[0, 1.57],
            reshape_out=False),
@@ -69,13 +69,6 @@ model = dict(
                loss_weight=0.2)),
    train_cfg=dict(
        assigner=[
-            dict(  # for Car
-                type='Max3DIoUAssigner',
-                iou_calculator=dict(type='BboxOverlapsNearest3D'),
-                pos_iou_thr=0.6,
-                neg_iou_thr=0.45,
-                min_pos_iou=0.45,
-                ignore_iof_thr=-1),
            dict(  # for Pedestrian
                type='Max3DIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
@@ -90,6 +83,13 @@ model = dict(
                neg_iou_thr=0.35,
                min_pos_iou=0.35,
                ignore_iof_thr=-1),
+            dict(  # for Car
+                type='Max3DIoUAssigner',
+                iou_calculator=dict(type='BboxOverlapsNearest3D'),
+                pos_iou_thr=0.6,
+                neg_iou_thr=0.45,
+                min_pos_iou=0.45,
+                ignore_iof_thr=-1)
        ],
        allowed_border=0,
        pos_weight=-1,
@@ -100,5 +100,5 @@ model = dict(
        nms_thr=0.05,
        score_thr=0.001,
        min_bbox_size=0,
-        nms_pre=500,
-        max_num=100))
+        nms_pre=4096,
+        max_num=500))
# MV-FCOS3D++: Multi-View Camera-Only 4D Object Detection with Pretrained Monocular Backbones
> [MV-FCOS3D++: Multi-View Camera-Only 4D Object Detection with Pretrained Monocular Backbones](https://arxiv.org/abs/2207.12716)
<!-- [ALGORITHM] -->
## Abstract
In this technical report, we present our solution, dubbed MV-FCOS3D++, for the Camera-Only 3D Detection track in Waymo Open Dataset Challenge 2022. For multi-view camera-only 3D detection, methods based on bird-eye-view or 3D geometric representations can leverage the stereo cues from overlapped regions between adjacent views and directly perform 3D detection without hand-crafted post-processing. However, it lacks direct semantic supervision for 2D backbones, which can be complemented by pretraining simple monocular-based detectors. Our solution is a multi-view framework for 4D detection following this paradigm. It is built upon a simple monocular detector FCOS3D++, pretrained only with object annotations of Waymo, and converts multi-view features to a 3D grid space to detect 3D objects thereon. A dual-path neck for single-frame understanding and temporal stereo matching is devised to incorporate multi-frame information. Our method finally achieves 49.75% mAPL with a single model and wins 2nd place in the WOD challenge, without any LiDAR-based depth supervision during training. The code will be released at [this https URL](https://github.com/Tai-Wang/Depth-from-Motion).
<div align=center>
<img src="https://github.com/open-mmlab/mmdetection3d/assets/72679458/9313eb3c-cc41-40be-9ead-549b3b5fef44" width="800"/>
</div>
## Introduction
We implement multi-view FCOS3D++ and provide results on the Waymo dataset.
## Usage
### Training commands
1. You should train PGD first:
```bash
bash tools/dist_train.sh configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py 8
```
2. Given the pre-trained PGD backbone, you can then train multi-view FCOS3D++:
```bash
bash tools/dist_train.sh configs/mvfcos3d/multiview-fcos3d_r101-dcn_8xb2_waymoD5-3d-3class.py 8 --cfg-options load_from=${PRETRAINED_CHECKPOINT}
```
**Note**:
the path of `load_from` needs to point to your own pre-trained PGD checkpoint.
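
Equivalently, here is a minimal sketch (not from the repository) of putting the same override into a small local config instead of the command line; the file name and checkpoint path below are placeholders:

```python
# my_mvfcos3d_finetune.py -- hypothetical local config (not part of this PR).
_base_ = ['./multiview-fcos3d_r101-dcn_8xb2_waymoD5-3d-3class.py']

# Placeholder path: point this at your own pre-trained PGD checkpoint.
load_from = 'work_dirs/pgd_waymo_mv/epoch_24.pth'
```
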
## Results and models
### Waymo
| Backbone | Load Interval | mAPL | mAP | mAPH | Download |
| :--------------------------------------------------------------------: | :-----------: | :--: | :--: | :--: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [ResNet101+DCN](./multiview-fcos3d_r101-dcn_8xb2_waymoD5-3d-3class.py) | 5x | 38.2 | 52.9 | 49.5 | [log](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/mvfcos3d/multiview-fcos3d_r101-dcn_8xb2_waymoD5-3d-3class/multiview-fcos3d_r101-dcn_8xb2_waymoD5-3d-3class_20231127_122815.log) |
| above @ Car | | 56.5 | 73.3 | 72.3 | |
| above @ Pedestrian | | 34.8 | 49.5 | 43.1 | |
| above @ Cyclist | | 23.2 | 35.9 | 33.3 | |
**Note**:
Regrettably, we are unable to provide the pre-trained model weights due to the [Waymo Dataset License Agreement](https://waymo.com/open/terms/), so we only provide the training logs as shown above.
## Citation
```latex
@article{wang2022mvfcos3d++,
title={{MV-FCOS3D++: Multi-View} Camera-Only 4D Object Detection with Pretrained Monocular Backbones},
author={Wang, Tai and Lian, Qing and Zhu, Chenming and Zhu, Xinge and Zhang, Wenwei},
journal={arXiv preprint},
year={2022}
}
```
@@ -50,6 +50,23 @@ Note: mAP represents Car moderate 3D strict AP11 / AP40 results. Because of the
| [above w/ finetune](./pgd_r101-caffe_fpn_head-gn_16xb2-2x_nus-mono3d_finetune.py) | 2x | 9.20 | 35.8 | 42.5 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/pgd/pgd_r101_caffe_fpn_gn-head_2x16_2x_nus-mono3d_finetune/pgd_r101_caffe_fpn_gn-head_2x16_2x_nus-mono3d_finetune_20211114_162135-5ec7c1cd.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/pgd/pgd_r101_caffe_fpn_gn-head_2x16_2x_nus-mono3d_finetune/pgd_r101_caffe_fpn_gn-head_2x16_2x_nus-mono3d_finetune_20211114_162135.log.json) |
| above w/ tta | 2x | 9.20 | 36.8 | 43.1 | |
### Waymo
| Backbone | Load Interval | Camera view | mAPL | mAP | mAPH | Download |
| :--------------------------------------------------------------------------: | :-----------: | :-----------: | :--: | :--: | :---: | :-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [ResNet101 w/ DCN](./pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-fov-mono3d.py) | 3x | front-of-view | 15.8 | 22.7 | 21.51 | [log](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-fov-mono3d/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-fov-mono3d_20231107_164117.log) |
| above @ Car | | | 36.7 | 51.6 | 51.0 | |
| above @ Pedestrian | | | 9.0 | 14.1 | 11.4 | |
| above @ Cyclist | | | 1.6 | 2.5 | 2.2 | |
| [ResNet101 w/ DCN](./pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py) | 3x | multi-view | 20.8 | 29.3 | 27.7 | [log](https://download.openmmlab.com/mmdetection3d/v1.1.0_models/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d_20231120_202732.log) |
| above @ Car | | | 41.2 | 56.1 | 55.2 | |
| above @ Pedestrian | | | 20.0 | 29.6 | 25.8 | |
| above @ Cyclist | | | 1.4 | 2.2 | 2.0 | |
**Note**:
Regrettably, we are unable to provide the pre-trained model weights due to the [Waymo Dataset License Agreement](https://waymo.com/open/terms/), so we only provide the training logs as shown above.
## Citation

```latex
...
@@ -68,9 +68,9 @@ model = dict(
            type='PGDBBoxCoder',
            base_depths=((41.01, 18.44), ),
            base_dims=(
-                (4.73, 1.77, 2.08),
-                (0.91, 1.74, 0.84),
-                (1.81, 1.77, 0.84),
+                (4.73, 1.77, 2.08),  # Car
+                (0.91, 1.74, 0.84),  # Pedestrian
+                (1.81, 1.77, 0.84),  # Cyclist
            ),
            code_size=7)),
    # set weight 1.0 for base 7 dims (offset, depth, size, rot)
...
_base_ = [
'../_base_/datasets/waymoD3-fov-mono3d-3class.py',
'../_base_/models/pgd.py', '../_base_/schedules/mmdet-schedule-1x.py',
'../_base_/default_runtime.py'
]
# load_from = '../Depth-from-Motion/checkpoints/pgd_init.pth'
# model settings
model = dict(
backbone=dict(
type='mmdet.ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, False, True, True)),
neck=dict(num_outs=3),
bbox_head=dict(
num_classes=3,
bbox_code_size=7,
pred_attrs=False,
pred_velo=False,
pred_bbox2d=True,
use_onlyreg_proj=True,
strides=(8, 16, 32),
regress_ranges=((-1, 128), (128, 256), (256, 1e8)),
group_reg_dims=(2, 1, 3, 1, 16,
4), # offset, depth, size, rot, kpts, bbox2d
reg_branch=(
(256, ), # offset
(256, ), # depth
(256, ), # size
(256, ), # rot
(256, ), # kpts
(256, ) # bbox2d
),
centerness_branch=(256, ),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_centerness=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
use_depth_classifier=True,
depth_branch=(256, ),
depth_range=(0, 50),
depth_unit=10,
division='uniform',
depth_bins=6,
pred_keypoints=True,
weight_dim=1,
loss_depth=dict(
type='UncertainSmoothL1Loss', alpha=1.0, beta=3.0,
loss_weight=1.0),
loss_bbox2d=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.0),
loss_consistency=dict(type='mmdet.GIoULoss', loss_weight=0.0),
bbox_coder=dict(
type='PGDBBoxCoder',
base_depths=((41.01, 18.44), ),
base_dims=(
(0.91, 1.74, 0.84), # Pedestrian
(1.81, 1.77, 0.84), # Cyclist
(4.73, 1.77, 2.08)), # Car
code_size=7)),
# set weight 1.0 for base 7 dims (offset, depth, size, rot)
# 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets
train_cfg=dict(code_weight=[
1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
]),
test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))
# optimizer
optim_wrapper = dict(
optimizer=dict(
type='SGD',
lr=0.008,
),
paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
clip_grad=dict(max_norm=35, norm_type=2))
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0 / 3,
by_epoch=False,
begin=0,
end=500),
dict(
type='MultiStepLR',
begin=0,
end=24,
by_epoch=True,
milestones=[16, 22],
gamma=0.1)
]
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=24)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
auto_scale_lr = dict(enable=False, base_batch_size=48)
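
A quick, standalone sanity check (an editorial sketch, not part of the config) that the 27 `code_weight` entries above line up with the regression layout declared by `group_reg_dims=(2, 1, 3, 1, 16, 4)`:

```python
# Sketch: the length of code_weight must equal the total number of regression
# dims: 2 (offset) + 1 (depth) + 3 (size) + 1 (rot) + 16 (kpts) + 4 (bbox2d).
group_reg_dims = (2, 1, 3, 1, 16, 4)
code_weight = [
    1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
    0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
]
assert len(code_weight) == sum(group_reg_dims) == 27
```
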
_base_ = [
'../_base_/datasets/waymoD3-mv-mono3d-3class.py',
'../_base_/models/pgd.py', '../_base_/schedules/mmdet-schedule-1x.py',
'../_base_/default_runtime.py'
]
# model settings
model = dict(
backbone=dict(
type='mmdet.ResNet',
depth=101,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet101'),
dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
stage_with_dcn=(False, False, True, True)),
neck=dict(num_outs=3),
bbox_head=dict(
num_classes=3,
bbox_code_size=7,
pred_attrs=False,
pred_velo=False,
pred_bbox2d=True,
use_onlyreg_proj=True,
strides=(8, 16, 32),
regress_ranges=((-1, 128), (128, 256), (256, 1e8)),
group_reg_dims=(2, 1, 3, 1, 16,
4), # offset, depth, size, rot, kpts, bbox2d
reg_branch=(
(256, ), # offset
(256, ), # depth
(256, ), # size
(256, ), # rot
(256, ), # kpts
(256, ) # bbox2d
),
centerness_branch=(256, ),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_centerness=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
use_depth_classifier=True,
depth_branch=(256, ),
depth_range=(0, 50),
depth_unit=10,
division='uniform',
depth_bins=6,
pred_keypoints=True,
weight_dim=1,
loss_depth=dict(
type='UncertainSmoothL1Loss', alpha=1.0, beta=3.0,
loss_weight=1.0),
loss_bbox2d=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.0),
loss_consistency=dict(type='mmdet.GIoULoss', loss_weight=0.0),
bbox_coder=dict(
type='PGDBBoxCoder',
base_depths=((41.01, 18.44), ),
base_dims=(
(0.91, 1.74, 0.84), # Pedestrian
(1.81, 1.77, 0.84), # Cyclist
(4.73, 1.77, 2.08)), # Car
code_size=7)),
# set weight 1.0 for base 7 dims (offset, depth, size, rot)
# 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets
train_cfg=dict(code_weight=[
1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0, 1.0
]),
test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))
# optimizer
optim_wrapper = dict(
optimizer=dict(
type='SGD',
lr=0.008,
),
paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
clip_grad=dict(max_norm=35, norm_type=2))
param_scheduler = [
dict(
type='LinearLR',
start_factor=1.0 / 3,
by_epoch=False,
begin=0,
end=500),
dict(
type='MultiStepLR',
begin=0,
end=24,
by_epoch=True,
milestones=[16, 22],
gamma=0.1)
]
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=24, val_interval=24)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
auto_scale_lr = dict(enable=False, base_batch_size=48)
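
For reference, a minimal Python-API sketch of roughly what `tools/train.py` does with the config above (assuming mmdet3d is installed and the Waymo data has been prepared as described in the dataset docs; the work directory is illustrative):

```python
# Sketch: launch single-GPU training of the multi-view PGD config via
# mmengine's Runner, which is what tools/train.py builds under the hood.
from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile(
    'configs/pgd/pgd_r101_fpn_gn-head_dcn_8xb3-2x_waymoD3-mv-mono3d.py')
cfg.work_dir = './work_dirs/pgd_waymo_mv'  # illustrative output directory

runner = Runner.from_cfg(cfg)
runner.train()
```
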
@@ -133,10 +133,12 @@ sh tools/create_data.sh <partition> kitti

### Waymo

-Download Waymo open dataset V1.4.1 [HERE](https://waymo.com/open/download/) and its data split [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put `.tfrecord` files into corresponding folders in `data/waymo/waymo_format/` and put the data split `.txt` files into `data/waymo/kitti_format/ImageSets`. Download ground truth `.bin` file for validation set [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_4_1/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. A tip is that you can use `gsutil` to download the large-scale dataset with commands. You can take this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) as an example for more details. Subsequently, prepare waymo data by running:
+Download Waymo open dataset V1.4.1 [HERE](https://waymo.com/open/download/) and its data split [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put `.tfrecord` files into corresponding folders in `data/waymo/waymo_format/` and put the data split `.txt` files into `data/waymo/kitti_format/ImageSets`. Download ground truth `.bin` file for validation set [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_2_0/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. A tip is that you can use `gsutil` to download the large-scale dataset with commands. You can take this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) as an example for more details. Subsequently, prepare waymo data by running:

```bash
-python tools/create_data.py waymo --root-path ./data/waymo/ --out-dir ./data/waymo/ --workers 128 --extra-tag waymo
+# TF_CPP_MIN_LOG_LEVEL=3 will disable all logging output from TensorFlow.
+# The number of `--workers` depends on the maximum number of cores in your CPU.
+TF_CPP_MIN_LOG_LEVEL=3 python tools/create_data.py waymo --root-path ./data/waymo --out-dir ./data/waymo --workers 128 --extra-tag waymo --version v1.4
```

Note that:

@@ -149,28 +151,12 @@ Note that:

- **Ready-made Annotations**. We have provided the annotation files generated offline [here](#summary-of-annotation-files). However, the original Waymo data still needs to be converted to `kitti-format` data by yourself.

-- **Waymo-mini**. If you just want to use a part of Waymo Dataset to verify some methods or debug quickly, you could use our provided [Waymo-mini](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_kitti_format.tar.gz) which only contains two segments in train split and one segment in val split from the original dataset. All the images, point clouds and annotations in this compressed file have been processed offline so that you can directly download and unzip it to `data/waymo/`:
+- **Waymo-mini**. If you just want to use a part of Waymo Dataset to verify some methods or debug quickly, you could use our provided [Waymo-mini](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_mini.tar.gz) which only contains two segments in train split and one segment in val split from the original dataset. All the images, point clouds and annotations in this compressed file have been processed offline so that you can directly download and unzip it to `data/waymo/`:

  ```bash
-  tar -xzvf waymo_mini_kitti_format.tar.gz -C ./data/waymo
+  tar -xzvf waymo_mini.tar.gz -C ./data/waymo_mini
  ```

-- **Faster evaluation**. If you want faster evaluation on Waymo, you can download the preprocessed [metainfo](https://download.openmmlab.com/mmdetection3d/data/waymo/idx2metainfo.pkl) containing `contextname` and `timestamp` to the directory `data/waymo/waymo_format/` and then modify the dataset config as the following:
-
-  ```python
-  val_evaluator = dict(
-      type='WaymoMetric',
-      ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
-      waymo_bin_file='./data/waymo/waymo_format/gt.bin',
-      data_root='./data/waymo/waymo_format',
-      backend_args=backend_args,
-      convert_kitti_format=True,
-      idx2metainfo='data/waymo/waymo_format/idx2metainfo.pkl'
-  )
-  ```
-
-  Now, this trick is only used for LiDAR-based detection methods.
-
### NuScenes

1. Download nuScenes V1.0 full dataset data [HERE](https://www.nuscenes.org/download). Alternatively, you

@@ -272,12 +258,12 @@ python tools/dataset_converters/update_infos_to_v2.py --dataset kitti --pkl-path

We provide ready-made annotation files we generated offline for reference. You can directly use these files for convenience.

| Dataset | Train annotation file | Val annotation file | Test information file |
| :-----: | :-------------------: | :-----------------: | :-------------------: |
| KITTI | [kitti_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/kitti/kitti_infos_train.pkl) | [kitti_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/kitti/kitti_infos_val.pkl) | [kitti_infos_test](https://download.openmmlab.com/mmdetection3d/data/kitti/kitti_infos_test.pkl) |
| NuScenes | [nuscenes_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_infos_train.pkl) [nuscenes_mini_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_mini_infos_train.pkl) | [nuscenes_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_infos_val.pkl) [nuscenes_mini_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_mini_infos_val.pkl) | |
-| Waymo | [waymo_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_train.pkl) [waymo_mini_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_infos_train.pkl) | [waymo_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_val.pkl) [waymo_mini_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_infos_val.pkl) | [waymo_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_test.pkl) |
+| Waymo | [waymo_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_infos_train.pkl) | [waymo_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_infos_val.pkl) | [waymo_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_test.pkl) [waymo_infos_test_cam_only.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_infos_test_cam_only.pkl) |
-| [Waymo-mini kitti-format data](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_kitti_format.tar.gz) | | | |
+| [Waymo-mini](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_mini.tar.gz) | | | |
| SUN RGB-D | [sunrgbd_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/sunrgbd/sunrgbd_infos_train.pkl) | [sunrgbd_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/sunrgbd/sunrgbd_infos_val.pkl) | |
| ScanNet | [scannet_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/scannet/scannet_infos_train.pkl) | [scannet_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/scannet/scannet_infos_val.pkl) | [scannet_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/scannet/scannet_infos_test.pkl) |
| SemanticKitti | [semantickitti_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/semantickitti/semantickitti_infos_train.pkl) | [semantickitti_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/semantickitti/semantickitti_infos_val.pkl) | [semantickitti_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/semantickitti/semantickitti_infos_test.pkl) |
@@ -111,10 +111,12 @@ python tools/create_data.py kitti --root-path ./data/kitti --out-dir ./data/kitt

### Waymo

-Download the Waymo open dataset version 1.2 [HERE](https://waymo.com/open/download/) and its data split files [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put the `.tfrecord` files into the corresponding folders under `data/waymo/waymo_format/` and the data split `.txt` files into `data/waymo/kitti_format/ImageSets`. Download the validation-set ground truth (`.bin` file) [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_2_0/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. Tip: you can use `gsutil` to download this large-scale dataset from the command line; see this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) for more details. After the steps above, preprocess the Waymo data by running:
+Download the Waymo open dataset version 1.4.1 [HERE](https://waymo.com/open/download/) and its data split files [HERE](https://drive.google.com/drive/folders/18BVuF_RYJF0NjZpt8SnfzANiakoRMf0o?usp=sharing). Then put the `.tfrecord` files into the corresponding folders under `data/waymo/waymo_format/` and the data split `.txt` files into `data/waymo/kitti_format/ImageSets`. Download the validation-set ground truth (`.bin` file) [HERE](https://console.cloud.google.com/storage/browser/waymo_open_dataset_v_1_2_0/validation/ground_truth_objects) and put it into `data/waymo/waymo_format/`. Tip: you can use `gsutil` to download this large-scale dataset from the command line; see this [tool](https://github.com/RalphMao/Waymo-Dataset-Tool) for more details. After the steps above, preprocess the Waymo data by running:

```bash
-python tools/create_data.py waymo --root-path ./data/waymo/ --out-dir ./data/waymo/ --workers 128 --extra-tag waymo
+# TF_CPP_MIN_LOG_LEVEL=3 will disable all logging output from TensorFlow.
+# The number of `--workers` depends on the maximum number of cores in your CPU.
+TF_CPP_MIN_LOG_LEVEL=3 python tools/create_data.py waymo --root-path ./data/waymo --out-dir ./data/waymo --workers 128 --extra-tag waymo --version v1.4
```

Note:

@@ -125,28 +127,12 @@ python tools/create_data.py waymo --root-path ./data/waymo/ --out-dir ./data/way

- **Ready-made annotation files**: We have provided offline-processed [Waymo annotation files](#数据集标注文件列表). You can download them directly and put them under `data/waymo/kitti_format/`. However, you still need to convert the raw Waymo data to kitti format yourself with the script above.

-- **Waymo-mini**: If you just want to verify some methods or debug quickly, you can use our provided [Waymo-mini](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_kitti_format.tar.gz), which only contains 2 segments from the train split and 1 segment from the val split of the original dataset. You only need to download and extract it to `data/waymo/` to use it:
+- **Waymo-mini**: If you just want to verify some methods or debug quickly, you can use our provided [Waymo-mini](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_mini.tar.gz), which only contains 2 segments from the train split and 1 segment from the val split of the original dataset. You only need to download and extract it to `data/waymo_mini/` to use it:

  ```bash
-  tar -xzvf waymo_mini_kitti_format.tar.gz -C ./data/waymo
+  tar -xzvf waymo_mini.tar.gz -C ./data/waymo_mini
  ```

-- **Faster evaluation**: If you want faster evaluation on Waymo, you can download the preprocessed [metainfo file](https://download.openmmlab.com/mmdetection3d/data/waymo/idx2metainfo.pkl) containing `contextname` and `timestamp`, put it under `data/waymo/waymo_format/`, and then modify the dataset config as follows:
-
-  ```python
-  val_evaluator = dict(
-      type='WaymoMetric',
-      ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
-      waymo_bin_file='./data/waymo/waymo_format/gt.bin',
-      data_root='./data/waymo/waymo_format',
-      backend_args=backend_args,
-      convert_kitti_format=True,
-      idx2metainfo='data/waymo/waymo_format/idx2metainfo.pkl'
-  )
-  ```
-
-  Currently this trick is limited to point-cloud-only (LiDAR-based) detection tasks.
-
### NuScenes

Download the complete nuScenes dataset v1.0 [HERE](https://www.nuscenes.org/download). Preprocess the nuScenes data by running:

@@ -222,12 +208,12 @@ python tools/dataset_converters/update_infos_to_v2.py --dataset kitti --pkl-path

We provide offline-generated dataset annotation files for reference. For convenience, you can also use them directly.

| Dataset | Train annotation file | Val annotation file | Test annotation file |
| :-----: | :-------------------: | :-----------------: | :------------------: |
| KITTI | [kitti_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/kitti/kitti_infos_train.pkl) | [kitti_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/kitti/kitti_infos_val.pkl) | [kitti_infos_test](https://download.openmmlab.com/mmdetection3d/data/kitti/kitti_infos_test.pkl) |
| NuScenes | [nuscenes_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_infos_train.pkl) [nuscenes_mini_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_mini_infos_train.pkl) | [nuscenes_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_infos_val.pkl) [nuscenes_mini_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/nuscenes/nuscenes_mini_infos_val.pkl) | |
-| Waymo | [waymo_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_train.pkl) [waymo_mini_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_infos_train.pkl) | [waymo_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_val.pkl) [waymo_mini_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_infos_val.pkl) | [waymo_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_test.pkl) |
+| Waymo | [waymo_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_infos_train.pkl) | [waymo_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_infos_val.pkl) | [waymo_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_infos_test.pkl) [waymo_infos_test_cam_only.pkl](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_infos_test_cam_only.pkl) |
-| [Waymo-mini kitti-format data](https://download.openmmlab.com/mmdetection3d/data/waymo/waymo_mini_kitti_format.tar.gz) | | | |
+| [Waymo-mini](https://download.openmmlab.com/mmdetection3d/data/waymo_mmdet3d_after_1x4/waymo_mini.tar.gz) | | | |
| SUN RGB-D | [sunrgbd_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/sunrgbd/sunrgbd_infos_train.pkl) | [sunrgbd_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/sunrgbd/sunrgbd_infos_val.pkl) | |
| ScanNet | [scannet_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/scannet/scannet_infos_train.pkl) | [scannet_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/scannet/scannet_infos_val.pkl) | [scannet_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/scannet/scannet_infos_test.pkl) |
| SemanticKitti | [semantickitti_infos_train.pkl](https://download.openmmlab.com/mmdetection3d/data/semantickitti/semantickitti_infos_train.pkl) | [semantickitti_infos_val.pkl](https://download.openmmlab.com/mmdetection3d/data/semantickitti/semantickitti_infos_val.pkl) | [semantickitti_infos_test.pkl](https://download.openmmlab.com/mmdetection3d/data/semantickitti/semantickitti_infos_test.pkl) |
@@ -1032,8 +1032,6 @@ class PGDHead(FCOSMono3DHead):
        # change the offset to actual center predictions
        bbox_pred3d[:, :2] = points - bbox_pred3d[:, :2]
        if rescale:
-            bbox_pred3d[:, :2] /= bbox_pred3d[:, :2].new_tensor(
-                scale_factor[0])
            if self.pred_bbox2d:
                bbox_pred2d /= bbox_pred2d.new_tensor(scale_factor[0])
        if self.use_depth_classifier:
...
@@ -13,6 +13,9 @@ class DfM(BaseDetector):
    <https://arxiv.org/abs/2207.12988>`_.

    Args:
+        data_preprocessor (:obj:`ConfigDict` or dict): The pre-process
+            config of :class:`BaseDataPreprocessor`. It usually includes
+            ``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
        backbone (:obj:`ConfigDict` or dict): The backbone config.
        neck (:obj:`ConfigDict` or dict): The neck config.
        backbone_stereo (:obj:`ConfigDict` or dict): The stereo backbone
@@ -39,6 +42,7 @@ class DfM(BaseDetector):
    """

    def __init__(self,
+                 data_preprocessor: ConfigType,
                 backbone: ConfigType,
                 neck: ConfigType,
                 backbone_stereo: ConfigType,
@@ -53,7 +57,8 @@ class DfM(BaseDetector):
                 test_cfg=None,
                 pretrained=None,
                 init_cfg=None):
-        super().__init__(init_cfg=init_cfg)
+        super().__init__(
+            data_preprocessor=data_preprocessor, init_cfg=init_cfg)
        self.backbone = MODELS.build(backbone)
        self.neck = MODELS.build(neck)
        if backbone_stereo is not None:
...
# Copyright (c) OpenMMLab. All rights reserved.
+from typing import Union
+
import numpy as np
import torch
-from mmdet.models.detectors import BaseDetector
+from mmengine.structures import InstanceData
+from torch import Tensor

from mmdet3d.models.layers.fusion_layers.point_fusion import (point_sample,
                                                              voxel_sample)
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d.utils import get_lidar2img
from mmdet3d.structures.det3d_data_sample import SampleList
-from mmdet3d.utils import ConfigType, OptConfigType
+from mmdet3d.utils import ConfigType, OptConfigType, OptInstanceList
from .dfm import DfM
-from .imvoxelnet import ImVoxelNet


@MODELS.register_module()
-class MultiViewDfM(ImVoxelNet, DfM):
+class MultiViewDfM(DfM):
    r"""Waymo challenge solution of `MV-FCOS3D++
    <https://arxiv.org/abs/2207.12716>`_.

@@ -25,7 +27,7 @@ class MultiViewDfM(DfM):
            config.
        backbone_3d (:obj:`ConfigDict` or dict): The 3d backbone config.
        neck_3d (:obj:`ConfigDict` or dict): The 3D neck config.
-        bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
+        bbox_head_3d (:obj:`ConfigDict` or dict): The bbox head config.
        voxel_size (:obj:`ConfigDict` or dict): The voxel size.
        anchor_generator (:obj:`ConfigDict` or dict): The anchor generator
            config.
@@ -60,7 +62,7 @@ class MultiViewDfM(DfM):
                 backbone_stereo: ConfigType,
                 backbone_3d: ConfigType,
                 neck_3d: ConfigType,
-                 bbox_head: ConfigType,
+                 bbox_head_3d: ConfigType,
                 voxel_size: ConfigType,
                 anchor_generator: ConfigType,
                 neck_2d: ConfigType = None,
@@ -71,41 +73,24 @@ class MultiViewDfM(DfM):
                 test_cfg: OptConfigType = None,
                 data_preprocessor: OptConfigType = None,
                 valid_sample: bool = True,
-                 temporal_aggregate: str = 'concat',
+                 temporal_aggregate: str = 'mean',
                 transform_depth: bool = True,
                 init_cfg: OptConfigType = None):
-        # TODO merge with DFM
-        BaseDetector.__init__(
-            self, data_preprocessor=data_preprocessor, init_cfg=init_cfg)
-
-        self.backbone = MODELS.build(backbone)
-        self.neck = MODELS.build(neck)
-        if backbone_stereo is not None:
-            backbone_stereo.update(cat_img_feature=self.neck.cat_img_feature)
-            backbone_stereo.update(in_sem_channels=self.neck.sem_channels[-1])
-            self.backbone_stereo = MODELS.build(backbone_stereo)
-            assert self.neck.cat_img_feature == \
-                self.backbone_stereo.cat_img_feature
-            assert self.neck.sem_channels[
-                -1] == self.backbone_stereo.in_sem_channels
-        if backbone_3d is not None:
-            self.backbone_3d = MODELS.build(backbone_3d)
-        if neck_3d is not None:
-            self.neck_3d = MODELS.build(neck_3d)
-        if neck_2d is not None:
-            self.neck_2d = MODELS.build(neck_2d)
-        if bbox_head_2d is not None:
-            self.bbox_head_2d = MODELS.build(bbox_head_2d)
-        if depth_head_2d is not None:
-            self.depth_head_2d = MODELS.build(depth_head_2d)
-        if depth_head is not None:
-            self.depth_head = MODELS.build(depth_head)
-            self.depth_samples = self.depth_head.depth_samples
-        self.train_cfg = train_cfg
-        self.test_cfg = test_cfg
-        bbox_head.update(train_cfg=train_cfg)
-        bbox_head.update(test_cfg=test_cfg)
-        self.bbox_head = MODELS.build(bbox_head)
+        super().__init__(
+            data_preprocessor=data_preprocessor,
+            backbone=backbone,
+            neck=neck,
+            backbone_stereo=backbone_stereo,
+            backbone_3d=backbone_3d,
+            neck_3d=neck_3d,
+            bbox_head_3d=bbox_head_3d,
+            neck_2d=neck_2d,
+            bbox_head_2d=bbox_head_2d,
+            depth_head_2d=depth_head_2d,
+            depth_head=depth_head,
+            train_cfg=train_cfg,
+            test_cfg=test_cfg,
+            init_cfg=init_cfg)
        self.voxel_size = voxel_size
        self.voxel_range = anchor_generator['ranges'][0]
        self.n_voxels = [
@@ -371,6 +356,139 @@ class MultiViewDfM(DfM):
            transform_feats += (batch_stereo_feats, )
        return transform_feats
def loss(self, batch_inputs: Tensor,
batch_data_samples: SampleList) -> Union[dict, tuple]:
"""Calculate losses from a batch of inputs dict and data samples.
Args:
batch_inputs_dict (dict): The model input dict which include
'points', 'img' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
dict: A dictionary of loss components.
"""
feats = self.extract_feat(batch_inputs, batch_data_samples)
bev_feat = feats[0]
losses = self.bbox_head_3d.loss([bev_feat], batch_data_samples)
return losses
def predict(self, batch_inputs: Tensor,
batch_data_samples: SampleList) -> SampleList:
"""Predict results from a batch of inputs and data samples with post-
processing.
Args:
batch_inputs_dict (dict): The model input dict which include
'points', 'imgs' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor, optional): Image of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
samples. It usually includes information such as
`gt_instance_3d`, `gt_panoptic_seg_3d` and `gt_sem_seg_3d`.
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the
input samples. Each Det3DDataSample usually contain
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
"""
feats = self.extract_feat(batch_inputs, batch_data_samples)
bev_feat = feats[0]
results_list = self.bbox_head_3d.predict([bev_feat],
batch_data_samples)
predictions = self.add_pred_to_datasample(batch_data_samples,
results_list)
return predictions
def _forward(self,
batch_inputs: Tensor,
batch_data_samples: SampleList = None):
"""Network forward process.
Usually includes backbone, neck and head forward without any post-
processing.
"""
feats = self.extract_feat(batch_inputs, batch_data_samples)
bev_feat = feats[0]
self.bbox_head.forward(bev_feat, batch_data_samples)
def add_pred_to_datasample(
self,
data_samples: SampleList,
data_instances_3d: OptInstanceList = None,
data_instances_2d: OptInstanceList = None,
) -> SampleList:
"""Convert results list to `Det3DDataSample`.
Subclasses could override it to be compatible for some multi-modality
3D detectors.
Args:
data_samples (list[:obj:`Det3DDataSample`]): The input data.
data_instances_3d (list[:obj:`InstanceData`], optional): 3D
Detection results of each sample.
data_instances_2d (list[:obj:`InstanceData`], optional): 2D
Detection results of each sample.
Returns:
list[:obj:`Det3DDataSample`]: Detection results of the
input. Each Det3DDataSample usually contains
'pred_instances_3d'. And the ``pred_instances_3d`` normally
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instance, )
- labels_3d (Tensor): Labels of 3D bboxes, has a shape
(num_instances, ).
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
When there are image prediction in some models, it should
contains `pred_instances`, And the ``pred_instances`` normally
contains following keys.
- scores (Tensor): Classification scores of image, has a shape
(num_instance, )
- labels (Tensor): Predict Labels of 2D bboxes, has a shape
(num_instances, ).
- bboxes (Tensor): Contains a tensor with shape
(num_instances, 4).
"""
assert (data_instances_2d is not None) or \
(data_instances_3d is not None),\
'please pass at least one type of data_samples'
if data_instances_2d is None:
data_instances_2d = [
InstanceData() for _ in range(len(data_instances_3d))
]
if data_instances_3d is None:
data_instances_3d = [
InstanceData() for _ in range(len(data_instances_2d))
]
for i, data_sample in enumerate(data_samples):
data_sample.pred_instances_3d = data_instances_3d[i]
data_sample.pred_instances = data_instances_2d[i]
return data_samples
    def aug_test(self, imgs, img_metas, **kwargs):
        """Test with augmentations.
...