Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
raojy
mmdetection3d_rjy
Commits
7aa442d5
Commit
7aa442d5
authored
Apr 01, 2026
by
raojy
Browse files
raw_mmdetection
parent
9c03eaa8
Changes
465
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2417 additions
and
0 deletions
+2417
-0
mmdetection3d/mmdet3d/configs/_base_/schedules/mmdet_schedule_1x.py
...n3d/mmdet3d/configs/_base_/schedules/mmdet_schedule_1x.py
+33
-0
mmdetection3d/mmdet3d/configs/_base_/schedules/schedule_2x.py
...tection3d/mmdet3d/configs/_base_/schedules/schedule_2x.py
+42
-0
mmdetection3d/mmdet3d/configs/_base_/schedules/schedule_3x.py
...tection3d/mmdet3d/configs/_base_/schedules/schedule_3x.py
+37
-0
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_100e.py
...ion3d/mmdet3d/configs/_base_/schedules/seg_cosine_100e.py
+32
-0
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_150e.py
...ion3d/mmdet3d/configs/_base_/schedules/seg_cosine_150e.py
+32
-0
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_200e.py
...ion3d/mmdet3d/configs/_base_/schedules/seg_cosine_200e.py
+32
-0
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_50e.py
...tion3d/mmdet3d/configs/_base_/schedules/seg_cosine_50e.py
+32
-0
mmdetection3d/mmdet3d/configs/centerpoint/centerpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
...terpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
+181
-0
mmdetection3d/mmdet3d/configs/centerpoint/centerpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
...nterpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
+181
-0
mmdetection3d/mmdet3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
...3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
+43
-0
mmdetection3d/mmdet3d/configs/cylinder3d/cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py
...der3d/cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py
+88
-0
mmdetection3d/mmdet3d/configs/minkunet/minkunet34_w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
..._w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
+99
-0
mmdetection3d/mmdet3d/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2_80e_kitti_3d_3class.py
...t/mvxnet_fpn_dv_second_secfpn_8xb2_80e_kitti_3d_3class.py
+312
-0
mmdetection3d/mmdet3d/configs/pgd/pgd_r101_caffe_fpn_head_gn_4xb3_4x_kitti_mono3d.py
...gs/pgd/pgd_r101_caffe_fpn_head_gn_4xb3_4x_kitti_mono3d.py
+152
-0
mmdetection3d/mmdet3d/configs/votenet/__init__.py
mmdetection3d/mmdet3d/configs/votenet/__init__.py
+1
-0
mmdetection3d/mmdet3d/configs/votenet/votenet_8xb8_scannet_3d.py
...tion3d/mmdet3d/configs/votenet/votenet_8xb8_scannet_3d.py
+49
-0
mmdetection3d/mmdet3d/datasets/__init__.py
mmdetection3d/mmdet3d/datasets/__init__.py
+41
-0
mmdetection3d/mmdet3d/datasets/convert_utils.py
mmdetection3d/mmdet3d/datasets/convert_utils.py
+425
-0
mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
+182
-0
mmdetection3d/mmdet3d/datasets/det3d_dataset.py
mmdetection3d/mmdet3d/datasets/det3d_dataset.py
+423
-0
No files found.
Too many changes to show.
To preserve performance only
465 of 465+
files are displayed.
Plain diff
Email patch
mmdetection3d/mmdet3d/configs/_base_/schedules/mmdet_schedule_1x.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
LinearLR
,
MultiStepLR
from
mmengine.runner.loops
import
EpochBasedTrainLoop
,
TestLoop
,
ValLoop
from
torch.optim.sgd
import
SGD
# 1x training schedule (12 epochs), validated once per epoch.
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# Learning-rate schedule: a short iteration-based linear warmup followed by
# step decay (x0.1) at epochs 8 and 11.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=12,
        by_epoch=True,
        milestones=[8, 11],
        gamma=0.1)
]

# Optimizer: plain SGD with momentum.
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
mmdetection3d/mmdet3d/configs/_base_/schedules/schedule_2x.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
LinearLR
,
MultiStepLR
from
mmengine.runner.loops
import
EpochBasedTrainLoop
,
TestLoop
,
ValLoop
from
torch.optim.adamw
import
AdamW
# Optimizer settings.
# This schedule is mainly used by models on the nuScenes dataset.
lr = 0.001
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=AdamW, lr=lr, weight_decay=0.01),
    # max_norm=10 is better for SECOND
    clip_grad=dict(max_norm=35, norm_type=2))

# 2x training schedule (24 epochs); validation only at the end.
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=24, val_interval=24)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# Learning rate: linear warmup over the first 1000 iterations, then step
# decay (x0.1) at epochs 20 and 23.
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=1.0 / 1000,
        by_epoch=False,
        begin=0,
        end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=24,
        by_epoch=True,
        milestones=[20, 23],
        gamma=0.1)
]

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (4 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
mmdetection3d/mmdet3d/configs/_base_/schedules/schedule_3x.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
MultiStepLR
from
mmengine.runner.loops
import
EpochBasedTrainLoop
,
TestLoop
,
ValLoop
from
torch.optim.adamw
import
AdamW
# Optimizer settings.
# This schedule is mainly used by models on indoor datasets,
# e.g., VoteNet on SUNRGBD and ScanNet.
lr = 0.008  # max learning rate
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=AdamW, lr=lr, weight_decay=0.01),
    clip_grad=dict(max_norm=10, norm_type=2),
)

# 3x training schedule (36 epochs), validated once per epoch.
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=36, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# Learning rate: step decay (x0.1) at epochs 24 and 32.
param_scheduler = [
    dict(
        type=MultiStepLR,
        begin=0,
        end=36,
        by_epoch=True,
        milestones=[24, 32],
        gamma=0.1)
]

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (4 GPUs) x (8 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_100e.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
CosineAnnealingLR
from
torch.optim.sgd
import
SGD
# Optimizer settings.
# This schedule is mainly used on the S3DIS dataset in the segmentation task.
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.001),
    clip_grad=None)

# Learning rate: a single cosine-annealing cycle over all 100 epochs.
param_scheduler = [
    dict(
        type=CosineAnnealingLR,
        T_max=100,
        eta_min=1e-5,
        by_epoch=True,
        begin=0,
        end=100)
]

# Runtime settings: 100 epochs, validated once per epoch.
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (4 GPUs) x (32 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=128)
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_150e.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
CosineAnnealingLR
from
torch.optim.sgd
import
SGD
# Optimizer settings.
# This schedule is mainly used on the S3DIS dataset in the segmentation task.
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=SGD, lr=0.2, momentum=0.9, weight_decay=0.0001),
    clip_grad=None)

# Learning rate: a single cosine-annealing cycle over all 150 epochs.
param_scheduler = [
    dict(
        type=CosineAnnealingLR,
        T_max=150,
        eta_min=0.002,
        by_epoch=True,
        begin=0,
        end=150)
]

# Runtime settings: 150 epochs, validated once per epoch.
train_cfg = dict(by_epoch=True, max_epochs=150, val_interval=1)
val_cfg = dict()
test_cfg = dict()

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=64)
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_200e.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
CosineAnnealingLR
from
torch.optim.adam
import
Adam
# Optimizer settings.
# This schedule is mainly used on the S3DIS dataset in the segmentation task.
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=Adam, lr=0.001, weight_decay=0.01),
    clip_grad=None)

# Learning rate: a single cosine-annealing cycle over all 200 epochs.
param_scheduler = [
    dict(
        type=CosineAnnealingLR,
        T_max=200,
        eta_min=1e-5,
        by_epoch=True,
        begin=0,
        end=200)
]

# Runtime settings: 200 epochs, validated once per epoch.
train_cfg = dict(by_epoch=True, max_epochs=200, val_interval=1)
val_cfg = dict()
test_cfg = dict()

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (2 GPUs) x (16 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
mmdetection3d/mmdet3d/configs/_base_/schedules/seg_cosine_50e.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
CosineAnnealingLR
from
torch.optim.adam
import
Adam
# Optimizer settings.
# This schedule is mainly used on the S3DIS dataset in the segmentation task.
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=Adam, lr=0.001, weight_decay=0.001),
    clip_grad=None)

# Learning rate: a single cosine-annealing cycle over all 50 epochs.
param_scheduler = [
    dict(
        type=CosineAnnealingLR,
        T_max=50,
        eta_min=1e-5,
        by_epoch=True,
        begin=0,
        end=50)
]

# Runtime settings: 50 epochs, validated once per epoch.
train_cfg = dict(by_epoch=True, max_epochs=50, val_interval=1)
val_cfg = dict()
test_cfg = dict()

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (2 GPUs) x (16 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
mmdetection3d/mmdet3d/configs/centerpoint/centerpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine.config
import
read_base
with
read_base
():
from
.._base_.datasets.nus_3d
import
*
from
.._base_.models.centerpoint_pillar02_second_secfpn_nus
import
*
from
.._base_.schedules.cyclic_20e
import
*
from
.._base_.default_runtime
import
*
from
mmengine.dataset.sampler
import
DefaultSampler
from
mmdet3d.datasets.dataset_wrappers
import
CBGSDataset
from
mmdet3d.datasets.nuscenes_dataset
import
NuScenesDataset
from
mmdet3d.datasets.transforms.formating
import
Pack3DDetInputs
from
mmdet3d.datasets.transforms.loading
import
(
LoadAnnotations3D
,
LoadPointsFromFile
,
LoadPointsFromMultiSweeps
)
from
mmdet3d.datasets.transforms.test_time_aug
import
MultiScaleFlipAug3D
from
mmdet3d.datasets.transforms.transforms_3d
import
(
# noqa
GlobalRotScaleTrans
,
ObjectNameFilter
,
ObjectRangeFilter
,
ObjectSample
,
PointShuffle
,
PointsRangeFilter
,
RandomFlip3D
)
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
# Using calibration info to convert the Lidar-coordinate point cloud range to
# the ego-coordinate point cloud range could bring a little promotion in
# nuScenes.
# point_cloud_range = [-51.2, -52, -5.0, 51.2, 50.4, 3.0]
# For nuScenes we usually do 10-class detection.
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix.update(
    dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP'))

# Propagate the point cloud range into every model component that needs it.
model.update(
    dict(
        data_preprocessor=dict(
            voxel_layer=dict(point_cloud_range=point_cloud_range)),
        pts_voxel_encoder=dict(point_cloud_range=point_cloud_range),
        pts_bbox_head=dict(
            bbox_coder=dict(pc_range=point_cloud_range[:2])),
        # model training and testing settings
        train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),
        test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2]))))

dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None

# GT-database sampler used by ObjectSample for copy-paste augmentation.
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(
            car=5,
            truck=5,
            bus=5,
            trailer=5,
            construction_vehicle=5,
            traffic_cone=5,
            barrier=5,
            motorcycle=5,
            bicycle=5,
            pedestrian=5)),
    classes=class_names,
    sample_groups=dict(
        car=2,
        truck=3,
        construction_vehicle=7,
        bus=4,
        trailer=6,
        barrier=2,
        motorcycle=6,
        bicycle=6,
        pedestrian=2,
        traffic_cone=2),
    points_loader=dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=[0, 1, 2, 3, 4],
        backend_args=backend_args),
    backend_args=backend_args)

# Training pipeline: load points + sweeps, sample GT objects, then apply
# geometric augmentation and range/name filtering.
train_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
    dict(type=ObjectSample, db_sampler=db_sampler),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(
        type=RandomFlip3D,
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectNameFilter, classes=class_names),
    dict(type=PointShuffle),
    dict(
        type=Pack3DDetInputs,
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

# Test pipeline: no-op TTA wrapper (single scale, no flip).
test_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(
        type=MultiScaleFlipAug3D,
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type=GlobalRotScaleTrans,
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type=RandomFlip3D)
        ]),
    dict(type=Pack3DDetInputs, keys=['points'])
]

# Replace the inherited train dataloader entirely with a CBGS-wrapped
# nuScenes dataset.
train_dataloader.merge(
    dict(
        _delete_=True,
        batch_size=4,
        num_workers=4,
        persistent_workers=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        dataset=dict(
            type=CBGSDataset,
            dataset=dict(
                type=NuScenesDataset,
                data_root=data_root,
                ann_file='nuscenes_infos_train.pkl',
                pipeline=train_pipeline,
                metainfo=dict(classes=class_names),
                test_mode=False,
                data_prefix=data_prefix,
                use_valid_flag=True,
                # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
                # and box_type_3d='Depth' in sunrgbd and scannet dataset.
                box_type_3d='LiDAR',
                backend_args=backend_args))))
test_dataloader.update(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))
val_dataloader.update(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))

train_cfg.update(dict(val_interval=20))
mmdetection3d/mmdet3d/configs/centerpoint/centerpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine
import
read_base
with
read_base
():
from
.._base_.datasets.nus_3d
import
*
from
.._base_.models.centerpoint_voxel01_second_secfpn_nus
import
*
from
.._base_.schedules.cyclic_20e
import
*
from
.._base_.default_runtime
import
*
from
mmengine.dataset.sampler
import
DefaultSampler
from
mmdet3d.datasets.dataset_wrappers
import
CBGSDataset
from
mmdet3d.datasets.nuscenes_dataset
import
NuScenesDataset
from
mmdet3d.datasets.transforms.formating
import
Pack3DDetInputs
from
mmdet3d.datasets.transforms.loading
import
(
LoadAnnotations3D
,
LoadPointsFromFile
,
LoadPointsFromMultiSweeps
)
from
mmdet3d.datasets.transforms.test_time_aug
import
MultiScaleFlipAug3D
from
mmdet3d.datasets.transforms.transforms_3d
import
(
# noqa
GlobalRotScaleTrans
,
ObjectNameFilter
,
ObjectRangeFilter
,
ObjectSample
,
PointShuffle
,
PointsRangeFilter
,
RandomFlip3D
)
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly.
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
# Using calibration info to convert the Lidar-coordinate point cloud range to
# the ego-coordinate point cloud range could bring a little promotion in
# nuScenes.
# point_cloud_range = [-51.2, -52, -5.0, 51.2, 50.4, 3.0]
# For nuScenes we usually do 10-class detection.
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix.update(
    dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP'))

# Propagate the point cloud range into every model component that needs it.
model.update(
    dict(
        data_preprocessor=dict(
            voxel_layer=dict(point_cloud_range=point_cloud_range)),
        pts_bbox_head=dict(
            bbox_coder=dict(pc_range=point_cloud_range[:2])),
        # model training and testing settings
        train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),
        test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2]))))

dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None

# GT-database sampler used by ObjectSample for copy-paste augmentation.
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(
            car=5,
            truck=5,
            bus=5,
            trailer=5,
            construction_vehicle=5,
            traffic_cone=5,
            barrier=5,
            motorcycle=5,
            bicycle=5,
            pedestrian=5)),
    classes=class_names,
    sample_groups=dict(
        car=2,
        truck=3,
        construction_vehicle=7,
        bus=4,
        trailer=6,
        barrier=2,
        motorcycle=6,
        bicycle=6,
        pedestrian=2,
        traffic_cone=2),
    points_loader=dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=[0, 1, 2, 3, 4],
        backend_args=backend_args),
    backend_args=backend_args)

# Training pipeline: load points + sweeps, sample GT objects, then apply
# geometric augmentation and range/name filtering.
train_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
    dict(type=ObjectSample, db_sampler=db_sampler),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(
        type=RandomFlip3D,
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectNameFilter, classes=class_names),
    dict(type=PointShuffle),
    dict(
        type=Pack3DDetInputs,
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]

# Test pipeline: no-op TTA wrapper (single scale, no flip) plus range filter.
test_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(
        type=MultiScaleFlipAug3D,
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type=GlobalRotScaleTrans,
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type=RandomFlip3D),
            dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range)
        ]),
    dict(type=Pack3DDetInputs, keys=['points'])
]

# Replace the inherited train dataloader entirely with a CBGS-wrapped
# nuScenes dataset.
train_dataloader.merge(
    dict(
        _delete_=True,
        batch_size=4,
        num_workers=4,
        persistent_workers=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        dataset=dict(
            type=CBGSDataset,
            dataset=dict(
                type=NuScenesDataset,
                data_root=data_root,
                ann_file='nuscenes_infos_train.pkl',
                pipeline=train_pipeline,
                metainfo=dict(classes=class_names),
                test_mode=False,
                data_prefix=data_prefix,
                use_valid_flag=True,
                # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
                # and box_type_3d='Depth' in sunrgbd and scannet dataset.
                box_type_3d='LiDAR',
                backend_args=backend_args))))
test_dataloader.update(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))
val_dataloader.update(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))

train_cfg.update(dict(val_interval=20))
mmdetection3d/mmdet3d/configs/cylinder3d/cylinder3d_4xb4-3x_semantickitti.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine
import
read_base
with
read_base
():
from
.._base_.datasets.semantickitti
import
*
from
.._base_.models.cylinder3d
import
*
from
.._base_.default_runtime
import
*
from
mmengine.optim.optimizer.optimizer_wrapper
import
OptimWrapper
from
mmengine.optim.scheduler.lr_scheduler
import
LinearLR
,
MultiStepLR
from
mmengine.runner.loops
import
EpochBasedTrainLoop
,
TestLoop
,
ValLoop
from
torch.optim
import
AdamW
# Optimizer: AdamW without gradient clipping.
lr = 0.001
optim_wrapper = dict(
    type=OptimWrapper,
    optimizer=dict(type=AdamW, lr=lr, weight_decay=0.01))

# 36-epoch schedule, validated once per epoch.
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=36, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)

# Learning rate: iteration-based linear warmup for 1000 iterations, then a
# single step decay (x0.1) at epoch 30.
param_scheduler = [
    dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
    dict(
        type=MultiStepLR,
        begin=0,
        end=36,
        by_epoch=True,
        milestones=[30],
        gamma=0.1)
]

train_dataloader.update(dict(batch_size=4, ))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (4 samples per GPU).
# auto_scale_lr = dict(enable=False, base_batch_size=32)

# Keep a checkpoint every 5 epochs.
default_hooks.update(
    dict(checkpoint=dict(type=CheckpointHook, interval=5)))
mmdetection3d/mmdet3d/configs/cylinder3d/cylinder3d_8xb2-laser-polar-mix-3x_semantickitti.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine
import
read_base
with
read_base
():
from
.._base_.datasets.semantickitti
import
*
from
.._base_.default_runtime
import
*
from
.._base_.models.cylinder3d
import
*
from
.._base_.schedules.schedule_3x
import
*
from
mmcv.transforms.wrappers
import
RandomChoice
from
mmdet3d.datasets.transforms.transforms_3d
import
LaserMix
,
PolarMix
# Training pipeline for semantic segmentation on SemanticKITTI: load points
# and per-point semantic labels, randomly apply either LaserMix or PolarMix
# (each with its own pre-transform to load the mixed-in scan), then a global
# rotation/scale augmentation.
train_pipeline = [
    dict(type=LoadPointsFromFile, coord_type='LIDAR', load_dim=4, use_dim=4),
    dict(
        type=LoadAnnotations3D,
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        seg_3d_dtype='np.int32',
        seg_offset=2**16,
        dataset_type='semantickitti'),
    dict(type=PointSegClassMapping),
    dict(
        type=RandomChoice,
        transforms=[
            [
                dict(
                    type=LaserMix,
                    num_areas=[3, 4, 5, 6],
                    pitch_angles=[-25, 3],
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
            [
                dict(
                    type=PolarMix,
                    instance_classes=[0, 1, 2, 3, 4, 5, 6, 7],
                    swap_ratio=0.5,
                    rotate_paste_ratio=1.0,
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
        ],
        # Pick LaserMix or PolarMix with equal probability.
        prob=[0.5, 0.5]),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[0., 6.28318531],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0],
    ),
    dict(type=Pack3DDetInputs, keys=['points', 'pts_semantic_mask'])
]

train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))

# Keep a checkpoint every epoch.
default_hooks.update(
    dict(checkpoint=dict(type=CheckpointHook, interval=1)))
mmdetection3d/mmdet3d/configs/minkunet/minkunet34_w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine
import
read_base
with
read_base
():
from
.._base_.datasets.semantickitti
import
*
from
.._base_.models.minkunet
import
*
from
.._base_.schedules.schedule_3x
import
*
from
.._base_.default_runtime
import
*
from
mmcv.transforms.wrappers
import
RandomChoice
from
mmengine.hooks.checkpoint_hook
import
CheckpointHook
from
mmdet3d.datasets.transforms.formating
import
Pack3DDetInputs
from
mmdet3d.datasets.transforms.loading
import
(
LoadAnnotations3D
,
LoadPointsFromFile
,
PointSegClassMapping
)
from
mmdet3d.datasets.transforms.transforms_3d
import
(
GlobalRotScaleTrans
,
LaserMix
,
PolarMix
)
# MinkUNet34 tweaks on top of the base model: unlimited voxels and the
# 34-layer encoder block layout.
model.update(
    dict(
        data_preprocessor=dict(max_voxels=None),
        backbone=dict(encoder_blocks=[2, 3, 4, 6])))

# Training pipeline for semantic segmentation on SemanticKITTI: load points
# and per-point semantic labels, randomly apply either LaserMix or PolarMix
# (each with its own pre-transform to load the mixed-in scan), then a global
# rotation/scale augmentation.
train_pipeline = [
    dict(type=LoadPointsFromFile, coord_type='LIDAR', load_dim=4, use_dim=4),
    dict(
        type=LoadAnnotations3D,
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        seg_3d_dtype='np.int32',
        seg_offset=2**16,
        dataset_type='semantickitti'),
    dict(type=PointSegClassMapping),
    dict(
        type=RandomChoice,
        transforms=[
            [
                dict(
                    type=LaserMix,
                    num_areas=[3, 4, 5, 6],
                    pitch_angles=[-25, 3],
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
            [
                dict(
                    type=PolarMix,
                    instance_classes=[0, 1, 2, 3, 4, 5, 6, 7],
                    swap_ratio=0.5,
                    rotate_paste_ratio=1.0,
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
        ],
        # Pick LaserMix or PolarMix with equal probability.
        prob=[0.5, 0.5]),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[0., 6.28318531],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0],
    ),
    dict(type=Pack3DDetInputs, keys=['points', 'pts_semantic_mask'])
]

train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))

# Keep a checkpoint every epoch.
default_hooks.update(
    dict(checkpoint=dict(type=CheckpointHook, interval=1)))
mmdetection3d/mmdet3d/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2_80e_kitti_3d_3class.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine
import
read_base
with
read_base
():
from
.._base_.schedules.cosine
import
*
from
.._base_.default_runtime
import
*
from
mmcv.transforms.loading
import
LoadImageFromFile
from
mmcv.transforms.processing
import
RandomResize
,
Resize
from
mmengine.dataset.dataset_wrapper
import
RepeatDataset
from
mmengine.dataset.sampler
import
DefaultSampler
from
mmengine.visualization.vis_backend
import
LocalVisBackend
from
mmdet3d.datasets.kitti_dataset
import
KittiDataset
from
mmdet3d.datasets.transforms.formating
import
Pack3DDetInputs
from
mmdet3d.datasets.transforms.loading
import
(
LoadAnnotations3D
,
LoadPointsFromFile
)
from
mmdet3d.datasets.transforms.test_time_aug
import
MultiScaleFlipAug3D
from
mmdet3d.datasets.transforms.transforms_3d
import
(
GlobalRotScaleTrans
,
ObjectRangeFilter
,
PointShuffle
,
PointsRangeFilter
,
RandomFlip3D
)
from
mmdet3d.evaluation.metrics.kitti_metric
import
KittiMetric
from
mmdet3d.models.backbones.second
import
SECOND
from
mmdet3d.models.data_preprocessors.data_preprocessor
import
\
Det3DDataPreprocessor
from
mmdet3d.models.dense_heads.anchor3d_head
import
Anchor3DHead
from
mmdet3d.models.detectors.mvx_faster_rcnn
import
DynamicMVXFasterRCNN
from
mmdet3d.models.layers.fusion_layers.point_fusion
import
PointFusion
from
mmdet3d.models.middle_encoders.sparse_encoder
import
SparseEncoder
from
mmdet3d.models.necks.second_fpn
import
SECONDFPN
from
mmdet3d.models.task_modules.anchor.anchor_3d_generator
import
\
Anchor3DRangeGenerator
from
mmdet3d.models.task_modules.assigners.max_3d_iou_assigner
import
\
Max3DIoUAssigner
from
mmdet3d.models.task_modules.coders.delta_xyzwhlr_bbox_coder
import
\
DeltaXYZWLHRBBoxCoder
from
mmdet3d.models.voxel_encoders.voxel_encoder
import
DynamicVFE
from
mmdet3d.structures.ops.iou3d_calculator
import
BboxOverlapsNearest3D
from
mmdet3d.visualization.local_visualizer
import
Det3DLocalVisualizer
# model settings
# Voxelization resolution (x, y, z) in meters and the LiDAR range
# [x_min, y_min, z_min, x_max, y_max, z_max] used throughout this config.
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
model = dict(
    type=DynamicMVXFasterRCNN,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        voxel=True,
        # Dynamic voxelization: no hard cap on points per voxel or voxel count.
        voxel_type='dynamic',
        voxel_layer=dict(
            max_num_points=-1,
            point_cloud_range=point_cloud_range,
            voxel_size=voxel_size,
            max_voxels=(-1, -1)),
        # Caffe-style image normalization (BGR mean, unit std).
        mean=[102.9801, 115.9465, 122.7717],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False,
        pad_size_divisor=32),
    img_backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    img_neck=dict(
        type='mmdet.FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        # make the image features more stable numerically to avoid loss nan
        norm_cfg=dict(type='BN', requires_grad=False),
        num_outs=5),
    pts_voxel_encoder=dict(
        type=DynamicVFE,
        in_channels=4,
        feat_channels=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        # Fuses multi-level image features into the point features.
        fusion_layer=dict(
            type=PointFusion,
            img_channels=256,
            pts_channels=64,
            mid_channels=128,
            out_channels=128,
            img_levels=[0, 1, 2, 3, 4],
            align_corners=False,
            activate_out=True,
            fuse_out=False)),
    pts_middle_encoder=dict(
        type=SparseEncoder,
        in_channels=128,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    pts_backbone=dict(
        type=SECOND,
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    pts_neck=dict(
        type=SECONDFPN,
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    pts_bbox_head=dict(
        type=Anchor3DHead,
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type=Anchor3DRangeGenerator,
            # One anchor range per class: Pedestrian, Cyclist, Car.
            ranges=[
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
            ],
            sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        assigner_per_size=True,
        diff_rad_by_sin=True,
        assign_per_class=True,
        bbox_coder=dict(type=DeltaXYZWLHRBBoxCoder),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=False,
            loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            assigner=[
                dict(  # for Pedestrian
                    type=Max3DIoUAssigner,
                    iou_calculator=dict(type=BboxOverlapsNearest3D),
                    pos_iou_thr=0.35,
                    neg_iou_thr=0.2,
                    min_pos_iou=0.2,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type=Max3DIoUAssigner,
                    iou_calculator=dict(type=BboxOverlapsNearest3D),
                    pos_iou_thr=0.35,
                    neg_iou_thr=0.2,
                    min_pos_iou=0.2,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type=Max3DIoUAssigner,
                    iou_calculator=dict(type=BboxOverlapsNearest3D),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1),
            ],
            allowed_border=0,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        pts=dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_thr=0.01,
            score_thr=0.1,
            min_bbox_size=0,
            nms_pre=100,
            max_num=50)))

# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=True, use_camera=True)
backend_args = None

train_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
    # Multi-scale image resize within [(640, 192), (2560, 768)].
    dict(
        type=RandomResize,
        scale=[(640, 192), (2560, 768)],
        keep_ratio=True),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0.2, 0.2, 0.2]),
    dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
    dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=PointShuffle),
    dict(
        type=Pack3DDetInputs,
        keys=[
            'points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes',
            'gt_labels'
        ])
]
test_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(
        type=MultiScaleFlipAug3D,
        img_scale=(1280, 384),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            # Temporary solution, fix this after refactor the augtest
            dict(type=Resize, scale=0, keep_ratio=True),
            dict(
                type=GlobalRotScaleTrans,
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type=RandomFlip3D),
            dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
        ]),
    dict(type=Pack3DDetInputs, keys=['points', 'img'])
]
modality = dict(use_lidar=True, use_camera=True)
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    sampler=dict(type=DefaultSampler, shuffle=True),
    dataset=dict(
        type=RepeatDataset,
        times=2,
        dataset=dict(
            type=KittiDataset,
            data_root=data_root,
            modality=modality,
            ann_file='kitti_infos_train.pkl',
            data_prefix=dict(
                pts='training/velodyne_reduced', img='training/image_2'),
            pipeline=train_pipeline,
            filter_empty_gt=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='LiDAR',
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=KittiDataset,
        data_root=data_root,
        modality=modality,
        ann_file='kitti_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne_reduced', img='training/image_2'),
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=KittiDataset,
        data_root=data_root,
        ann_file='kitti_infos_val.pkl',
        modality=modality,
        data_prefix=dict(
            pts='training/velodyne_reduced', img='training/image_2'),
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))
# `optim_wrapper` comes from the inherited schedule config.
optim_wrapper.update(
    dict(
        optimizer=dict(weight_decay=0.01),
        clip_grad=dict(max_norm=35, norm_type=2),
    ))
val_evaluator = dict(
    type=KittiMetric, ann_file='data/kitti/kitti_infos_val.pkl')
test_evaluator = val_evaluator
vis_backends = [dict(type=LocalVisBackend)]
visualizer = dict(
    type=Det3DLocalVisualizer, vis_backends=vis_backends, name='visualizer')

# You may need to download the model first if the network is unstable
load_from = 'https://download.openmmlab.com/mmdetection3d/pretrain_models/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa
mmdetection3d/mmdet3d/configs/pgd/pgd_r101_caffe_fpn_head_gn_4xb3_4x_kitti_mono3d.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine
import
read_base
with read_base():
    from .._base_.datasets.kitti_mono3d import *
    from .._base_.models.pgd import *
    from .._base_.schedules.mmdet_schedule_1x import *
    from .._base_.default_runtime import *

from mmcv.transforms.processing import Resize
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR

from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
                                                 LoadImageFromFileMono3D)
from mmdet3d.datasets.transforms.transforms_3d import RandomFlip3D
from mmdet3d.models.data_preprocessors.data_preprocessor import Det3DDataPreprocessor
from mmdet3d.models.losses.uncertain_smooth_l1_loss import UncertainSmoothL1Loss
from mmdet3d.models.task_modules.coders.pgd_bbox_coder import PGDBBoxCoder

# model settings
# Overrides on top of the inherited PGD base model for KITTI mono3d.
model.update(
    dict(
        data_preprocessor=dict(
            type=Det3DDataPreprocessor,
            # Caffe-style BGR normalization with unit std.
            mean=[103.530, 116.280, 123.675],
            std=[1.0, 1.0, 1.0],
            bgr_to_rgb=False,
            pad_size_divisor=32),
        backbone=dict(frozen_stages=0),
        neck=dict(start_level=0, num_outs=4),
        bbox_head=dict(
            num_classes=3,
            bbox_code_size=7,
            pred_attrs=False,
            pred_velo=False,
            pred_bbox2d=True,
            use_onlyreg_proj=True,
            strides=(4, 8, 16, 32),
            regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 1e8)),
            group_reg_dims=(2, 1, 3, 1, 16,
                            4),  # offset, depth, size, rot, kpts, bbox2d
            reg_branch=(
                (256, ),  # offset
                (256, ),  # depth
                (256, ),  # size
                (256, ),  # rot
                (256, ),  # kpts
                (256, )  # bbox2d
            ),
            centerness_branch=(256, ),
            loss_cls=dict(
                type='mmdet.FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_bbox=dict(
                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
            loss_dir=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0),
            loss_centerness=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=True,
                loss_weight=1.0),
            use_depth_classifier=True,
            depth_branch=(256, ),
            depth_range=(0, 70),
            depth_unit=10,
            division='uniform',
            depth_bins=8,
            pred_keypoints=True,
            weight_dim=1,
            loss_depth=dict(
                type=UncertainSmoothL1Loss, alpha=1.0, beta=3.0,
                loss_weight=1.0),
            bbox_coder=dict(
                type=PGDBBoxCoder,
                base_depths=((28.01, 16.32), ),
                base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),
                           (3.9, 1.56, 1.6)),
                code_size=7)),
        # set weight 1.0 for base 7 dims (offset, depth, size, rot)
        # 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets
        train_cfg=dict(code_weight=[
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
            0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0,
            1.0
        ]),
        test_cfg=dict(
            nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20)))

backend_args = None

train_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=LoadAnnotations3D,
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type=Resize, scale=(1242, 375), keep_ratio=True),
    dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
    dict(
        type=Pack3DDetInputs,
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(type=Resize, scale_factor=1.0),
    dict(type=Pack3DDetInputs, keys=['img'])
]
train_dataloader.update(
    dict(batch_size=3, num_workers=3, dataset=dict(pipeline=train_pipeline)))
test_dataloader.update(dict(dataset=dict(pipeline=test_pipeline)))
val_dataloader.update(dict(dataset=dict(pipeline=test_pipeline)))

# optimizer
optim_wrapper.update(
    dict(
        optimizer=dict(lr=0.001),
        paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
        clip_grad=dict(max_norm=35, norm_type=2)))

# learning rate
param_scheduler = [
    # Linear warmup over the first 500 iterations.
    dict(
        type=LinearLR,
        start_factor=1.0 / 3,
        by_epoch=False,
        begin=0,
        end=500),
    # Step decay at epochs 32 and 44 over a 48-epoch run.
    dict(
        type=MultiStepLR,
        begin=0,
        end=48,
        by_epoch=True,
        milestones=[32, 44],
        gamma=0.1)
]

train_cfg.update(dict(max_epochs=48, val_interval=2))
auto_scale_lr.update(dict(base_batch_size=12))
mmdetection3d/mmdet3d/configs/votenet/__init__.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
mmdetection3d/mmdet3d/configs/votenet/votenet_8xb8_scannet_3d.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
mmengine
import
read_base
with read_base():
    from .._base_.datasets.scannet_3d import *
    from .._base_.models.votenet import *
    from .._base_.schedules.schedule_3x import *
    from .._base_.default_runtime import *

from mmengine.hooks.logger_hook import LoggerHook

from mmdet3d.models.task_modules.coders.partial_bin_based_bbox_coder import \
    PartialBinBasedBBoxCoder

# model settings
# Override the base VoteNet head for ScanNet's 18 classes; mean_sizes are
# the per-class average box dimensions used by the partial-bin coder.
model.update(
    dict(
        bbox_head=dict(
            num_classes=18,
            bbox_coder=dict(
                type=PartialBinBasedBBoxCoder,
                num_sizes=18,
                num_dir_bins=1,
                with_rot=False,
                mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
                            [1.876858, 1.8425595, 1.1931566],
                            [0.61328, 0.6148609, 0.7182701],
                            [1.3955007, 1.5121545, 0.83443564],
                            [0.97949594, 1.0675149, 0.6329687],
                            [0.531663, 0.5955577, 1.7500148],
                            [0.9624706, 0.72462326, 1.1481868],
                            [0.83221924, 1.0490936, 1.6875663],
                            [0.21132214, 0.4206159, 0.5372846],
                            [1.4440073, 1.8970833, 0.26985747],
                            [1.0294262, 1.4040797, 0.87554324],
                            [1.3766412, 0.65521795, 1.6813129],
                            [0.6650819, 0.71111923, 1.298853],
                            [0.41999173, 0.37906948, 1.7513971],
                            [0.59359556, 0.5912492, 0.73919016],
                            [0.50867593, 0.50656086, 0.30136237],
                            [1.1511526, 1.0546296, 0.49706793],
                            [0.47535285, 0.49249494, 0.5802117]]))))

default_hooks.update(dict(logger=dict(type=LoggerHook, interval=30)))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=64))
mmdetection3d/mmdet3d/datasets/__init__.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
from
.dataset_wrappers
import
CBGSDataset
from
.det3d_dataset
import
Det3DDataset
from
.kitti_dataset
import
KittiDataset
from
.lyft_dataset
import
LyftDataset
from
.nuscenes_dataset
import
NuScenesDataset
# yapf: enable
from
.s3dis_dataset
import
S3DISDataset
,
S3DISSegDataset
from
.scannet_dataset
import
(
ScanNetDataset
,
ScanNetInstanceSegDataset
,
ScanNetSegDataset
)
from
.seg3d_dataset
import
Seg3DDataset
from
.semantickitti_dataset
import
SemanticKittiDataset
from
.sunrgbd_dataset
import
SUNRGBDDataset
# yapf: disable
from
.transforms
import
(
AffineResize
,
BackgroundPointsFilter
,
GlobalAlignment
,
GlobalRotScaleTrans
,
IndoorPatchPointSample
,
IndoorPointSample
,
LoadAnnotations3D
,
LoadPointsFromDict
,
LoadPointsFromFile
,
LoadPointsFromMultiSweeps
,
NormalizePointsColor
,
ObjectNameFilter
,
ObjectNoise
,
ObjectRangeFilter
,
ObjectSample
,
PointSample
,
PointShuffle
,
PointsRangeFilter
,
RandomDropPointsColor
,
RandomFlip3D
,
RandomJitterPoints
,
RandomResize3D
,
RandomShiftScale
,
Resize3D
,
VoxelBasedPointSampler
)
from
.utils
import
get_loading_pipeline
from
.waymo_dataset
import
WaymoDataset
# Public API of mmdet3d.datasets: dataset classes, transforms and helpers
# re-exported above.
__all__ = [
    'KittiDataset', 'CBGSDataset', 'NuScenesDataset', 'LyftDataset',
    'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
    'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
    'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset',
    'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
    'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset',
    'ScanNetDataset', 'ScanNetSegDataset', 'ScanNetInstanceSegDataset',
    'SemanticKittiDataset', 'Det3DDataset', 'Seg3DDataset',
    'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
    'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
    'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
    'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
]
mmdetection3d/mmdet3d/datasets/convert_utils.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
warnings
from
typing
import
List
,
Optional
,
Tuple
,
Union
import
numpy
as
np
from
nuscenes
import
NuScenes
from
nuscenes.utils.geometry_utils
import
view_points
from
pyquaternion
import
Quaternion
from
shapely.geometry
import
MultiPoint
,
box
from
shapely.geometry.polygon
import
Polygon
from
mmdet3d.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
points_cam2img
from
mmdet3d.structures.ops
import
box_np_ops
# Category names for each supported dataset, in label-id order (the index of
# a name in the tuple is its integer label).
kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
                    'Person_sitting', 'Tram', 'Misc')
waymo_categories = ('Car', 'Pedestrian', 'Cyclist')
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
                  'barrier')
# nuScenes attribute names; index in this tuple is the attribute label id.
nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
                  'pedestrian.moving', 'pedestrian.standing',
                  'pedestrian.sitting_lying_down', 'vehicle.moving',
                  'vehicle.parked', 'vehicle.stopped', 'None')
# Maps raw nuScenes category names to the 10 detection classes above.
NuScenesNameMapping = {
    'movable_object.barrier': 'barrier',
    'vehicle.bicycle': 'bicycle',
    'vehicle.bus.bendy': 'bus',
    'vehicle.bus.rigid': 'bus',
    'vehicle.car': 'car',
    'vehicle.construction': 'construction_vehicle',
    'vehicle.motorcycle': 'motorcycle',
    'human.pedestrian.adult': 'pedestrian',
    'human.pedestrian.child': 'pedestrian',
    'human.pedestrian.construction_worker': 'pedestrian',
    'human.pedestrian.police_officer': 'pedestrian',
    'movable_object.trafficcone': 'traffic_cone',
    'vehicle.trailer': 'trailer',
    'vehicle.truck': 'truck'
}
# Lyft categories map to themselves (identity mapping kept for a uniform
# interface with NuScenesNameMapping).
LyftNameMapping = {
    'bicycle': 'bicycle',
    'bus': 'bus',
    'car': 'car',
    'emergency_vehicle': 'emergency_vehicle',
    'motorcycle': 'motorcycle',
    'other_vehicle': 'other_vehicle',
    'pedestrian': 'pedestrian',
    'truck': 'truck',
    'animal': 'animal'
}
def get_nuscenes_2d_boxes(nusc: NuScenes, sample_data_token: str,
                          visibilities: List[str]) -> List[dict]:
    """Get the 2d / mono3d annotation records for a given `sample_data_token`
    of nuscenes dataset.

    Args:
        nusc (:obj:`NuScenes`): NuScenes class.
        sample_data_token (str): Sample data token belonging to a camera
            keyframe.
        visibilities (List[str]): Visibility filter.

    Return:
        List[dict]: List of 2d annotation record that belongs to the input
            `sample_data_token`.
    """
    # Get the sample data and the sample corresponding to that sample data.
    sd_rec = nusc.get('sample_data', sample_data_token)

    assert sd_rec['sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \
        ' for camera sample_data!'
    if not sd_rec['is_key_frame']:
        raise ValueError('The 2D re-projections are available only for keyframes.')

    s_rec = nusc.get('sample', sd_rec['sample_token'])

    # Get the calibrated sensor and ego pose
    # record to get the transformation matrices.
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])

    # Get all the annotation with the specified visibilties.
    ann_recs = [nusc.get('sample_annotation', token) for token in s_rec['anns']]
    ann_recs = [
        ann_rec for ann_rec in ann_recs
        if (ann_rec['visibility_token'] in visibilities)
    ]

    repro_recs = []

    for ann_rec in ann_recs:
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = ann_rec['token']
        ann_rec['sample_data_token'] = sample_data_token

        # Get the box in global coordinates.
        box = nusc.get_box(ann_rec['token'])

        # Move them to the ego-pose frame.
        box.translate(-np.array(pose_rec['translation']))
        box.rotate(Quaternion(pose_rec['rotation']).inverse)

        # Move them to the calibrated sensor frame.
        box.translate(-np.array(cs_rec['translation']))
        box.rotate(Quaternion(cs_rec['rotation']).inverse)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box.corners()
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        else:
            min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    'nuscenes')

        # if repro_rec is None, we do not append it into repre_recs
        if repro_rec is not None:
            loc = box.center.tolist()

            dim = box.wlh
            dim[[0, 1, 2]] = dim[[1, 2, 0]]  # convert wlh to our lhw
            dim = dim.tolist()

            rot = box.orientation.yaw_pitch_roll[0]
            rot = [-rot]  # convert the rot to our cam coordinate

            global_velo2d = nusc.box_velocity(box.token)[:2]
            global_velo3d = np.array([*global_velo2d, 0.0])
            # Rotate the global-frame velocity into the camera frame via the
            # inverse ego-to-global and camera-to-ego rotations.
            e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
            c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
            cam_velo3d = global_velo3d @ np.linalg.inv(
                e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
            # Keep only the (x, z) velocity components in camera coordinates.
            velo = cam_velo3d[0::2].tolist()

            repro_rec['bbox_3d'] = loc + dim + rot
            repro_rec['velocity'] = velo

            center_3d = np.array(loc).reshape([1, 3])
            center_2d_with_depth = points_cam2img(
                center_3d, camera_intrinsic, with_depth=True)
            center_2d_with_depth = center_2d_with_depth.squeeze().tolist()

            repro_rec['center_2d'] = center_2d_with_depth[:2]
            repro_rec['depth'] = center_2d_with_depth[2]
            # normalized center2D + depth
            # if samples with depth < 0 will be removed
            if repro_rec['depth'] <= 0:
                continue

            ann_token = nusc.get('sample_annotation',
                                 box.token)['attribute_tokens']
            if len(ann_token) == 0:
                attr_name = 'None'
            else:
                attr_name = nusc.get('attribute', ann_token[0])['name']
            attr_id = nus_attributes.index(attr_name)
            # repro_rec['attribute_name'] = attr_name
            repro_rec['attr_label'] = attr_id

            repro_recs.append(repro_rec)

    return repro_recs
def get_kitti_style_2d_boxes(info: dict,
                             cam_idx: int = 2,
                             occluded: Tuple[int] = (0, 1, 2, 3),
                             annos: Optional[dict] = None,
                             mono3d: bool = True,
                             dataset: str = 'kitti') -> List[dict]:
    """Get the 2d / mono3d annotation records for a given info.

    This function is used to get 2D/Mono3D annotations when loading annotations
    from a kitti-style dataset class, such as KITTI and Waymo dataset.

    Args:
        info (dict): Information of the given sample data.
        cam_idx (int): Camera id which the 2d / mono3d annotations to obtain
            belong to. In KITTI, typically only CAM 2 will be used,
            and in Waymo, multi cameras could be used.
            Defaults to 2.
        occluded (Tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
            0 = fully visible, 1 = partly occluded, 2 = largely occluded,
            3 = unknown, -1 = DontCare.
            Defaults to (0, 1, 2, 3).
        annos (dict, optional): Original annotations. Defaults to None.
        mono3d (bool): Whether to get boxes with mono3d annotation.
            Defaults to True.
        dataset (str): Dataset name of getting 2d bboxes.
            Defaults to 'kitti'.

    Return:
        List[dict]: List of 2d / mono3d annotation record that
            belongs to the input camera id.
    """
    # Get calibration information
    camera_intrinsic = info['calib'][f'P{cam_idx}']

    repro_recs = []
    # if no annotations in info (test dataset), then return
    if annos is None:
        return repro_recs

    # Get all the annotation with the specified visibilties.
    # filter the annotation bboxes by occluded attributes
    ann_dicts = annos
    mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
    for k in ann_dicts.keys():
        ann_dicts[k] = ann_dicts[k][mask]

    # convert dict of list to list of dict
    ann_recs = []
    for i in range(len(ann_dicts['occluded'])):
        ann_rec = {}
        for k in ann_dicts.keys():
            ann_rec[k] = ann_dicts[k][i]
        ann_recs.append(ann_rec)

    for ann_idx, ann_rec in enumerate(ann_recs):
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = \
            f"{info['image']['image_idx']}.{ann_idx}"
        ann_rec['sample_data_token'] = info['image']['image_idx']

        loc = ann_rec['location'][np.newaxis, :]
        dim = ann_rec['dimensions'][np.newaxis, :]
        rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]

        # transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
        dst = np.array([0.5, 0.5, 0.5])
        src = np.array([0.5, 1.0, 0.5])
        # gravity center
        loc_center = loc + dim * (dst - src)
        gt_bbox_3d = np.concatenate([loc_center, dim, rot],
                                    axis=1).astype(np.float32)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box_np_ops.center_to_corner_box3d(
            gt_bbox_3d[:, :3],
            gt_bbox_3d[:, 3:6],
            gt_bbox_3d[:, 6], (0.5, 0.5, 0.5),
            axis=1)
        corners_3d = corners_3d[0].T  # (1, 8, 3) -> (3, 8)
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        # image_shape is stored (height, width); post_process_coords expects
        # (width, height).
        final_coords = post_process_coords(
            corner_coords,
            imsize=(info['image']['image_shape'][1],
                    info['image']['image_shape'][0]))

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        else:
            min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    dataset)

        # If mono3d=True, add 3D annotations in camera coordinates
        if mono3d and (repro_rec is not None):
            # use bottom center to represent the bbox_3d
            repro_rec['bbox_3d'] = np.concatenate(
                [loc, dim, rot], axis=1).astype(np.float32).squeeze().tolist()
            repro_rec['velocity'] = -1  # no velocity in KITTI

            center_3d = np.array(loc_center).reshape([1, 3])
            center_2d_with_depth = points_cam2img(
                center_3d, camera_intrinsic, with_depth=True)
            center_2d_with_depth = center_2d_with_depth.squeeze().tolist()

            repro_rec['center_2d'] = center_2d_with_depth[:2]
            repro_rec['depth'] = center_2d_with_depth[2]
            # normalized center2D + depth
            # samples with depth < 0 will be removed
            if repro_rec['depth'] <= 0:
                continue
            repro_recs.append(repro_rec)

    return repro_recs
def convert_annos(info: dict, cam_idx: int) -> dict:
    """Convert front-cam anns to i-th camera (KITTI-style info).

    Boxes are round-tripped camera-0 -> LiDAR -> camera-i using the
    calibration matrices stored in ``info['calib']``.

    Args:
        info (dict): KITTI-style info dict with 'calib' and 'annos' keys.
        cam_idx (int): Index of the target camera.

    Returns:
        dict: Deep copy of ``info['annos']`` with 'location', 'dimensions'
            and 'rotation_y' expressed in camera ``cam_idx`` coordinates.
    """
    rect = info['calib']['R0_rect'].astype(np.float32)
    lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32)
    lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32)
    annos = info['annos']
    converted_annos = copy.deepcopy(annos)
    loc = annos['location']
    dims = annos['dimensions']
    rots = annos['rotation_y']
    gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
                                  axis=1).astype(np.float32)
    # convert gt_bboxes_3d to velodyne coordinates
    gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
        Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True)
    # convert gt_bboxes_3d to cam coordinates
    gt_bboxes_3d = gt_bboxes_3d.convert_to(
        Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy()
    converted_annos['location'] = gt_bboxes_3d[:, :3]
    converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6]
    converted_annos['rotation_y'] = gt_bboxes_3d[:, 6]
    return converted_annos
def post_process_coords(
    corner_coords: List[int],
    imsize: Tuple[int] = (1600, 900)
) -> Union[Tuple[float], None]:
    """Clip the convex hull of reprojected box corners to the image canvas.

    Args:
        corner_coords (List[int]): Corner coordinates of reprojected
            bounding box.
        imsize (Tuple[int]): Size of the image canvas.
            Defaults to (1600, 900).

    Return:
        Tuple[float] or None: (min_x, min_y, max_x, max_y) of the clipped
            hull, or None if the hull does not intersect the canvas or the
            intersection is degenerate (not a polygon).
    """
    hull = MultiPoint(corner_coords).convex_hull
    canvas = box(0, 0, imsize[0], imsize[1])

    # No overlap with the image at all -> nothing to keep.
    if not hull.intersects(canvas):
        return None

    clipped = hull.intersection(canvas)
    # A degenerate intersection (point/line) cannot yield a 2D bbox.
    if not isinstance(clipped, Polygon):
        warnings.warn('img_intersection is not an object of Polygon.')
        return None

    pts = np.array([coord for coord in clipped.exterior.coords])
    xs = pts[:, 0]
    ys = pts[:, 1]
    return min(xs), min(ys), max(xs), max(ys)
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
                    dataset: str) -> Union[dict, None]:
    """Build one 2D annotation record from a 3D record and a 2D box.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        dataset (str): Name of dataset ('nuscenes', 'kitti' or 'waymo').

    Returns:
        dict or None: A sample 2d annotation record, or None when the
            category is not part of the target dataset's label set.

        - bbox_label (int): 2d box label id
        - bbox_label_3d (int): 3d box label id
        - bbox (List[float]): left x, top y, right x, bottom y of 2d box
        - bbox_3d_isvalid (bool): whether the box is valid
    """
    if dataset == 'nuscenes':
        # Raw nuScenes names must first be collapsed to detection classes.
        raw_name = ann_rec['category_name']
        if raw_name not in NuScenesNameMapping:
            return None
        cat_name = NuScenesNameMapping[raw_name]
        categories = nus_categories
    else:
        if dataset == 'kitti':
            categories = kitti_categories
        elif dataset == 'waymo':
            categories = waymo_categories
        else:
            raise NotImplementedError('Unsupported dataset!')
        cat_name = ann_rec['name']
        if cat_name not in categories:
            return None

    label = categories.index(cat_name)
    rec = dict()
    rec['bbox_label'] = label
    rec['bbox_label_3d'] = label
    rec['bbox'] = [x1, y1, x2, y2]
    rec['bbox_3d_isvalid'] = True
    return rec
mmdetection3d/mmdet3d/datasets/dataset_wrappers.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
warnings
from
typing
import
List
,
Set
,
Union
import
numpy
as
np
from
mmengine.dataset
import
BaseDataset
,
force_full_init
from
mmdet3d.registry
import
DATASETS
@
DATASETS
.
register_module
()
class
CBGSDataset
:
"""A wrapper of class sampled dataset with ann_file path. Implementation of
paper `Class-balanced Grouping and Sampling for Point Cloud 3D Object
Detection <https://arxiv.org/abs/1908.09492>`_.
Balance the number of scenes under different classes.
Args:
dataset (:obj:`BaseDataset` or dict): The dataset to be class sampled.
lazy_init (bool): Whether to load annotation during instantiation.
Defaults to False.
"""
def __init__(self,
             dataset: Union[BaseDataset, dict],
             lazy_init: bool = False) -> None:
    # The wrapped dataset: built from a config dict via the registry, or
    # used directly when an instance is supplied.
    self.dataset: BaseDataset
    if isinstance(dataset, dict):
        self.dataset = DATASETS.build(dataset)
    elif isinstance(dataset, BaseDataset):
        self.dataset = dataset
    else:
        raise TypeError(
            'elements in datasets sequence should be config or '
            f'`BaseDataset` instance, but got {type(dataset)}')
    # Snapshot the wrapped dataset's meta information for `metainfo`.
    self._metainfo = self.dataset.metainfo

    self._fully_initialized = False
    if not lazy_init:
        self.full_init()
@property
def metainfo(self) -> dict:
    """Meta information of the wrapped dataset.

    Returns:
        dict: A deep copy, so callers cannot mutate the stored metainfo.
    """
    meta = self._metainfo
    return copy.deepcopy(meta)
def full_init(self) -> None:
    """Fully initialize the wrapped dataset and build sampling indices."""
    if self._fully_initialized:
        return

    self.dataset.full_init()
    # Class-balanced resampling over the now-initialized dataset.
    self.sample_indices = self._get_sample_indices(self.dataset)
    self._fully_initialized = True
def _get_sample_indices(self, dataset: BaseDataset) -> List[int]:
    """Build class-balanced sample indices over the wrapped dataset.

    Each sample is bucketed under every category it contains; buckets are
    then randomly subsampled so every class contributes roughly an equal
    share (1 / num_classes) of the final index list.

    Args:
        dataset (:obj:`BaseDataset`): The dataset.

    Returns:
        List[dict]: List of indices after class sampling.
    """
    classes = self.metainfo['classes']
    cat2id = {name: i for i, name in enumerate(classes)}
    class_sample_idxs = {cat_id: [] for cat_id in cat2id.values()}
    for idx in range(len(dataset)):
        sample_cat_ids = dataset.get_cat_ids(idx)
        for cat_id in sample_cat_ids:
            if cat_id != -1:
                # Filter categories that do not need to be cared.
                # -1 indicates dontcare in MMDet3D.
                class_sample_idxs[cat_id].append(idx)
    # Total bucket size (a sample is counted once per category it holds).
    duplicated_samples = sum(
        [len(v) for _, v in class_sample_idxs.items()])
    class_distribution = {
        k: len(v) / duplicated_samples
        for k, v in class_sample_idxs.items()
    }

    # Resample each class bucket so its share approaches 1 / num_classes.
    # NOTE(review): np.random.choice draws WITH replacement here, so indices
    # of rare classes can repeat — this appears intentional for balancing.
    sample_indices = []

    frac = 1.0 / len(classes)
    ratios = [frac / v for v in class_distribution.values()]
    for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios):
        sample_indices += np.random.choice(cls_inds,
                                           int(len(cls_inds) *
                                               ratio)).tolist()
    return sample_indices
@force_full_init
def _get_ori_dataset_idx(self, idx: int) -> int:
    """Convert global index to local index.

    Args:
        idx (int): Global index of ``CBGSDataset``.

    Returns:
        int: Local index of data.
    """
    # The balanced index table maps directly onto the wrapped dataset.
    local_idx = self.sample_indices[idx]
    return local_idx
@force_full_init
def get_cat_ids(self, idx: int) -> Set[int]:
    """Get category ids of class balanced dataset by index.

    Args:
        idx (int): Index of data.

    Returns:
        Set[int]: All categories in the sample of specified index.
    """
    # Translate the balanced index back to the wrapped dataset first.
    wrapped_idx = self._get_ori_dataset_idx(idx)
    return self.dataset.get_cat_ids(wrapped_idx)
@force_full_init
def get_data_info(self, idx: int) -> dict:
    """Get annotation by index.

    Args:
        idx (int): Global index of ``CBGSDataset``.

    Returns:
        dict: The idx-th annotation of the dataset.
    """
    # Delegate to the wrapped dataset after index translation.
    wrapped_idx = self._get_ori_dataset_idx(idx)
    return self.dataset.get_data_info(wrapped_idx)
def
__getitem__
(
self
,
idx
:
int
)
->
dict
:
"""Get item from infos according to the given index.
Args:
idx (int): The index of self.sample_indices.
Returns:
dict: Data dictionary of the corresponding index.
"""
if
not
self
.
_fully_initialized
:
warnings
.
warn
(
'Please call `full_init` method manually to '
'accelerate the speed.'
)
self
.
full_init
()
ori_index
=
self
.
_get_ori_dataset_idx
(
idx
)
return
self
.
dataset
[
ori_index
]
@force_full_init
def __len__(self) -> int:
    """Return the length of data infos.

    Returns:
        int: Length of data infos.
    """
    total = len(self.sample_indices)
    return total
def get_subset_(self, indices: Union[List[int], int]) -> None:
    """Unsupported: sub-dataset semantics are ambiguous for ``CBGSDataset``."""
    message = (
        '`CBGSDataset` does not support `get_subset` and '
        '`get_subset_` interfaces because this will lead to ambiguous '
        'implementation of some methods. If you want to use `get_subset` '
        'or `get_subset_` interfaces, please use them in the wrapped '
        'dataset first and then use `CBGSDataset`.')
    raise NotImplementedError(message)
def get_subset(self, indices: Union[List[int], int]) -> BaseDataset:
    """Unsupported: sub-dataset semantics are ambiguous for ``CBGSDataset``."""
    message = (
        '`CBGSDataset` does not support `get_subset` and '
        '`get_subset_` interfaces because this will lead to ambiguous '
        'implementation of some methods. If you want to use `get_subset` '
        'or `get_subset_` interfaces, please use them in the wrapped '
        'dataset first and then use `CBGSDataset`.')
    raise NotImplementedError(message)
mmdetection3d/mmdet3d/datasets/det3d_dataset.py
0 → 100644
View file @
7aa442d5
# Copyright (c) OpenMMLab. All rights reserved.
import
copy
import
os
from
os
import
path
as
osp
from
typing
import
Callable
,
List
,
Optional
,
Set
,
Union
import
numpy
as
np
import
torch
from
mmengine.dataset
import
BaseDataset
from
mmengine.logging
import
print_log
from
terminaltables
import
AsciiTable
from
mmdet3d.registry
import
DATASETS
from
mmdet3d.structures
import
get_box_type
@DATASETS.register_module()
class Det3DDataset(BaseDataset):
    """Base Class of 3D dataset.

    This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
    dataset.

    # TODO: doc link here for the standard data format

    Args:
        data_root (str, optional): The root directory for ``data_prefix`` and
            ``ann_file``. Defaults to None.
        ann_file (str): Annotation file path. Defaults to ''.
        metainfo (dict, optional): Meta information for dataset, such as class
            information. Defaults to None.
        data_prefix (dict): Prefix for training data. Defaults to
            dict(pts='velodyne', img='').
        pipeline (List[dict]): Pipeline used for data processing.
            Defaults to [].
        modality (dict): Modality to specify the sensor data used as input,
            it usually has following keys:

            - use_camera: bool
            - use_lidar: bool

            Defaults to dict(use_lidar=True, use_camera=False).
        default_cam_key (str, optional): The default camera name adopted.
            Defaults to None.
        box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            to its original format then converted them to `box_type_3d`.
            Defaults to 'LiDAR' in this dataset. Available options includes:

            - 'LiDAR': Box in LiDAR coordinates, usually for
              outdoor point cloud 3d detection.
            - 'Depth': Box in depth coordinates, usually for
              indoor point cloud 3d detection.
            - 'Camera': Box in camera coordinates, usually
              for vision-based 3d detection.
        filter_empty_gt (bool): Whether to filter the data with empty GT.
            If it's set to be True, the example with empty annotations after
            data pipeline will be dropped and a random example will be chosen
            in `__getitem__`. Defaults to True.
        test_mode (bool): Whether the dataset is in test mode.
            Defaults to False.
        load_eval_anns (bool): Whether to load annotations in test_mode,
            the annotation will be save in `eval_ann_infos`, which can be
            used in Evaluator. Defaults to True.
        backend_args (dict, optional): Arguments to instantiate the
            corresponding backend. Defaults to None.
        show_ins_var (bool): For debug purpose. Whether to show variation
            of the number of instances before and after through pipeline.
            Defaults to False.
    """

    def __init__(self,
                 data_root: Optional[str] = None,
                 ann_file: str = '',
                 metainfo: Optional[dict] = None,
                 data_prefix: dict = dict(pts='velodyne', img=''),
                 pipeline: List[Union[dict, Callable]] = [],
                 modality: dict = dict(use_lidar=True, use_camera=False),
                 default_cam_key: Optional[str] = None,
                 box_type_3d: str = 'LiDAR',
                 filter_empty_gt: bool = True,
                 test_mode: bool = False,
                 load_eval_anns: bool = True,
                 backend_args: Optional[dict] = None,
                 show_ins_var: bool = False,
                 **kwargs) -> None:
        self.backend_args = backend_args
        self.filter_empty_gt = filter_empty_gt
        self.load_eval_anns = load_eval_anns
        _default_modality_keys = ('use_lidar', 'use_camera')
        if modality is None:
            modality = dict()

        # Defaults to False if not specify
        for key in _default_modality_keys:
            if key not in modality:
                modality[key] = False
        self.modality = modality
        self.default_cam_key = default_cam_key
        # at least one modality must be enabled, otherwise no input exists
        assert self.modality['use_lidar'] or self.modality['use_camera'], (
            'Please specify the `modality` (`use_lidar` '
            f', `use_camera`) for {self.__class__.__name__}')

        # resolve the box class and mode enum from the string name
        self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)

        if metainfo is not None and 'classes' in metainfo:
            # we allow to train on subset of self.METAINFO['classes']
            # map unselected labels to -1
            self.label_mapping = {
                i: -1
                for i in range(len(self.METAINFO['classes']))
            }
            self.label_mapping[-1] = -1
            for label_idx, name in enumerate(metainfo['classes']):
                ori_label = self.METAINFO['classes'].index(name)
                self.label_mapping[ori_label] = label_idx

            # per-category instance counters, indexed by training label
            self.num_ins_per_cat = [0] * len(metainfo['classes'])
        else:
            # identity mapping when training on the full class list
            self.label_mapping = {
                i: i
                for i in range(len(self.METAINFO['classes']))
            }
            self.label_mapping[-1] = -1

            self.num_ins_per_cat = [0] * len(self.METAINFO['classes'])

        super().__init__(
            ann_file=ann_file,
            metainfo=metainfo,
            data_root=data_root,
            data_prefix=data_prefix,
            pipeline=pipeline,
            test_mode=test_mode,
            **kwargs)

        # can be accessed by other component in runner
        self.metainfo['box_type_3d'] = box_type_3d
        self.metainfo['label_mapping'] = self.label_mapping

        if not kwargs.get('lazy_init', False):
            # used for showing variation of the number of instances before and
            # after through the pipeline
            self.show_ins_var = show_ins_var

            # show statistics of this dataset
            print_log('-' * 30, 'current')
            print_log(
                f'The length of {"test" if self.test_mode else "training"} dataset: {len(self)}',  # noqa: E501
                'current')
            content_show = [['category', 'number']]
            for label, num in enumerate(self.num_ins_per_cat):
                cat_name = self.metainfo['classes'][label]
                content_show.append([cat_name, num])
            table = AsciiTable(content_show)
            print_log(
                f'The number of instances per category in the dataset:\n{table.table}',  # noqa: E501
                'current')
def
_remove_dontcare
(
self
,
ann_info
:
dict
)
->
dict
:
"""Remove annotations that do not need to be cared.
-1 indicates dontcare in MMDet3d.
Args:
ann_info (dict): Dict of annotation infos. The
instance with label `-1` will be removed.
Returns:
dict: Annotations after filtering.
"""
img_filtered_annotations
=
{}
filter_mask
=
ann_info
[
'gt_labels_3d'
]
>
-
1
for
key
in
ann_info
.
keys
():
if
key
!=
'instances'
:
img_filtered_annotations
[
key
]
=
(
ann_info
[
key
][
filter_mask
])
else
:
img_filtered_annotations
[
key
]
=
ann_info
[
key
]
return
img_filtered_annotations
def get_ann_info(self, index: int) -> dict:
    """Get annotation info according to the given index.

    Use index to get the corresponding annotations, thus the
    evalhook could use this api.

    Args:
        index (int): Index of the annotation data to get.

    Returns:
        dict: Annotation information.
    """
    data_info = self.get_data_info(index)
    # In test mode the raw info carries no parsed `ann_info` yet, so it
    # is parsed on demand; otherwise the cached annotations are reused.
    if 'ann_info' in data_info:
        return data_info['ann_info']
    return self.parse_ann_info(data_info)
def parse_ann_info(self, info: dict) -> Union[dict, None]:
    """Process the `instances` in data info to `ann_info`.

    In `Custom3DDataset`, we simply concatenate all the field
    in `instances` to `np.ndarray`, you can do the specific
    process in subclass. You have to convert `gt_bboxes_3d`
    to different coordinates according to the task.

    Args:
        info (dict): Info dict.

    Returns:
        dict or None: Processed `ann_info`.
    """
    # add s or gt prefix for most keys after concat
    # we only process 3d annotations here, the corresponding
    # 2d annotation process is in the `LoadAnnotations3D`
    # in `transforms`
    name_mapping = {
        'bbox_label_3d': 'gt_labels_3d',
        'bbox_label': 'gt_bboxes_labels',
        'bbox': 'gt_bboxes',
        'bbox_3d': 'gt_bboxes_3d',
        'depth': 'depths',
        'center_2d': 'centers_2d',
        'attr_label': 'attr_labels',
        'velocity': 'velocities',
    }
    instances = info['instances']
    # empty gt
    if not instances:
        return None

    ann_info = dict()
    for ann_name in instances[0].keys():
        values = [inst[ann_name] for inst in instances]
        is_label = 'label' in ann_name
        # map the original dataset label to training label
        if is_label and ann_name != 'attr_label':
            values = [self.label_mapping[v] for v in values]
        target_name = name_mapping.get(ann_name, ann_name)
        # labels become int64, mapped geometry fields become float32,
        # everything else keeps numpy's inferred dtype
        if is_label:
            values = np.array(values).astype(np.int64)
        elif ann_name in name_mapping:
            values = np.array(values).astype(np.float32)
        else:
            values = np.array(values)
        ann_info[target_name] = values
    ann_info['instances'] = info['instances']

    # accumulate per-category statistics for the dataset summary table
    for label in ann_info['gt_labels_3d']:
        if label != -1:
            self.num_ins_per_cat[label] += 1

    return ann_info
def parse_data_info(self, info: dict) -> dict:
    """Process the raw data info.

    Convert all relative path of needed modality data file to
    the absolute path. And process the `instances` field to
    `ann_info` in training stage.

    Args:
        info (dict): Raw info dict.

    Returns:
        dict: Has `ann_info` in training stage. And
        all path has been converted to absolute path.
    """
    if self.modality['use_lidar']:
        # prepend the point-cloud prefix to the relative lidar path
        info['lidar_points']['lidar_path'] = \
            osp.join(
                self.data_prefix.get('pts', ''),
                info['lidar_points']['lidar_path'])

        info['num_pts_feats'] = info['lidar_points']['num_pts_feats']
        info['lidar_path'] = info['lidar_points']['lidar_path']
        if 'lidar_sweeps' in info:
            for sweep in info['lidar_sweeps']:
                # keep only the file name of the sweep and re-root it
                # under the configured prefix
                file_suffix = sweep['lidar_points']['lidar_path'].split(
                    os.sep)[-1]
                if 'samples' in sweep['lidar_points']['lidar_path']:
                    sweep['lidar_points']['lidar_path'] = osp.join(
                        self.data_prefix['pts'], file_suffix)
                else:
                    # non-keyframe sweeps live under a separate prefix
                    sweep['lidar_points']['lidar_path'] = osp.join(
                        self.data_prefix['sweeps'], file_suffix)

    if self.modality['use_camera']:
        for cam_id, img_info in info['images'].items():
            if 'img_path' in img_info:
                # a per-camera prefix (if configured) takes precedence
                # over the generic 'img' prefix
                if cam_id in self.data_prefix:
                    cam_prefix = self.data_prefix[cam_id]
                else:
                    cam_prefix = self.data_prefix.get('img', '')
                img_info['img_path'] = osp.join(cam_prefix,
                                                img_info['img_path'])
        if self.default_cam_key is not None:
            # expose the default camera's path and transform matrices at
            # the top level of the info dict
            info['img_path'] = info['images'][
                self.default_cam_key]['img_path']
            if 'lidar2cam' in info['images'][self.default_cam_key]:
                info['lidar2cam'] = np.array(
                    info['images'][self.default_cam_key]['lidar2cam'])
            if 'cam2img' in info['images'][self.default_cam_key]:
                info['cam2img'] = np.array(
                    info['images'][self.default_cam_key]['cam2img'])
            if 'lidar2img' in info['images'][self.default_cam_key]:
                info['lidar2img'] = np.array(
                    info['images'][self.default_cam_key]['lidar2img'])
            else:
                # derive the lidar->image projection by composing the
                # two known transforms
                info['lidar2img'] = info['cam2img'] @ info['lidar2cam']

    if not self.test_mode:
        # used in training
        info['ann_info'] = self.parse_ann_info(info)
    if self.test_mode and self.load_eval_anns:
        # keep annotations available for the evaluator in test mode
        info['eval_ann_info'] = self.parse_ann_info(info)

    return info
def _show_ins_var(self, old_labels: np.ndarray,
                  new_labels: torch.Tensor) -> None:
    """Show variation of the number of instances before and after through
    the pipeline.

    Args:
        old_labels (np.ndarray): The labels before through the pipeline.
        new_labels (torch.Tensor): The labels after through the pipeline.
    """

    def _count_per_cat(labels):
        # Tally instances per category name, skipping dontcare (-1).
        counts = dict()
        for label in labels:
            if label != -1:
                cat_name = self.metainfo['classes'][label]
                counts[cat_name] = counts.get(cat_name, 0) + 1
        return counts

    ori_num_per_cat = _count_per_cat(old_labels)
    new_num_per_cat = _count_per_cat(new_labels)

    content_show = [['category', 'new number', 'ori number']]
    for cat_name, num in ori_num_per_cat.items():
        content_show.append(
            [cat_name, new_num_per_cat.get(cat_name, 0), num])
    table = AsciiTable(content_show)
    print_log(
        'The number of instances per category after and before '
        f'through pipeline:\n{table.table}', 'current')
def prepare_data(self, index: int) -> Union[dict, None]:
    """Data preparation for both training and testing stage.

    Called by `__getitem__` of dataset.

    Args:
        index (int): Index for accessing the target data.

    Returns:
        dict or None: Data dict of the corresponding index.
    """
    ori_input_dict = self.get_data_info(index)

    # deepcopy here to avoid inplace modification in pipeline.
    input_dict = copy.deepcopy(ori_input_dict)

    # box_type_3d (str): 3D box type.
    input_dict['box_type_3d'] = self.box_type_3d
    # box_mode_3d (str): 3D box mode.
    input_dict['box_mode_3d'] = self.box_mode_3d

    drop_empty = (not self.test_mode) and self.filter_empty_gt

    # pre-pipline return None to random another in `__getitem__`
    if drop_empty and len(input_dict['ann_info']['gt_labels_3d']) == 0:
        return None

    example = self.pipeline(input_dict)

    # after pipeline drop the example with empty annotations
    # return None to random another in `__getitem__`
    if drop_empty:
        if example is None or len(
                example['data_samples'].gt_instances_3d.labels_3d) == 0:
            return None

    if self.show_ins_var:
        if 'ann_info' in ori_input_dict:
            self._show_ins_var(
                ori_input_dict['ann_info']['gt_labels_3d'],
                example['data_samples'].gt_instances_3d.labels_3d)
        else:
            print_log(
                "'ann_info' is not in the input dict. It's probably that "
                'the data is not in training mode',
                'current',
                level=30)

    return example
def get_cat_ids(self, idx: int) -> Set[int]:
    """Get category ids by index. Dataset wrapped by ClassBalancedDataset
    must implement this method.

    The ``CBGSDataset`` or ``ClassBalancedDataset``requires a subclass
    which implements this method.

    Args:
        idx (int): The index of data.

    Returns:
        set[int]: All categories in the sample of specified index.
    """
    labels = self.get_data_info(idx)['ann_info']['gt_labels_3d']
    return set(labels.tolist())
Prev
1
…
19
20
21
22
23
24
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment