Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
f27d308f
"eigen-master/blas/complex_single.cpp" did not exist on "e7df86554156b36846008d8ddbcc4d8521a16554"
Commit
f27d308f
authored
Jun 07, 2020
by
yinchimaoliang
Browse files
merge master
parents
c66ae813
27ebcfac
Changes
80
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1267 additions
and
55 deletions
+1267
-55
.isort.cfg
.isort.cfg
+1
-1
.pre-commit-config.yaml
.pre-commit-config.yaml
+2
-1
configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
...ileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
+243
-0
configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py
...lient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py
+218
-0
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
...second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
+2
-4
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
.../kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+2
-4
configs/scannet/votenet_8x8_scannet-3d-18class.py
configs/scannet/votenet_8x8_scannet-3d-18class.py
+188
-0
configs/sunrgbd/votenet_16x8_sunrgbd-3d-10class.py
configs/sunrgbd/votenet_16x8_sunrgbd-3d-10class.py
+164
-0
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+5
-2
mmdet3d/core/bbox/coders/__init__.py
mmdet3d/core/bbox/coders/__init__.py
+4
-1
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py
+193
-0
mmdet3d/core/bbox/transforms.py
mmdet3d/core/bbox/transforms.py
+84
-0
mmdet3d/core/evaluation/indoor_eval.py
mmdet3d/core/evaluation/indoor_eval.py
+32
-7
mmdet3d/core/post_processing/__init__.py
mmdet3d/core/post_processing/__init__.py
+3
-2
mmdet3d/core/post_processing/box3d_nms.py
mmdet3d/core/post_processing/box3d_nms.py
+49
-0
mmdet3d/datasets/__init__.py
mmdet3d/datasets/__init__.py
+3
-3
mmdet3d/datasets/custom_3d.py
mmdet3d/datasets/custom_3d.py
+10
-11
mmdet3d/datasets/pipelines/__init__.py
mmdet3d/datasets/pipelines/__init__.py
+5
-4
mmdet3d/datasets/pipelines/indoor_augment.py
mmdet3d/datasets/pipelines/indoor_augment.py
+1
-1
mmdet3d/datasets/pipelines/indoor_loading.py
mmdet3d/datasets/pipelines/indoor_loading.py
+58
-14
No files found.
.isort.cfg
View file @
f27d308f
...
...
@@ -3,6 +3,6 @@ line_length = 79
multi_line_output = 0
known_standard_library = setuptools
known_first_party = mmdet,mmdet3d
known_third_party = cv2,mmcv,numba,numpy,nuscenes,plyfile,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,torch,torchvision
known_third_party = cv2,mmcv,numba,numpy,nuscenes,plyfile,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,
terminaltables,
torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY
.pre-commit-config.yaml
View file @
f27d308f
...
...
@@ -15,6 +15,7 @@ repos:
rev
:
v0.30.0
hooks
:
-
id
:
yapf
-
repo
:
https://github.com/pre-commit/pre-commit-hooks
rev
:
v2.5.0
hooks
:
...
...
configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
0 → 100644
View file @
f27d308f
# model settings
voxel_size
=
[
0.25
,
0.25
,
8
]
point_cloud_range
=
[
-
50
,
-
50
,
-
5
,
50
,
50
,
3
]
class_names
=
[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
]
model
=
dict
(
type
=
'MVXFasterRCNNV2'
,
pts_voxel_layer
=
dict
(
max_num_points
=
64
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
# velodyne coordinates, x, y, z
voxel_size
=
voxel_size
,
max_voxels
=
(
30000
,
40000
),
# (training, testing) max_coxels
),
pts_voxel_encoder
=
dict
(
type
=
'HardVFE'
,
num_input_features
=
4
,
num_filters
=
[
64
,
64
],
with_distance
=
False
,
voxel_size
=
voxel_size
,
with_cluster_center
=
True
,
with_voxel_center
=
True
,
point_cloud_range
=
point_cloud_range
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN1d'
,
eps
=
1e-3
,
momentum
=
0.01
)),
pts_middle_encoder
=
dict
(
type
=
'PointPillarsScatter'
,
in_channels
=
64
,
output_shape
=
[
400
,
400
],
# checked from PointCloud3D
),
pts_backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
64
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
layer_nums
=
[
3
,
5
,
5
],
layer_strides
=
[
2
,
2
,
2
],
out_channels
=
[
64
,
128
,
256
],
),
pts_neck
=
dict
(
type
=
'SECONDFPN'
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
in_channels
=
[
64
,
128
,
256
],
upsample_strides
=
[
1
,
2
,
4
],
out_channels
=
[
128
,
128
,
128
],
),
pts_bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
num_classes
=
10
,
in_channels
=
384
,
feat_channels
=
384
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
[
-
49.6
,
-
49.6
,
-
1.68526504
,
49.6
,
49.6
,
-
1.68526504
],
[
-
49.6
,
-
49.6
,
-
1.67339111
,
49.6
,
49.6
,
-
1.67339111
],
[
-
49.6
,
-
49.6
,
-
1.61785072
,
49.6
,
49.6
,
-
1.61785072
],
[
-
49.6
,
-
49.6
,
-
1.80984986
,
49.6
,
49.6
,
-
1.80984986
],
[
-
49.6
,
-
49.6
,
-
1.763965
,
49.6
,
49.6
,
-
1.763965
],
],
sizes
=
[
[
1.95017717
,
4.60718145
,
1.72270761
],
# car
[
2.4560939
,
6.73778078
,
2.73004906
],
# truck
[
2.87427237
,
12.01320693
,
3.81509561
],
# trailer
[
0.60058911
,
1.68452161
,
1.27192197
],
# bicycle
[
0.66344886
,
0.7256437
,
1.75748069
],
# pedestrian
[
0.39694519
,
0.40359262
,
1.06232151
],
# traffic_cone
[
2.49008838
,
0.48578221
,
0.98297065
],
# barrier
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
assigner_per_size
=
False
,
diff_rad_by_sin
=
True
,
dir_offset
=
0.7854
,
# pi/4
dir_limit_offset
=
0
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
,
code_size
=
9
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)))
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
code_weight
=
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
0.2
,
0.2
],
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
pts
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_pre
=
1000
,
nms_thr
=
0.2
,
score_thr
=
0.05
,
min_bbox_size
=
0
,
max_num
=
500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type
=
'NuScenesDataset'
data_root
=
'data/nuscenes/'
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
,
)
db_sampler
=
dict
(
data_root
=
data_root
,
info_path
=
data_root
+
'nuscenes_dbinfos_train.pkl'
,
rate
=
1.0
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(),
classes
=
class_names
,
sample_groups
=
dict
(
bus
=
4
,
trailer
=
4
,
truck
=
4
,
))
file_client_args
=
dict
(
backend
=
'petrel'
,
path_mapping
=
dict
({
'./data/nuscenes/'
:
's3://nuscenes/nuscenes/'
,
'data/nuscenes/'
:
's3://nuscenes/nuscenes/'
}))
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
file_client_args
),
dict
(
type
=
'LoadPointsFromMultiSweeps'
,
sweeps_num
=
10
,
file_client_args
=
file_client_args
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.3925
,
0.3925
],
scaling_uniform_noise
=
[
0.95
,
1.05
],
trans_normal_noise
=
[
0
,
0
,
0
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
5
,
use_dim
=
5
,
file_client_args
=
file_client_args
),
dict
(
type
=
'LoadPointsFromMultiSweeps'
,
sweeps_num
=
10
,
file_client_args
=
file_client_args
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
]),
]
data
=
dict
(
samples_per_gpu
=
4
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_train.pkl'
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
False
),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_val.pkl'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_val.pkl'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
0.001
,
weight_decay
=
0.01
)
# max_norm=10 is better for SECOND
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
1000
,
step
=
[
20
,
23
])
momentum_config
=
None
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
evaluation
=
dict
(
interval
=
24
)
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
24
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py
0 → 100644
View file @
f27d308f
# model settings
voxel_size
=
[
0.05
,
0.05
,
0.1
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
# velodyne coordinates, x, y, z
model
=
dict
(
type
=
'VoxelNet'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
),
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
,
num_input_features
=
4
,
num_filters
=
[
4
],
with_distance
=
False
),
middle_encoder
=
dict
(
type
=
'SparseEncoder'
,
in_channels
=
4
,
sparse_shape
=
[
41
,
1600
,
1408
],
order
=
(
'conv'
,
'norm'
,
'act'
)),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
out_channels
=
[
128
,
256
],
),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
out_channels
=
[
256
,
256
],
),
bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
num_classes
=
1
,
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
),
)
# model training and testing settings
train_cfg
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
)
test_cfg
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.01
,
score_thr
=
0.3
,
min_bbox_size
=
0
,
nms_pre
=
100
,
max_num
=
50
)
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Car'
]
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
,
)
db_sampler
=
dict
(
data_root
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
),
),
classes
=
class_names
,
sample_groups
=
dict
(
Car
=
15
),
)
file_client_args
=
dict
(
backend
=
'petrel'
,
path_mapping
=
dict
(
data
=
's3://kitti_data/'
))
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
,
file_client_args
=
file_client_args
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
file_client_args
=
file_client_args
),
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
,
file_client_args
=
file_client_args
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
]),
]
data
=
dict
(
samples_per_gpu
=
6
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
False
),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.0018
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/sec_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
View file @
f27d308f
...
...
@@ -255,13 +255,11 @@ optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'
c
osine'
,
policy
=
'
C
osine
Anealing
'
,
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
10
,
target_lr
=
1e-5
,
as_ratio
=
True
,
)
min_lr_ratio
=
1e-5
)
momentum_config
=
None
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
...
...
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
View file @
f27d308f
...
...
@@ -207,13 +207,11 @@ optimizer = dict(
weight_decay
=
0.001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'
c
osine'
,
policy
=
'
C
osine
Anealing
'
,
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
10
,
target_lr
=
1e-5
,
as_ratio
=
True
,
)
min_lr_ratio
=
1e-5
)
momentum_config
=
None
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
...
...
configs/scannet/votenet_8x8_scannet-3d-18class.py
0 → 100644
View file @
f27d308f
# model settings
model
=
dict
(
type
=
'VoteNet'
,
backbone
=
dict
(
type
=
'PointNet2SASSG'
,
in_channels
=
4
,
num_points
=
(
2048
,
1024
,
512
,
256
),
radius
=
(
0.2
,
0.4
,
0.8
,
1.2
),
num_samples
=
(
64
,
32
,
16
,
16
),
sa_channels
=
((
64
,
64
,
128
),
(
128
,
128
,
256
),
(
128
,
128
,
256
),
(
128
,
128
,
256
)),
fp_channels
=
((
256
,
256
),
(
256
,
256
)),
norm_cfg
=
dict
(
type
=
'BN2d'
),
pool_mod
=
'max'
),
bbox_head
=
dict
(
type
=
'VoteHead'
,
num_classes
=
18
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
18
,
num_dir_bins
=
1
,
with_rot
=
False
,
mean_sizes
=
[[
0.76966727
,
0.8116021
,
0.92573744
],
[
1.876858
,
1.8425595
,
1.1931566
],
[
0.61328
,
0.6148609
,
0.7182701
],
[
1.3955007
,
1.5121545
,
0.83443564
],
[
0.97949594
,
1.0675149
,
0.6329687
],
[
0.531663
,
0.5955577
,
1.7500148
],
[
0.9624706
,
0.72462326
,
1.1481868
],
[
0.83221924
,
1.0490936
,
1.6875663
],
[
0.21132214
,
0.4206159
,
0.5372846
],
[
1.4440073
,
1.8970833
,
0.26985747
],
[
1.0294262
,
1.4040797
,
0.87554324
],
[
1.3766412
,
0.65521795
,
1.6813129
],
[
0.6650819
,
0.71111923
,
1.298853
],
[
0.41999173
,
0.37906948
,
1.7513971
],
[
0.59359556
,
0.5912492
,
0.73919016
],
[
0.50867593
,
0.50656086
,
0.30136237
],
[
1.1511526
,
1.0546296
,
0.49706793
],
[
0.47535285
,
0.49249494
,
0.5802117
]]),
vote_moudule_cfg
=
dict
(
in_channels
=
256
,
vote_per_seed
=
1
,
gt_per_seed
=
3
,
conv_channels
=
(
256
,
256
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
norm_feats
=
True
,
vote_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l1'
,
reduction
=
'none'
,
loss_dst_weight
=
10.0
)),
vote_aggregation_cfg
=
dict
(
num_point
=
256
,
radius
=
0.3
,
num_sample
=
16
,
mlp_channels
=
[
256
,
128
,
128
,
128
],
use_xyz
=
True
,
normalize_xyz
=
True
),
feat_channels
=
(
128
,
128
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
objectness_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
class_weight
=
[
0.2
,
0.8
],
reduction
=
'sum'
,
loss_weight
=
5.0
),
center_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l2'
,
reduction
=
'sum'
,
loss_src_weight
=
10.0
,
loss_dst_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)))
# model training and testing settings
train_cfg
=
dict
(
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
=
'vote'
)
test_cfg
=
dict
(
sample_mod
=
'seed'
,
nms_thr
=
0.25
,
score_thr
=
0.05
,
per_class_proposal
=
True
)
# dataset settings
dataset_type
=
'ScanNetDataset'
data_root
=
'./data/scannet/'
class_names
=
(
'cabinet'
,
'bed'
,
'chair'
,
'sofa'
,
'table'
,
'door'
,
'window'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
with_mask_3d
=
True
,
with_seg_3d
=
True
),
dict
(
type
=
'PointSegClassMapping'
,
valid_cat_ids
=
(
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
14
,
16
,
24
,
28
,
33
,
34
,
36
,
39
)),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
40000
),
dict
(
type
=
'IndoorFlipData'
,
flip_ratio_yz
=
0.5
,
flip_ratio_xz
=
0.5
),
dict
(
type
=
'IndoorGlobalRotScale'
,
shift_height
=
True
,
rot_range
=
[
-
1
/
36
,
1
/
36
],
scale_range
=
None
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'pts_semantic_mask'
,
'pts_instance_mask'
])
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
40000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
8
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_train.pkl'
,
pipeline
=
train_pipeline
,
filter_empty_gt
=
False
,
classes
=
class_names
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'scannet_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.008
# max learning rate
optimizer
=
dict
(
type
=
'Adam'
,
lr
=
lr
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
24
,
32
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
10
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
36
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/votenet_scannet'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/sunrgbd/votenet_16x8_sunrgbd-3d-10class.py
0 → 100644
View file @
f27d308f
# model settings
model
=
dict
(
type
=
'VoteNet'
,
backbone
=
dict
(
type
=
'PointNet2SASSG'
,
in_channels
=
4
,
num_points
=
(
2048
,
1024
,
512
,
256
),
radius
=
(
0.2
,
0.4
,
0.8
,
1.2
),
num_samples
=
(
64
,
32
,
16
,
16
),
sa_channels
=
((
64
,
64
,
128
),
(
128
,
128
,
256
),
(
128
,
128
,
256
),
(
128
,
128
,
256
)),
fp_channels
=
((
256
,
256
),
(
256
,
256
)),
norm_cfg
=
dict
(
type
=
'BN2d'
),
pool_mod
=
'max'
),
bbox_head
=
dict
(
type
=
'VoteHead'
,
num_classes
=
10
,
bbox_coder
=
dict
(
type
=
'PartialBinBasedBBoxCoder'
,
num_sizes
=
10
,
num_dir_bins
=
12
,
with_rot
=
True
,
mean_sizes
=
[[
2.114256
,
1.620300
,
0.927272
],
[
0.791118
,
1.279516
,
0.718182
],
[
0.923508
,
1.867419
,
0.845495
],
[
0.591958
,
0.552978
,
0.827272
],
[
0.699104
,
0.454178
,
0.75625
],
[
0.69519
,
1.346299
,
0.736364
],
[
0.528526
,
1.002642
,
1.172878
],
[
0.500618
,
0.632163
,
0.683424
],
[
0.404671
,
1.071108
,
1.688889
],
[
0.76584
,
1.398258
,
0.472728
]]),
vote_moudule_cfg
=
dict
(
in_channels
=
256
,
vote_per_seed
=
1
,
gt_per_seed
=
3
,
conv_channels
=
(
256
,
256
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
norm_feats
=
True
,
vote_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l1'
,
reduction
=
'none'
,
loss_dst_weight
=
10.0
)),
vote_aggregation_cfg
=
dict
(
num_point
=
256
,
radius
=
0.3
,
num_sample
=
16
,
mlp_channels
=
[
256
,
128
,
128
,
128
],
use_xyz
=
True
,
normalize_xyz
=
True
),
feat_channels
=
(
128
,
128
),
conv_cfg
=
dict
(
type
=
'Conv1d'
),
norm_cfg
=
dict
(
type
=
'BN1d'
),
objectness_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
class_weight
=
[
0.2
,
0.8
],
reduction
=
'sum'
,
loss_weight
=
5.0
),
center_loss
=
dict
(
type
=
'ChamferDistance'
,
mode
=
'l2'
,
reduction
=
'sum'
,
loss_src_weight
=
10.0
,
loss_dst_weight
=
10.0
),
dir_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
dir_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
),
size_class_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
size_res_loss
=
dict
(
type
=
'SmoothL1Loss'
,
reduction
=
'sum'
,
loss_weight
=
10.0
/
3.0
),
semantic_loss
=
dict
(
type
=
'CrossEntropyLoss'
,
reduction
=
'sum'
,
loss_weight
=
1.0
)))
# model training and testing settings
train_cfg
=
dict
(
pos_distance_thr
=
0.3
,
neg_distance_thr
=
0.6
,
sample_mod
=
'vote'
)
test_cfg
=
dict
(
sample_mod
=
'seed'
,
nms_thr
=
0.25
,
score_thr
=
0.05
,
per_class_proposal
=
True
)
# dataset settings
dataset_type
=
'SUNRGBDDataset'
data_root
=
'data/sunrgbd/'
class_names
=
(
'bed'
,
'table'
,
'sofa'
,
'chair'
,
'toilet'
,
'desk'
,
'dresser'
,
'night_stand'
,
'bookshelf'
,
'bathtub'
)
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'LoadAnnotations3D'
),
dict
(
type
=
'IndoorFlipData'
,
flip_ratio_yz
=
0.5
),
dict
(
type
=
'IndoorGlobalRotScale'
,
shift_height
=
True
,
rot_range
=
[
-
1
/
6
,
1
/
6
],
scale_range
=
[
0.85
,
1.15
]),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
20000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
shift_height
=
True
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
]),
dict
(
type
=
'IndoorPointSample'
,
num_points
=
20000
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
]
data
=
dict
(
samples_per_gpu
=
16
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
'RepeatDataset'
,
times
=
5
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_train.pkl'
,
pipeline
=
train_pipeline
,
classes
=
class_names
,
filter_empty_gt
=
False
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'sunrgbd_infos_val.pkl'
,
pipeline
=
test_pipeline
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.008
# max learning rate
optimizer
=
dict
(
type
=
'Adam'
,
lr
=
lr
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'step'
,
warmup
=
None
,
step
=
[
24
,
32
])
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
30
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
36
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/votenet_sunrgbd'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
mmdet3d/core/bbox/__init__.py
View file @
f27d308f
...
...
@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
PseudoSampler
,
RandomSampler
,
SamplingResult
)
from
.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
.transforms
import
bbox3d2result
,
bbox3d2roi
,
boxes3d_to_bev_torch_lidar
from
.transforms
import
(
bbox3d2result
,
bbox3d2roi
,
box3d_to_corner3d_upright_depth
,
boxes3d_to_bev_torch_lidar
)
from
.assign_sampling
import
(
# isort:skip, avoid recursive imports
build_bbox_coder
,
# temporally settings
...
...
@@ -22,5 +24,6 @@ __all__ = [
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'CameraInstance3DBoxes'
,
'bbox3d2roi'
,
'bbox3d2result'
'CameraInstance3DBoxes'
,
'bbox3d2roi'
,
'bbox3d2result'
,
'box3d_to_corner3d_upright_depth'
]
mmdet3d/core/bbox/coders/__init__.py
View file @
f27d308f
from
mmdet.core.bbox
import
build_bbox_coder
from
.delta_xyzwhlr_bbox_coder
import
DeltaXYZWLHRBBoxCoder
from
.partial_bin_based_bbox_coder
import
PartialBinBasedBBoxCoder
__all__
=
[
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
]
__all__
=
[
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'PartialBinBasedBBoxCoder'
]
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py
0 → 100644
View file @
f27d308f
import
numpy
as
np
import
torch
from
mmdet.core.bbox
import
BaseBBoxCoder
from
mmdet.core.bbox.builder
import
BBOX_CODERS
@BBOX_CODERS.register_module()
class PartialBinBasedBBoxCoder(BaseBBoxCoder):
    """Partial bin based bbox coder.

    Encodes a 3D box as a center, a size-cluster class plus a size residual,
    and a direction-bin class plus an angle residual; decodes the network
    outputs back into (x, y, z, dx, dy, dz, yaw) boxes.

    Args:
        num_dir_bins (int): Number of bins to encode direction angle.
        num_sizes (int): Number of size clusters.
        mean_sizes (list[list[int]]): Mean size of bboxes in each class.
        with_rot (bool): Whether the bbox is with rotation.
    """

    def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):
        super(PartialBinBasedBBoxCoder, self).__init__()
        assert len(mean_sizes) == num_sizes
        self.num_dir_bins = num_dir_bins
        self.num_sizes = num_sizes
        self.mean_sizes = mean_sizes
        self.with_rot = with_rot

    def encode(self, gt_bboxes_3d, gt_labels_3d):
        """Encode ground truth to prediction targets.

        Args:
            gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
            gt_labels_3d (Tensor): Gt classes.

        Returns:
            tuple: Targets of center, size and direction.
        """
        # generate center target
        center_target = gt_bboxes_3d[..., 0:3]

        # generate bbox size target: residual w.r.t. the class mean size
        size_class_target = gt_labels_3d
        size_res_target = gt_bboxes_3d[..., 3:6] - gt_bboxes_3d.new_tensor(
            self.mean_sizes)[size_class_target]

        # generate dir target
        box_num = gt_bboxes_3d.shape[0]
        if self.with_rot:
            (dir_class_target,
             dir_res_target) = self.angle2class(gt_bboxes_3d[..., 6])
        else:
            # without rotation every box gets the zero bin / zero residual
            dir_class_target = gt_labels_3d.new_zeros(box_num)
            dir_res_target = gt_bboxes_3d.new_zeros(box_num)

        return (center_target, size_class_target, size_res_target,
                dir_class_target, dir_res_target)

    def decode(self, bbox_out):
        """Decode predicted parts to bbox3d.

        Args:
            bbox_out (dict): predictions from model, should contain keys below
                - center: predicted bottom center of bboxes.
                - dir_class: predicted bbox direction class.
                - dir_res: predicted bbox direction residual.
                - size_class: predicted bbox size class.
                - size_res: predicted bbox size residual.

        Returns:
            Tensor: decoded bbox3d with shape (batch, n, 7).
        """
        center = bbox_out['center']
        batch_size, num_proposal = center.shape[:2]

        # decode heading angle
        if self.with_rot:
            dir_class = torch.argmax(bbox_out['dir_class'], -1)
            # pick the residual corresponding to the arg-max direction bin
            dir_res = torch.gather(bbox_out['dir_res'], 2,
                                   dir_class.unsqueeze(-1))
            dir_res.squeeze_(2)
            dir_angle = self.class2angle(dir_class, dir_res).reshape(
                batch_size, num_proposal, 1)
        else:
            dir_angle = center.new_zeros(batch_size, num_proposal, 1)

        # decode bbox size: class mean size + its predicted residual
        size_class = torch.argmax(bbox_out['size_class'], -1, keepdim=True)
        size_res = torch.gather(bbox_out['size_res'], 2,
                                size_class.unsqueeze(-1).repeat(1, 1, 1, 3))
        mean_sizes = center.new_tensor(self.mean_sizes)
        size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))
        bbox_size = size_base.reshape(batch_size, num_proposal,
                                      -1) + size_res.squeeze(2)

        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)
        return bbox3d

    def split_pred(self, preds, base_xyz):
        """Split predicted features to specific parts.

        Args:
            preds (Tensor): predicted features to split.
            base_xyz (Tensor): coordinates of points.

        Returns:
            dict: split results.
        """
        results = {}
        start, end = 0, 0
        # (batch, channels, num_proposal) -> (batch, num_proposal, channels)
        preds_trans = preds.transpose(2, 1)

        # decode objectness score
        end += 2
        results['obj_scores'] = preds_trans[..., start:end]
        start = end

        # decode center: predicted as an offset from the seed coordinates
        end += 3
        # (batch_size, num_proposal, 3)
        results['center'] = base_xyz + preds_trans[..., start:end]
        start = end

        # decode direction
        end += self.num_dir_bins
        results['dir_class'] = preds_trans[..., start:end]
        start = end

        end += self.num_dir_bins
        dir_res_norm = preds_trans[..., start:end]
        start = end

        results['dir_res_norm'] = dir_res_norm
        # residual is predicted normalized by the half bin width
        results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)

        # decode size
        end += self.num_sizes
        results['size_class'] = preds_trans[..., start:end]
        start = end

        end += self.num_sizes * 3
        size_res_norm = preds_trans[..., start:end]
        batch_size, num_proposal = preds_trans.shape[:2]
        size_res_norm = size_res_norm.view(
            [batch_size, num_proposal, self.num_sizes, 3])
        start = end
        results['size_res_norm'] = size_res_norm
        # size residual is predicted normalized by the class mean size
        mean_sizes = preds.new_tensor(self.mean_sizes)
        results['size_res'] = (
            size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))

        # decode semantic score: remaining channels
        results['sem_scores'] = preds_trans[..., start:]

        return results

    def angle2class(self, angle):
        """Convert continuous angle to a discrete class and a residual.

        Convert continuous angle to a discrete class and a small
        regression number from class center angle to current angle.

        Args:
            angle (Tensor): Angle is from 0-2pi (or -pi~pi), class center at
                0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N).

        Returns:
            tuple: Encoded discrete class and residual.
        """
        angle = angle % (2 * np.pi)
        angle_per_class = 2 * np.pi / float(self.num_dir_bins)
        # shift by half a bin so each class center sits mid-bin
        shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)
        angle_cls = shifted_angle // angle_per_class
        angle_res = shifted_angle - (
            angle_cls * angle_per_class + angle_per_class / 2)
        return angle_cls.long(), angle_res

    def class2angle(self, angle_cls, angle_res, limit_period=True):
        """Inverse function to angle2class.

        Args:
            angle_cls (Tensor): Angle class to decode.
            angle_res (Tensor): Angle residual to decode.
            limit_period (bool): Whether to limit angle to [-pi, pi].

        Returns:
            Tensor: angle decoded from angle_cls and angle_res.
        """
        angle_per_class = 2 * np.pi / float(self.num_dir_bins)
        angle_center = angle_cls.float() * angle_per_class
        angle = angle_center + angle_res
        if limit_period:
            # wrap angles from (pi, 2pi) back into (-pi, 0]
            angle[angle > np.pi] -= 2 * np.pi
        return angle
mmdet3d/core/bbox/transforms.py
View file @
f27d308f
...
...
@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
"""
return
dict
(
boxes_3d
=
bboxes
.
cpu
(),
scores_3d
=
scores
.
cpu
(),
labels_3d
=
labels
.
cpu
())
def upright_depth_to_lidar_torch(points=None, bboxes=None,
                                 to_bottom_center=False):
    """Convert points and boxes in upright depth coordinate to lidar.

    Args:
        points (None | Tensor): points in upright depth coordinate.
        bboxes (None | Tensor): bboxes (x, y, z, dx, dy, dz, yaw) in upright
            depth coordinate.
        to_bottom_center (bool): convert bboxes to bottom center.

    Returns:
        tuple: points and bboxes in lidar coordinate (None is passed through).
    """
    points_lidar = None
    bboxes_lidar = None
    if points is not None:
        # swap the x/y axes and flip the new y: (x, y, z) -> (y, -x, z)
        points_lidar = points.clone()[..., [1, 0, 2]]
        points_lidar[..., 1] *= -1
    if bboxes is not None:
        # same axis swap for locations, and swap dx/dy accordingly
        bboxes_lidar = bboxes.clone()[..., [1, 0, 2, 4, 3, 5, 6]]
        bboxes_lidar[..., 1] *= -1
        if to_bottom_center:
            # shift gravity center down by half the box height
            bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
    return points_lidar, bboxes_lidar
def box3d_to_corner3d_upright_depth(boxes3d):
    """Convert box3d to corner3d in upright depth coordinate.

    Args:
        boxes3d (Tensor): boxes with shape [n, 7]
            (x, y, z, l, w, h, ry) in upright depth coordinate.

    Returns:
        Tensor: corners with shape [n, 8, 3] in upright depth coordinate.
    """
    num_boxes = boxes3d.shape[0]
    ry = boxes3d[:, 6:7]
    l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
    zeros = boxes3d.new_zeros((num_boxes, 1))
    ones = boxes3d.new_ones((num_boxes, 1))

    # Template corners around the origin; top face first, then bottom face.
    x_corners = torch.cat(
        [-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
        dim=1)  # (N, 8)
    y_corners = torch.cat(
        [w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
        dim=1)  # (N, 8)
    z_corners = torch.cat(
        [h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
        dim=1)  # (N, 8)
    template = torch.cat((x_corners.unsqueeze(dim=2),
                          y_corners.unsqueeze(dim=2),
                          z_corners.unsqueeze(dim=2)), dim=2)  # (N, 8, 3)

    # Per-box rotation matrix about the z-axis (angle -ry).
    cosa, sina = torch.cos(-ry), torch.sin(-ry)
    row_1 = torch.cat([cosa, -sina, zeros], dim=1)  # (N, 3)
    row_2 = torch.cat([sina, cosa, zeros], dim=1)   # (N, 3)
    row_3 = torch.cat([zeros, zeros, ones], dim=1)  # (N, 3)
    rot = torch.cat((row_1.unsqueeze(dim=1), row_2.unsqueeze(dim=1),
                     row_3.unsqueeze(dim=1)), dim=1)  # (N, 3, 3)

    rotated = torch.matmul(template, rot)  # (N, 8, 3)
    rx, ry_c, rz = rotated[:, :, 0], rotated[:, :, 1], rotated[:, :, 2]

    # Translate rotated corners to each box center.
    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
    x = x_loc.view(-1, 1) + rx.view(-1, 8)
    y = y_loc.view(-1, 1) + ry_c.view(-1, 8)
    z = z_loc.view(-1, 1) + rz.view(-1, 8)

    corners3d = torch.cat(
        (x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)
    return corners3d
mmdet3d/core/evaluation/indoor_eval.py
View file @
f27d308f
import
numpy
as
np
import
torch
from
mmcv.utils
import
print_log
from
terminaltables
import
AsciiTable
from
mmdet3d.core.bbox.iou_calculators.iou3d_calculator
import
bbox_overlaps_3d
...
...
@@ -263,14 +265,14 @@ def eval_map_recall(det_infos, gt_infos, ovthresh=None):
recall
[
iou_idx
][
label
],
precision
[
iou_idx
][
label
],
ap
[
iou_idx
][
label
]
=
ret_values
[
i
][
iou_idx
]
else
:
recall
[
iou_idx
][
label
]
=
[
0
]
precision
[
iou_idx
][
label
]
=
[
0
]
ap
[
iou_idx
][
label
]
=
[
0
]
recall
[
iou_idx
][
label
]
=
np
.
zeros
(
1
)
precision
[
iou_idx
][
label
]
=
np
.
zeros
(
1
)
ap
[
iou_idx
][
label
]
=
np
.
zeros
(
1
)
return
recall
,
precision
,
ap
def
indoor_eval
(
gt_annos
,
dt_annos
,
metric
,
label2cat
):
def
indoor_eval
(
gt_annos
,
dt_annos
,
metric
,
label2cat
,
logger
=
None
):
"""Scannet Evaluation.
Evaluate the result of the detection.
...
...
@@ -280,6 +282,8 @@ def indoor_eval(gt_annos, dt_annos, metric, label2cat):
dt_annos (list[dict]): Detection annotations.
metric (list[float]): AP IoU thresholds.
label2cat (dict): {label: cat}.
logger (logging.Logger | str | None): The way to print the mAP
summary. See `mmdet.utils.print_log()` for details. Default: None.
Return:
dict: Dict of results.
...
...
@@ -301,20 +305,41 @@ def indoor_eval(gt_annos, dt_annos, metric, label2cat):
boxes_3d
=
np
.
array
([],
dtype
=
np
.
float32
),
labels_3d
=
np
.
array
([],
dtype
=
np
.
int64
)))
result_str
=
str
()
result_str
+=
'mAP'
rec
,
prec
,
ap
=
eval_map_recall
(
dt_annos
,
gt_infos
,
metric
)
ret_dict
=
{}
ret_dict
=
dict
()
header
=
[
'classes'
]
table_columns
=
[[
label2cat
[
label
]
for
label
in
ap
[
0
].
keys
()]
+
[
'Overall'
]]
for
i
,
iou_thresh
in
enumerate
(
metric
):
header
.
append
(
f
'AP_
{
iou_thresh
:.
2
f
}
'
)
header
.
append
(
f
'AR_
{
iou_thresh
:.
2
f
}
'
)
rec_list
=
[]
for
label
in
ap
[
i
].
keys
():
ret_dict
[
f
'
{
label2cat
[
label
]
}
_AP_
{
iou_thresh
:.
2
f
}
'
]
=
float
(
ap
[
i
][
label
][
0
])
ret_dict
[
f
'mAP_
{
iou_thresh
:.
2
f
}
'
]
=
float
(
np
.
mean
(
list
(
ap
[
i
].
values
())))
table_columns
.
append
(
list
(
map
(
float
,
list
(
ap
[
i
].
values
()))))
table_columns
[
-
1
]
+=
[
ret_dict
[
f
'mAP_
{
iou_thresh
:.
2
f
}
'
]]
table_columns
[
-
1
]
=
[
f
'
{
x
:.
4
f
}
'
for
x
in
table_columns
[
-
1
]]
for
label
in
rec
[
i
].
keys
():
ret_dict
[
f
'
{
label2cat
[
label
]
}
_rec_
{
iou_thresh
:.
2
f
}
'
]
=
float
(
rec
[
i
][
label
][
-
1
])
rec_list
.
append
(
rec
[
i
][
label
][
-
1
])
ret_dict
[
f
'mAR_
{
iou_thresh
:.
2
f
}
'
]
=
float
(
np
.
mean
(
rec_list
))
table_columns
.
append
(
list
(
map
(
float
,
rec_list
)))
table_columns
[
-
1
]
+=
[
ret_dict
[
f
'mAR_
{
iou_thresh
:.
2
f
}
'
]]
table_columns
[
-
1
]
=
[
f
'
{
x
:.
4
f
}
'
for
x
in
table_columns
[
-
1
]]
table_data
=
[
header
]
table_rows
=
list
(
zip
(
*
table_columns
))
table_data
+=
table_rows
table
=
AsciiTable
(
table_data
)
table
.
inner_footing_row_border
=
True
print_log
(
'
\n
'
+
table
.
table
,
logger
=
logger
)
return
ret_dict
mmdet3d/core/post_processing/__init__.py
View file @
f27d308f
from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
                                        merge_aug_proposals, merge_aug_scores,
                                        multiclass_nms)
from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms

__all__ = [
    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
    'aligned_3d_nms'
]
mmdet3d/core/post_processing/box3d_nms.py
View file @
f27d308f
...
...
@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
labels
=
mlvl_scores
.
new_zeros
((
0
,
mlvl_scores
.
size
(
-
1
)))
dir_scores
=
mlvl_scores
.
new_zeros
((
0
,
))
return
bboxes
,
scores
,
labels
,
dir_scores
def aligned_3d_nms(boxes, scores, classes, thresh):
    """3d nms for aligned boxes.

    Greedy NMS over axis-aligned 3D boxes; overlap only suppresses boxes of
    the same class.

    Args:
        boxes (Tensor): Aligned box with shape [n, 6]
            (x1, y1, z1, x2, y2, z2).
        scores (Tensor): Scores of each box.
        classes (Tensor): Class of each box.
        thresh (float): Iou threshold for nms.

    Returns:
        Tensor: Indices of selected boxes.
    """
    x1, y1, z1 = boxes[:, 0], boxes[:, 1], boxes[:, 2]
    x2, y2, z2 = boxes[:, 3], boxes[:, 4], boxes[:, 5]
    volume = (x2 - x1) * (y2 - y1) * (z2 - z1)
    zero = boxes.new_zeros(1, )

    # ascending score order; the best candidate sits at the end
    order = torch.argsort(scores)
    keep = []
    while order.shape[0] != 0:
        cur = order[-1]
        keep.append(cur.item())
        rest = order[:order.shape[0] - 1]

        # intersection extents against all remaining boxes
        xx1 = torch.max(x1[cur], x1[rest])
        yy1 = torch.max(y1[cur], y1[rest])
        zz1 = torch.max(z1[cur], z1[rest])
        xx2 = torch.min(x2[cur], x2[rest])
        yy2 = torch.min(y2[cur], y2[rest])
        zz2 = torch.min(z2[cur], z2[rest])

        inter = (torch.max(zero, xx2 - xx1) * torch.max(zero, yy2 - yy1) *
                 torch.max(zero, zz2 - zz1))
        iou = inter / (volume[cur] + volume[rest] - inter)
        # zero out IoU across classes so only same-class boxes suppress
        iou = iou * (classes[cur] == classes[rest]).float()

        order = order[torch.nonzero(iou <= thresh).flatten()]

    return boxes.new_tensor(keep, dtype=torch.long)
mmdet3d/datasets/__init__.py
View file @
f27d308f
...
...
@@ -8,8 +8,8 @@ from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
from
.nuscenes_dataset
import
NuScenesDataset
from
.pipelines
import
(
GlobalRotScale
,
IndoorFlipData
,
IndoorGlobalRotScale
,
IndoorPointSample
,
IndoorPointsColorJitter
,
IndoorPointsColorNormalize
,
LoadAnnotations3D
,
LoadPointsFromFile
,
ObjectNoise
,
ObjectRangeFilter
,
LoadAnnotations3D
,
LoadPointsFromFile
,
NormalizePointsColor
,
ObjectNoise
,
ObjectRangeFilter
,
ObjectSample
,
PointShuffle
,
PointsRangeFilter
,
RandomFlip3D
)
from
.scannet_dataset
import
ScanNetDataset
...
...
@@ -21,7 +21,7 @@ __all__ = [
'CocoDataset'
,
'Kitti2DDataset'
,
'NuScenesDataset'
,
'ObjectSample'
,
'RandomFlip3D'
,
'ObjectNoise'
,
'GlobalRotScale'
,
'PointShuffle'
,
'ObjectRangeFilter'
,
'PointsRangeFilter'
,
'Collect3D'
,
'LoadPointsFromFile'
,
'
Indoor
PointsColor
Normalize
'
,
'IndoorPointSample'
,
'LoadPointsFromFile'
,
'
Normalize
PointsColor'
,
'IndoorPointSample'
,
'LoadAnnotations3D'
,
'IndoorPointsColorJitter'
,
'IndoorGlobalRotScale'
,
'IndoorFlipData'
,
'SUNRGBDDataset'
,
'ScanNetDataset'
,
'Custom3DDataset'
]
mmdet3d/datasets/custom_3d.py
View file @
f27d308f
...
...
@@ -3,7 +3,6 @@ import tempfile
import
mmcv
import
numpy
as
np
from
mmcv.utils
import
print_log
from
torch.utils.data
import
Dataset
from
mmdet.datasets
import
DATASETS
...
...
@@ -19,12 +18,14 @@ class Custom3DDataset(Dataset):
pipeline
=
None
,
classes
=
None
,
modality
=
None
,
filter_empty_gt
=
True
,
test_mode
=
False
):
super
().
__init__
()
self
.
data_root
=
data_root
self
.
ann_file
=
ann_file
self
.
test_mode
=
test_mode
self
.
modality
=
modality
self
.
filter_empty_gt
=
filter_empty_gt
self
.
CLASSES
=
self
.
get_classes
(
classes
)
self
.
data_infos
=
self
.
load_annotations
(
self
.
ann_file
)
...
...
@@ -52,7 +53,7 @@ class Custom3DDataset(Dataset):
if
not
self
.
test_mode
:
annos
=
self
.
get_ann_info
(
index
)
input_dict
[
'ann_info'
]
=
annos
if
len
(
annos
[
'gt_bboxes_3d'
])
==
0
:
if
self
.
filter_empty_gt
and
len
(
annos
[
'gt_bboxes_3d'
])
==
0
:
return
None
return
input_dict
...
...
@@ -67,7 +68,8 @@ class Custom3DDataset(Dataset):
return
None
self
.
pre_pipeline
(
input_dict
)
example
=
self
.
pipeline
(
input_dict
)
if
example
is
None
or
len
(
example
[
'gt_bboxes_3d'
].
_data
)
==
0
:
if
self
.
filter_empty_gt
and
(
example
is
None
or
len
(
example
[
'gt_bboxes_3d'
].
_data
)
==
0
):
return
None
return
example
...
...
@@ -124,23 +126,20 @@ class Custom3DDataset(Dataset):
results (list[dict]): List of results.
metric (str | list[str]): Metrics to be evaluated.
iou_thr (list[float]): AP IoU thresholds.
"""
from
mmdet3d.core.evaluation
import
indoor_eval
assert
isinstance
(
results
,
list
),
f
'Expect results to be list, got
{
type
(
results
)
}
.'
assert
len
(
results
)
>
0
,
f
'Expect length of results > 0.'
assert
len
(
results
)
==
len
(
self
.
data_infos
)
assert
isinstance
(
results
[
0
],
dict
),
f
'Expect elements in results to be dict, got
{
type
(
results
[
0
])
}
.'
gt_annos
=
[
info
[
'annos'
]
for
info
in
self
.
data_infos
]
label2cat
=
{
i
:
cat_id
for
i
,
cat_id
in
enumerate
(
self
.
CLASSES
)}
ret_dict
=
indoor_eval
(
gt_annos
,
results
,
iou_thr
,
label2cat
)
result_str
=
str
()
for
key
,
val
in
ret_dict
.
items
():
result_str
+=
f
'
{
key
}
:
{
val
}
\n
'
mAP_25
,
mAP_50
=
ret_dict
[
'mAP_0.25'
],
ret_dict
[
'mAP_0.50'
]
result_str
+=
f
'mAP(0.25):
{
mAP_25
}
mAP(0.50):
{
mAP_50
}
'
print_log
(
'
\n
'
+
result_str
,
logger
=
logger
)
ret_dict
=
indoor_eval
(
gt_annos
,
results
,
iou_thr
,
label2cat
,
logger
=
logger
)
return
ret_dict
...
...
mmdet3d/datasets/pipelines/__init__.py
View file @
f27d308f
...
...
@@ -3,10 +3,11 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
from
.formating
import
DefaultFormatBundle
,
DefaultFormatBundle3D
from
.indoor_augment
import
(
IndoorFlipData
,
IndoorGlobalRotScale
,
IndoorPointsColorJitter
)
from
.indoor_loading
import
(
IndoorPointsColorNormalize
,
LoadAnnotations3D
,
LoadPointsFromFile
)
from
.indoor_loading
import
(
LoadAnnotations3D
,
LoadPointsFromFile
,
NormalizePointsColor
)
from
.indoor_sample
import
IndoorPointSample
from
.loading
import
LoadMultiViewImageFromFiles
from
.point_seg_class_mapping
import
PointSegClassMapping
from
.train_aug
import
(
GlobalRotScale
,
ObjectNoise
,
ObjectRangeFilter
,
ObjectSample
,
PointShuffle
,
PointsRangeFilter
,
RandomFlip3D
)
...
...
@@ -17,6 +18,6 @@ __all__ = [
'Compose'
,
'LoadMultiViewImageFromFiles'
,
'LoadPointsFromFile'
,
'DefaultFormatBundle'
,
'DefaultFormatBundle3D'
,
'DataBaseSampler'
,
'IndoorGlobalRotScale'
,
'IndoorPointsColorJitter'
,
'IndoorFlipData'
,
'MMDataBaseSampler'
,
'
Indoor
PointsColor
Normalize
'
,
'LoadAnnotations3D'
,
'IndoorPointSample'
'MMDataBaseSampler'
,
'
Normalize
PointsColor'
,
'LoadAnnotations3D'
,
'IndoorPointSample'
,
'PointSegClassMapping'
]
mmdet3d/datasets/pipelines/indoor_augment.py
View file @
f27d308f
...
...
@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
results
[
'scale_ratio'
]
=
scale_ratio
results
[
'points'
]
=
points
results
[
'gt_bboxes_3d'
]
=
gt_bboxes_3d
results
[
'gt_bboxes_3d'
]
=
gt_bboxes_3d
.
astype
(
np
.
float32
)
return
results
def
__repr__
(
self
):
...
...
mmdet3d/datasets/pipelines/indoor_loading.py
View file @
f27d308f
...
...
@@ -6,8 +6,8 @@ from mmdet.datasets.pipelines import LoadAnnotations
@
PIPELINES
.
register_module
()
class
Indoor
PointsColor
Normalize
(
object
):
"""
Indoor points color normalize
class
Normalize
PointsColor
(
object
):
"""
Normalize color of points
Normalize color of the points.
...
...
@@ -45,9 +45,16 @@ class LoadPointsFromFile(object):
use_dim (list[int]): Which dimensions of the points to be used.
Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
or use_dim=[0, 1, 2, 3] to use the intensity dimension
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def
__init__
(
self
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
],
shift_height
=
False
):
def
__init__
(
self
,
load_dim
=
6
,
use_dim
=
[
0
,
1
,
2
],
shift_height
=
False
,
file_client_args
=
dict
(
backend
=
'disk'
)):
self
.
shift_height
=
shift_height
if
isinstance
(
use_dim
,
int
):
use_dim
=
list
(
range
(
use_dim
))
...
...
@@ -56,8 +63,16 @@ class LoadPointsFromFile(object):
self
.
load_dim
=
load_dim
self
.
use_dim
=
use_dim
self
.
file_client_args
=
file_client_args
.
copy
()
self
.
file_client
=
None
def
_load_points
(
self
,
pts_filename
):
if
self
.
file_client
is
None
:
self
.
file_client
=
mmcv
.
FileClient
(
**
self
.
file_client_args
)
try
:
pts_bytes
=
self
.
file_client
.
get
(
pts_filename
)
points
=
np
.
frombuffer
(
pts_bytes
,
dtype
=
np
.
float32
)
except
ConnectionError
:
mmcv
.
check_file_exist
(
pts_filename
)
if
pts_filename
.
endswith
(
'.npy'
):
points
=
np
.
load
(
pts_filename
)
...
...
@@ -113,6 +128,9 @@ class LoadAnnotations3D(LoadAnnotations):
Defaults to False.
poly2mask (bool, optional): Whether to convert polygon annotations
to bitmasks. Defaults to True.
file_client_args (dict): Config dict of file clients, refer to
https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
for more details.
"""
def
__init__
(
self
,
...
...
@@ -124,8 +142,15 @@ class LoadAnnotations3D(LoadAnnotations):
with_label
=
False
,
with_mask
=
False
,
with_seg
=
False
,
poly2mask
=
True
):
super
().
__init__
(
with_bbox
,
with_label
,
with_mask
,
with_seg
,
poly2mask
)
poly2mask
=
True
,
file_client_args
=
dict
(
backend
=
'disk'
)):
super
().
__init__
(
with_bbox
,
with_label
,
with_mask
,
with_seg
,
poly2mask
,
file_client_args
=
file_client_args
)
self
.
with_bbox_3d
=
with_bbox_3d
self
.
with_label_3d
=
with_label_3d
self
.
with_mask_3d
=
with_mask_3d
...
...
@@ -142,16 +167,35 @@ class LoadAnnotations3D(LoadAnnotations):
def
_load_masks_3d
(
self
,
results
):
pts_instance_mask_path
=
results
[
'ann_info'
][
'pts_instance_mask_path'
]
if
self
.
file_client
is
None
:
self
.
file_client
=
mmcv
.
FileClient
(
**
self
.
file_client_args
)
try
:
mask_bytes
=
self
.
file_client
.
get
(
pts_instance_mask_path
)
pts_instance_mask
=
np
.
frombuffer
(
mask_bytes
,
dtype
=
np
.
int
)
except
ConnectionError
:
mmcv
.
check_file_exist
(
pts_instance_mask_path
)
pts_instance_mask
=
np
.
fromfile
(
pts_instance_mask_path
,
dtype
=
np
.
long
)
pts_instance_mask
=
np
.
fromfile
(
pts_instance_mask_path
,
dtype
=
np
.
long
)
results
[
'pts_instance_mask'
]
=
pts_instance_mask
results
[
'pts_mask_fields'
].
append
(
results
[
'pts_instance_mask'
])
return
results
def
_load_semantic_seg_3d
(
self
,
results
):
pts_semantic_mask_path
=
results
[
'ann_info'
][
'pts_semantic_mask_path'
]
if
self
.
file_client
is
None
:
self
.
file_client
=
mmcv
.
FileClient
(
**
self
.
file_client_args
)
try
:
mask_bytes
=
self
.
file_client
.
get
(
pts_semantic_mask_path
)
# add .copy() to fix read-only bug
pts_semantic_mask
=
np
.
frombuffer
(
mask_bytes
,
dtype
=
np
.
int
).
copy
()
except
ConnectionError
:
mmcv
.
check_file_exist
(
pts_semantic_mask_path
)
pts_semantic_mask
=
np
.
fromfile
(
pts_semantic_mask_path
,
dtype
=
np
.
long
)
pts_semantic_mask
=
np
.
fromfile
(
pts_semantic_mask_path
,
dtype
=
np
.
long
)
results
[
'pts_semantic_mask'
]
=
pts_semantic_mask
results
[
'pts_seg_fields'
].
append
(
results
[
'pts_semantic_mask'
])
return
results
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment