OpenDAS / mmdetection3d

Commit f27d308f, authored Jun 07, 2020 by yinchimaoliang
merge master

Parents: c66ae813, 27ebcfac
Changes: 80 files in total; this page shows 20 changed files with 1267 additions and 55 deletions (+1267, -55).
Files in this view (+additions, -deletions):

.isort.cfg (+1, -1)
.pre-commit-config.yaml (+2, -1)
configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (+243, -0)
configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py (+218, -0)
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py (+2, -4)
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py (+2, -4)
configs/scannet/votenet_8x8_scannet-3d-18class.py (+188, -0)
configs/sunrgbd/votenet_16x8_sunrgbd-3d-10class.py (+164, -0)
mmdet3d/core/bbox/__init__.py (+5, -2)
mmdet3d/core/bbox/coders/__init__.py (+4, -1)
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py (+193, -0)
mmdet3d/core/bbox/transforms.py (+84, -0)
mmdet3d/core/evaluation/indoor_eval.py (+32, -7)
mmdet3d/core/post_processing/__init__.py (+3, -2)
mmdet3d/core/post_processing/box3d_nms.py (+49, -0)
mmdet3d/datasets/__init__.py (+3, -3)
mmdet3d/datasets/custom_3d.py (+10, -11)
mmdet3d/datasets/pipelines/__init__.py (+5, -4)
mmdet3d/datasets/pipelines/indoor_augment.py (+1, -1)
mmdet3d/datasets/pipelines/indoor_loading.py (+58, -14)
.isort.cfg

@@ -3,6 +3,6 @@ line_length = 79
 multi_line_output = 0
 known_standard_library = setuptools
 known_first_party = mmdet,mmdet3d
-known_third_party = cv2,mmcv,numba,numpy,nuscenes,plyfile,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,torch,torchvision
+known_third_party = cv2,mmcv,numba,numpy,nuscenes,plyfile,pycocotools,pyquaternion,pytest,scipy,shapely,six,skimage,terminaltables,torch,torchvision
 no_lines_before = STDLIB,LOCALFOLDER
 default_section = THIRDPARTY
.pre-commit-config.yaml

@@ -10,11 +10,12 @@ repos:
-   repo: https://github.com/timothycrosley/isort
    rev: 4.3.21
    hooks:
    -   id: isort
-   repo: https://github.com/pre-commit/mirrors-yapf
    rev: v0.30.0
    hooks:
    -   id: yapf
-   repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.5.0
    hooks:
configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (new file, mode 100644)

# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
    type='MVXFasterRCNNV2',
    pts_voxel_layer=dict(
        max_num_points=64,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
        voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
    ),
    pts_voxel_encoder=dict(
        type='HardVFE',
        num_input_features=4,
        num_filters=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    pts_middle_encoder=dict(
        type='PointPillarsScatter',
        in_channels=64,
        output_shape=[400, 400],  # checked from PointCloud3D
    ),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256],
    ),
    pts_neck=dict(
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128],
    ),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[
                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
                [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
                [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
                [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
                [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
                [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
            ],
            sizes=[
                [1.95017717, 4.60718145, 1.72270761],  # car
                [2.4560939, 6.73778078, 2.73004906],  # truck
                [2.87427237, 12.01320693, 3.81509561],  # trailer
                [0.60058911, 1.68452161, 1.27192197],  # bicycle
                [0.66344886, 0.7256437, 1.75748069],  # pedestrian
                [0.39694519, 0.40359262, 1.06232151],  # traffic_cone
                [2.49008838, 0.48578221, 0.98297065],  # barrier
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
    pts=dict(
        assigner=dict(  # for Car
            type='MaxIoUAssigner',
            iou_calculator=dict(type='BboxOverlapsNearest3D'),
            pos_iou_thr=0.6,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        allowed_border=0,
        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    pts=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.2,
        score_thr=0.05,
        min_bbox_size=0,
        max_num=500
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
    ))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
    use_lidar=True,
    use_depth=False,
    use_lidar_intensity=True,
    use_camera=False,
)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    object_rot_range=[0.0, 0.0],
    prepare=dict(),
    classes=class_names,
    sample_groups=dict(
        bus=4,
        trailer=4,
        truck=4,
    ))
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/': 's3://nuscenes/nuscenes/',
        'data/nuscenes/': 's3://nuscenes/nuscenes/'
    }))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.3925, 0.3925],
        scaling_uniform_noise=[0.95, 1.05],
        trans_normal_noise=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='RandomFlip3D', flip_ratio=0),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
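These configs are plain Python modules that the mmdetection3d tools read through mmcv. As a quick orientation aid, a minimal sketch of loading the file above with mmcv's standard Config.fromfile API and inspecting a few of its fields (assumes mmcv is installed and the repo root is the working directory):

# Minimal sketch: load the new nuScenes PointPillars config and inspect it.
from mmcv import Config

cfg = Config.fromfile(
    'configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py')
print(cfg.model.type)            # MVXFasterRCNNV2
print(cfg.data.samples_per_gpu)  # 4 (x 8 GPUs, per the file name)
print(cfg.optimizer.type, cfg.optimizer.lr)  # AdamW 0.001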
configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py (new file, mode 100644)

# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z

model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=5,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(
        type='VoxelFeatureExtractorV3',
        num_input_features=4,
        num_filters=[4],
        with_distance=False),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256],
    ),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256],
    ),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=1,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=True),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
    ),
)
# model training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        iou_calculator=dict(type='BboxOverlapsNearest3D'),
        pos_iou_thr=0.6,
        neg_iou_thr=0.45,
        min_pos_iou=0.45,
        ignore_iof_thr=-1),
    allowed_border=0,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    use_rotate_nms=True,
    nms_across_levels=False,
    nms_thr=0.01,
    score_thr=0.3,
    min_bbox_size=0,
    nms_pre=100,
    max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
    use_lidar=False,
    use_lidar_reduced=True,
    use_depth=False,
    use_lidar_intensity=True,
    use_camera=False,
)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    object_rot_range=[0.0, 0.0],
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5),
    ),
    classes=class_names,
    sample_groups=dict(Car=15),
)
file_client_args = dict(
    backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
        scaling_uniform_noise=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
data = dict(
    samples_per_gpu=6,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_train.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# optimizer
lr = 0.0018  # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py

@@ -255,13 +255,11 @@ optimizer = dict(type='AdamW', lr=0.003, betas=(0.95, 0.99), weight_decay=0.01)
 # max_norm=10 is better for SECOND
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
 lr_config = dict(
-    policy='cosine',
+    policy='CosineAnealing',
     warmup='linear',
     warmup_iters=1000,
     warmup_ratio=1.0 / 10,
-    target_lr=1e-5,
-    as_ratio=True,
-)
+    min_lr_ratio=1e-5)
 momentum_config = None
 checkpoint_config = dict(interval=1)
 # yapf:disable
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py

@@ -207,13 +207,11 @@ optimizer = dict(
     weight_decay=0.001)
 optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
 lr_config = dict(
-    policy='cosine',
+    policy='CosineAnealing',
     warmup='linear',
     warmup_iters=1000,
     warmup_ratio=1.0 / 10,
-    target_lr=1e-5,
-    as_ratio=True,
-)
+    min_lr_ratio=1e-5)
 momentum_config = None
 checkpoint_config = dict(interval=1)
 # yapf:disable
configs/scannet/votenet_8x8_scannet-3d-18class.py (new file, mode 100644)

# model settings
model = dict(
    type='VoteNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=4,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        pool_mod='max'),
    bbox_head=dict(
        type='VoteHead',
        num_classes=18,
        bbox_coder=dict(
            type='PartialBinBasedBBoxCoder',
            num_sizes=18,
            num_dir_bins=1,
            with_rot=False,
            mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
                        [1.876858, 1.8425595, 1.1931566],
                        [0.61328, 0.6148609, 0.7182701],
                        [1.3955007, 1.5121545, 0.83443564],
                        [0.97949594, 1.0675149, 0.6329687],
                        [0.531663, 0.5955577, 1.7500148],
                        [0.9624706, 0.72462326, 1.1481868],
                        [0.83221924, 1.0490936, 1.6875663],
                        [0.21132214, 0.4206159, 0.5372846],
                        [1.4440073, 1.8970833, 0.26985747],
                        [1.0294262, 1.4040797, 0.87554324],
                        [1.3766412, 0.65521795, 1.6813129],
                        [0.6650819, 0.71111923, 1.298853],
                        [0.41999173, 0.37906948, 1.7513971],
                        [0.59359556, 0.5912492, 0.73919016],
                        [0.50867593, 0.50656086, 0.30136237],
                        [1.1511526, 1.0546296, 0.49706793],
                        [0.47535285, 0.49249494, 0.5802117]]),
        vote_moudule_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        feat_channels=(128, 128),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(
    pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
    sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
               'bookshelf', 'picture', 'counter', 'desk', 'curtain',
               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
               'garbagebin')
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_mask_3d=True,
        with_seg_3d=True),
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
                       34, 36, 39)),
    dict(type='IndoorPointSample', num_points=40000),
    dict(type='IndoorFlipData', flip_ratio_yz=0.5, flip_ratio_xz=0.5),
    dict(
        type='IndoorGlobalRotScale',
        shift_height=True,
        rot_range=[-1 / 36, 1 / 36],
        scale_range=None),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
            'pts_instance_mask'
        ])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='IndoorPointSample', num_points=40000),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
            classes=class_names)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True))
# optimizer
lr = 0.008  # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=10,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_scannet'
load_from = None
resume_from = None
workflow = [('train', 1)]
configs/sunrgbd/votenet_16x8_sunrgbd-3d-10class.py (new file, mode 100644)

# model settings
model = dict(
    type='VoteNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=4,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        pool_mod='max'),
    bbox_head=dict(
        type='VoteHead',
        num_classes=10,
        bbox_coder=dict(
            type='PartialBinBasedBBoxCoder',
            num_sizes=10,
            num_dir_bins=12,
            with_rot=True,
            mean_sizes=[[2.114256, 1.620300, 0.927272],
                        [0.791118, 1.279516, 0.718182],
                        [0.923508, 1.867419, 0.845495],
                        [0.591958, 0.552978, 0.827272],
                        [0.699104, 0.454178, 0.75625],
                        [0.69519, 1.346299, 0.736364],
                        [0.528526, 1.002642, 1.172878],
                        [0.500618, 0.632163, 0.683424],
                        [0.404671, 1.071108, 1.688889],
                        [0.76584, 1.398258, 0.472728]]),
        vote_moudule_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        feat_channels=(128, 128),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)))
# model training and testing settings
train_cfg = dict(
    pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')
test_cfg = dict(
    sample_mod='seed', nms_thr=0.25, score_thr=0.05, per_class_proposal=True)
# dataset settings
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
               'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='LoadAnnotations3D'),
    dict(type='IndoorFlipData', flip_ratio_yz=0.5),
    dict(
        type='IndoorGlobalRotScale',
        shift_height=True,
        rot_range=[-1 / 6, 1 / 6],
        scale_range=[0.85, 1.15]),
    dict(type='IndoorPointSample', num_points=20000),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='IndoorPointSample', num_points=20000),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'sunrgbd_infos_train.pkl',
            pipeline=train_pipeline,
            classes=class_names,
            filter_empty_gt=False)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True))
# optimizer
lr = 0.008  # max learning rate
optimizer = dict(type='Adam', lr=lr)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=30,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 36
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/votenet_sunrgbd'
load_from = None
resume_from = None
workflow = [('train', 1)]
mmdet3d/core/bbox/__init__.py

@@ -8,7 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
 from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
-from .transforms import bbox3d2result, bbox3d2roi, boxes3d_to_bev_torch_lidar
+from .transforms import (bbox3d2result, bbox3d2roi,
+                         box3d_to_corner3d_upright_depth,
+                         boxes3d_to_bev_torch_lidar)
 from .assign_sampling import (  # isort:skip, avoid recursive imports
     build_bbox_coder,  # temporally settings

@@ -22,5 +24,6 @@ __all__ = [
     'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
     'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result'
+    'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
+    'box3d_to_corner3d_upright_depth'
 ]
mmdet3d/core/bbox/coders/__init__.py

 from mmdet.core.bbox import build_bbox_coder
 from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
+from .partial_bin_based_bbox_coder import PartialBinBasedBBoxCoder

-__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
+__all__ = [
+    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'PartialBinBasedBBoxCoder'
+]
mmdet3d/core/bbox/coders/partial_bin_based_bbox_coder.py (new file, mode 100644)

import numpy as np
import torch

from mmdet.core.bbox import BaseBBoxCoder
from mmdet.core.bbox.builder import BBOX_CODERS


@BBOX_CODERS.register_module()
class PartialBinBasedBBoxCoder(BaseBBoxCoder):
    """Partial bin based bbox coder.

    Args:
        num_dir_bins (int): Number of bins to encode direction angle.
        num_sizes (int): Number of size clusters.
        mean_sizes (list[list[int]]): Mean size of bboxes in each class.
        with_rot (bool): Whether the bbox is with rotation.
    """

    def __init__(self, num_dir_bins, num_sizes, mean_sizes, with_rot=True):
        super(PartialBinBasedBBoxCoder, self).__init__()
        assert len(mean_sizes) == num_sizes
        self.num_dir_bins = num_dir_bins
        self.num_sizes = num_sizes
        self.mean_sizes = mean_sizes
        self.with_rot = with_rot

    def encode(self, gt_bboxes_3d, gt_labels_3d):
        """Encode ground truth to prediction targets.

        Args:
            gt_bboxes_3d (Tensor): 3d gt bboxes with shape (n, 7).
            gt_labels_3d (Tensor): Gt classes.

        Returns:
            tuple: Targets of center, size and direction.
        """
        # generate center target
        center_target = gt_bboxes_3d[..., 0:3]

        # generate bbox size target
        size_class_target = gt_labels_3d
        size_res_target = gt_bboxes_3d[..., 3:6] - gt_bboxes_3d.new_tensor(
            self.mean_sizes)[size_class_target]

        # generate dir target
        box_num = gt_bboxes_3d.shape[0]
        if self.with_rot:
            (dir_class_target,
             dir_res_target) = self.angle2class(gt_bboxes_3d[..., 6])
        else:
            dir_class_target = gt_labels_3d.new_zeros(box_num)
            dir_res_target = gt_bboxes_3d.new_zeros(box_num)

        return (center_target, size_class_target, size_res_target,
                dir_class_target, dir_res_target)

    def decode(self, bbox_out):
        """Decode predicted parts to bbox3d.

        Args:
            bbox_out (dict): predictions from model, should contain keys below
                - center: predicted bottom center of bboxes.
                - dir_class: predicted bbox direction class.
                - dir_res: predicted bbox direction residual.
                - size_class: predicted bbox size class.
                - size_res: predicted bbox size residual.

        Returns:
            Tensor: decoded bbox3d with shape (batch, n, 7)
        """
        center = bbox_out['center']
        batch_size, num_proposal = center.shape[:2]

        # decode heading angle
        if self.with_rot:
            dir_class = torch.argmax(bbox_out['dir_class'], -1)
            dir_res = torch.gather(bbox_out['dir_res'], 2,
                                   dir_class.unsqueeze(-1))
            dir_res.squeeze_(2)
            dir_angle = self.class2angle(dir_class, dir_res).reshape(
                batch_size, num_proposal, 1)
        else:
            dir_angle = center.new_zeros(batch_size, num_proposal, 1)

        # decode bbox size
        size_class = torch.argmax(bbox_out['size_class'], -1, keepdim=True)
        size_res = torch.gather(bbox_out['size_res'], 2,
                                size_class.unsqueeze(-1).repeat(1, 1, 1, 3))
        mean_sizes = center.new_tensor(self.mean_sizes)
        size_base = torch.index_select(mean_sizes, 0, size_class.reshape(-1))
        bbox_size = size_base.reshape(batch_size, num_proposal,
                                      -1) + size_res.squeeze(2)

        bbox3d = torch.cat([center, bbox_size, dir_angle], dim=-1)
        return bbox3d

    def split_pred(self, preds, base_xyz):
        """Split predicted features to specific parts.

        Args:
            preds (Tensor): predicted features to split.
            base_xyz (Tensor): coordinates of points.

        Returns:
            dict: split results.
        """
        results = {}
        start, end = 0, 0
        preds_trans = preds.transpose(2, 1)

        # decode objectness score
        end += 2
        results['obj_scores'] = preds_trans[..., start:end]
        start = end

        # decode center
        end += 3
        # (batch_size, num_proposal, 3)
        results['center'] = base_xyz + preds_trans[..., start:end]
        start = end

        # decode direction
        end += self.num_dir_bins
        results['dir_class'] = preds_trans[..., start:end]
        start = end

        end += self.num_dir_bins
        dir_res_norm = preds_trans[..., start:end]
        start = end

        results['dir_res_norm'] = dir_res_norm
        results['dir_res'] = dir_res_norm * (np.pi / self.num_dir_bins)

        # decode size
        end += self.num_sizes
        results['size_class'] = preds_trans[..., start:end]
        start = end

        end += self.num_sizes * 3
        size_res_norm = preds_trans[..., start:end]
        batch_size, num_proposal = preds_trans.shape[:2]
        size_res_norm = size_res_norm.view(
            [batch_size, num_proposal, self.num_sizes, 3])
        start = end

        results['size_res_norm'] = size_res_norm
        mean_sizes = preds.new_tensor(self.mean_sizes)
        results['size_res'] = (
            size_res_norm * mean_sizes.unsqueeze(0).unsqueeze(0))

        # decode semantic score
        results['sem_scores'] = preds_trans[..., start:]

        return results

    def angle2class(self, angle):
        """Convert continuous angle to a discrete class and a residual.

        Convert continuous angle to a discrete class and a small
        regression number from class center angle to current angle.

        Args:
            angle (Tensor): Angle is from 0-2pi (or -pi~pi), class center at
                0, 1*(2pi/N), 2*(2pi/N) ... (N-1)*(2pi/N)

        Returns:
            tuple: Encoded discrete class and residual.
        """
        angle = angle % (2 * np.pi)
        angle_per_class = 2 * np.pi / float(self.num_dir_bins)
        shifted_angle = (angle + angle_per_class / 2) % (2 * np.pi)
        angle_cls = shifted_angle // angle_per_class
        angle_res = shifted_angle - (
            angle_cls * angle_per_class + angle_per_class / 2)
        return angle_cls.long(), angle_res

    def class2angle(self, angle_cls, angle_res, limit_period=True):
        """Inverse function to angle2class.

        Args:
            angle_cls (Tensor): Angle class to decode.
            angle_res (Tensor): Angle residual to decode.
            limit_period (bool): Whether to limit angle to [-pi, pi].

        Returns:
            Tensor: angle decoded from angle_cls and angle_res.
        """
        angle_per_class = 2 * np.pi / float(self.num_dir_bins)
        angle_center = angle_cls.float() * angle_per_class
        angle = angle_center + angle_res
        if limit_period:
            angle[angle > np.pi] -= 2 * np.pi
        return angle
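A minimal usage sketch for the encode path of the coder above; the two-class setup and the mean sizes are made-up illustration values, not dataset statistics:

import torch
from mmdet3d.core.bbox.coders import PartialBinBasedBBoxCoder

# Hypothetical 2-class coder with 12 direction bins.
coder = PartialBinBasedBBoxCoder(
    num_dir_bins=12,
    num_sizes=2,
    mean_sizes=[[0.5, 0.5, 0.5], [1.0, 2.0, 1.0]],  # illustrative only
    with_rot=True)

# Two gt boxes as (x, y, z, dx, dy, dz, yaw).
gt_bboxes_3d = torch.tensor([[0., 0., 0., 0.6, 0.4, 0.5, 0.3],
                             [1., 2., 0., 1.1, 2.2, 0.9, -0.2]])
gt_labels_3d = torch.tensor([0, 1])

(center_t, size_cls_t, size_res_t,
 dir_cls_t, dir_res_t) = coder.encode(gt_bboxes_3d, gt_labels_3d)
# size_res_t is each box size minus the mean size of its class;
# dir_cls_t / dir_res_t come from angle2class (bin index + small residual).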
mmdet3d/core/bbox/transforms.py

@@ -84,3 +84,87 @@ def bbox3d2result(bboxes, scores, labels):
     """
     return dict(
         boxes_3d=bboxes.cpu(), scores_3d=scores.cpu(), labels_3d=labels.cpu())

The hunk appends two new helpers:

def upright_depth_to_lidar_torch(points=None, bboxes=None,
                                 to_bottom_center=False):
    """Convert points and boxes in upright depth coordinate to lidar.

    Args:
        points (None | Tensor): points in upright depth coordinate.
        bboxes (None | Tensor): bboxes in upright depth coordinate.
        to_bottom_center (bool): convert bboxes to bottom center.

    Returns:
        tuple: points and bboxes in lidar coordinate.
    """
    if points is not None:
        points_lidar = points.clone()
        points_lidar = points_lidar[..., [1, 0, 2]]
        points_lidar[..., 1] *= -1
    else:
        points_lidar = None
    if bboxes is not None:
        bboxes_lidar = bboxes.clone()
        bboxes_lidar = bboxes_lidar[..., [1, 0, 2, 4, 3, 5, 6]]
        bboxes_lidar[..., 1] *= -1
        if to_bottom_center:
            bboxes_lidar[..., 2] -= 0.5 * bboxes_lidar[..., 5]
    else:
        bboxes_lidar = None
    return points_lidar, bboxes_lidar


def box3d_to_corner3d_upright_depth(boxes3d):
    """Convert box3d to corner3d in upright depth coordinate.

    Args:
        boxes3d (Tensor): boxes with shape [n,7] in upright depth coordinate

    Returns:
        Tensor: boxes with [n, 8, 3] in upright depth coordinate
    """
    boxes_num = boxes3d.shape[0]
    ry = boxes3d[:, 6:7]
    l, w, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
    zeros = boxes3d.new_zeros((boxes_num, 1))
    ones = boxes3d.new_ones((boxes_num, 1))
    # zeros = torch.cuda.FloatTensor(boxes_num, 1).fill_(0)
    # ones = torch.cuda.FloatTensor(boxes_num, 1).fill_(1)
    x_corners = torch.cat(
        [-l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2., -l / 2.],
        dim=1)  # (N, 8)
    y_corners = torch.cat(
        [w / 2., w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2.],
        dim=1)  # (N, 8)
    z_corners = torch.cat(
        [h / 2., h / 2., h / 2., h / 2., -h / 2., -h / 2., -h / 2., -h / 2.],
        dim=1)  # (N, 8)
    temp_corners = torch.cat(
        (x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
         z_corners.unsqueeze(dim=2)),
        dim=2)  # (N, 8, 3)

    cosa, sina = torch.cos(-ry), torch.sin(-ry)
    raw_1 = torch.cat([cosa, -sina, zeros], dim=1)  # (N, 3)
    raw_2 = torch.cat([sina, cosa, zeros], dim=1)  # (N, 3)
    raw_3 = torch.cat([zeros, zeros, ones], dim=1)  # (N, 3)
    R = torch.cat(
        (raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
         raw_3.unsqueeze(dim=1)),
        dim=1)  # (N, 3, 3)

    rotated_corners = torch.matmul(temp_corners, R)  # (N, 8, 3)
    x_corners = rotated_corners[:, :, 0]
    y_corners = rotated_corners[:, :, 1]
    z_corners = rotated_corners[:, :, 2]

    x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]

    x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
    y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
    z = z_loc.view(-1, 1) + z_corners.view(-1, 8)

    corners3d = torch.cat(
        (x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)), dim=2)

    return corners3d
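A minimal sketch exercising the two new helpers with a single made-up box (all values illustrative only):

import torch
from mmdet3d.core.bbox.transforms import (box3d_to_corner3d_upright_depth,
                                          upright_depth_to_lidar_torch)

# One box in upright depth coordinates: (x, y, z, l, w, h, ry).
boxes = torch.tensor([[1.0, 2.0, 0.5, 4.0, 2.0, 1.5, 0.1]])

corners = box3d_to_corner3d_upright_depth(boxes)
print(corners.shape)  # torch.Size([1, 8, 3])

# Swap x/y, flip the new y axis, and optionally move to the bottom center.
_, boxes_lidar = upright_depth_to_lidar_torch(
    bboxes=boxes, to_bottom_center=True)
print(boxes_lidar.shape)  # torch.Size([1, 7])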
mmdet3d/core/evaluation/indoor_eval.py

 import numpy as np
 import torch
+from mmcv.utils import print_log
+from terminaltables import AsciiTable

 from mmdet3d.core.bbox.iou_calculators.iou3d_calculator import \
     bbox_overlaps_3d

@@ -263,14 +265,14 @@ def eval_map_recall(det_infos, gt_infos, ovthresh=None):
                 recall[iou_idx][label], precision[iou_idx][label], ap[
                     iou_idx][label] = ret_values[i][iou_idx]
             else:
-                recall[iou_idx][label] = [0]
-                precision[iou_idx][label] = [0]
-                ap[iou_idx][label] = [0]
+                recall[iou_idx][label] = np.zeros(1)
+                precision[iou_idx][label] = np.zeros(1)
+                ap[iou_idx][label] = np.zeros(1)

     return recall, precision, ap


-def indoor_eval(gt_annos, dt_annos, metric, label2cat):
+def indoor_eval(gt_annos, dt_annos, metric, label2cat, logger=None):
     """Scannet Evaluation.

     Evaluate the result of the detection.

@@ -280,6 +282,8 @@ def indoor_eval(gt_annos, dt_annos, metric, label2cat):
         dt_annos (list[dict]): Detection annotations.
         metric (list[float]): AP IoU thresholds.
         label2cat (dict): {label: cat}.
+        logger (logging.Logger | str | None): The way to print the mAP
+            summary. See `mmdet.utils.print_log()` for details. Default: None.

     Return:
         dict: Dict of results.

@@ -301,20 +305,41 @@ def indoor_eval(gt_annos, dt_annos, metric, label2cat):
                 boxes_3d=np.array([], dtype=np.float32),
                 labels_3d=np.array([], dtype=np.int64)))

-    result_str = str()
-    result_str += 'mAP'
     rec, prec, ap = eval_map_recall(dt_annos, gt_infos, metric)
-    ret_dict = {}
+    ret_dict = dict()
+    header = ['classes']
+    table_columns = [[label2cat[label]
+                      for label in ap[0].keys()] + ['Overall']]
     for i, iou_thresh in enumerate(metric):
+        header.append(f'AP_{iou_thresh:.2f}')
+        header.append(f'AR_{iou_thresh:.2f}')
+        rec_list = []
         for label in ap[i].keys():
             ret_dict[f'{label2cat[label]}_AP_{iou_thresh:.2f}'] = float(
                 ap[i][label][0])
         ret_dict[f'mAP_{iou_thresh:.2f}'] = float(
             np.mean(list(ap[i].values())))
+
+        table_columns.append(list(map(float, list(ap[i].values()))))
+        table_columns[-1] += [ret_dict[f'mAP_{iou_thresh:.2f}']]
+        table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]]
+
         for label in rec[i].keys():
             ret_dict[f'{label2cat[label]}_rec_{iou_thresh:.2f}'] = float(
                 rec[i][label][-1])
+            rec_list.append(rec[i][label][-1])
+        ret_dict[f'mAR_{iou_thresh:.2f}'] = float(np.mean(rec_list))
+
+        table_columns.append(list(map(float, rec_list)))
+        table_columns[-1] += [ret_dict[f'mAR_{iou_thresh:.2f}']]
+        table_columns[-1] = [f'{x:.4f}' for x in table_columns[-1]]
+
+    table_data = [header]
+    table_rows = list(zip(*table_columns))
+    table_data += table_rows
+    table = AsciiTable(table_data)
+    table.inner_footing_row_border = True
+    print_log('\n' + table.table, logger=logger)
+
     return ret_dict
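The new summary printing builds the table column-major (class names first, then one AP and one AR column per IoU threshold) and transposes with zip before handing it to terminaltables. A standalone sketch of the same pattern with made-up numbers:

from terminaltables import AsciiTable

header = ['classes', 'AP_0.25', 'AR_0.25']
table_columns = [['chair', 'table', 'Overall'],   # class column + footer
                 ['0.8123', '0.6511', '0.7317'],  # AP per class + mAP
                 ['0.9000', '0.7200', '0.8100']]  # AR per class + mAR

table_data = [header] + list(zip(*table_columns))  # transpose to rows
table = AsciiTable(table_data)
table.inner_footing_row_border = True  # rule off the 'Overall' row
print(table.table)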
mmdet3d/core/post_processing/__init__.py

 from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
                                         merge_aug_proposals, merge_aug_scores,
                                         multiclass_nms)
-from .box3d_nms import box3d_multiclass_nms
+from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms

 __all__ = [
     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
-    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms'
+    'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
+    'aligned_3d_nms'
 ]
mmdet3d/core/post_processing/box3d_nms.py

@@ -64,3 +64,52 @@ def box3d_multiclass_nms(mlvl_bboxes,
         labels = mlvl_scores.new_zeros((0, mlvl_scores.size(-1)))
         dir_scores = mlvl_scores.new_zeros((0, ))
     return bboxes, scores, labels, dir_scores

The hunk appends one new function:

def aligned_3d_nms(boxes, scores, classes, thresh):
    """3d nms for aligned boxes.

    Args:
        boxes (Tensor): Aligned box with shape [n, 6].
        scores (Tensor): Scores of each box.
        classes (Tensor): Class of each box.
        thresh (float): Iou threshold for nms.

    Returns:
        Tensor: Indices of selected boxes.
    """
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    z1 = boxes[:, 2]
    x2 = boxes[:, 3]
    y2 = boxes[:, 4]
    z2 = boxes[:, 5]
    area = (x2 - x1) * (y2 - y1) * (z2 - z1)
    zero = boxes.new_zeros(1, )

    score_sorted = torch.argsort(scores)
    pick = []
    while (score_sorted.shape[0] != 0):
        last = score_sorted.shape[0]
        i = score_sorted[-1]
        pick.append(i)

        xx1 = torch.max(x1[i], x1[score_sorted[:last - 1]])
        yy1 = torch.max(y1[i], y1[score_sorted[:last - 1]])
        zz1 = torch.max(z1[i], z1[score_sorted[:last - 1]])
        xx2 = torch.min(x2[i], x2[score_sorted[:last - 1]])
        yy2 = torch.min(y2[i], y2[score_sorted[:last - 1]])
        zz2 = torch.min(z2[i], z2[score_sorted[:last - 1]])
        classes1 = classes[i]
        classes2 = classes[score_sorted[:last - 1]]
        inter_l = torch.max(zero, xx2 - xx1)
        inter_w = torch.max(zero, yy2 - yy1)
        inter_h = torch.max(zero, zz2 - zz1)

        inter = inter_l * inter_w * inter_h
        iou = inter / (area[i] + area[score_sorted[:last - 1]] - inter)
        iou = iou * (classes1 == classes2).float()
        score_sorted = score_sorted[torch.nonzero(iou <= thresh).flatten()]

    indices = boxes.new_tensor(pick, dtype=torch.long)
    return indices
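A minimal sketch of calling the new aligned NMS with two heavily overlapping same-class boxes and one distant box (coordinates illustrative); the lower-scored duplicate of the first box should be suppressed:

import torch
from mmdet3d.core.post_processing import aligned_3d_nms

# Axis-aligned boxes as (x1, y1, z1, x2, y2, z2).
boxes = torch.tensor([[0., 0., 0., 1., 1., 1.],
                      [0.05, 0.05, 0., 1., 1., 1.],  # overlaps box 0
                      [5., 5., 5., 6., 6., 6.]])     # far away
scores = torch.tensor([0.9, 0.8, 0.7])
classes = torch.tensor([0, 0, 1])

keep = aligned_3d_nms(boxes, scores, classes, thresh=0.25)
print(keep)  # expect indices 0 and 2; box 1 is suppressed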
mmdet3d/datasets/__init__.py

@@ -8,8 +8,8 @@ from .loader import DistributedGroupSampler, GroupSampler, build_dataloader
 from .nuscenes_dataset import NuScenesDataset
 from .pipelines import (GlobalRotScale, IndoorFlipData, IndoorGlobalRotScale,
                         IndoorPointSample, IndoorPointsColorJitter,
-                        IndoorPointsColorNormalize, LoadAnnotations3D,
-                        LoadPointsFromFile, ObjectNoise, ObjectRangeFilter,
+                        LoadAnnotations3D, LoadPointsFromFile,
+                        NormalizePointsColor, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
                         RandomFlip3D)
 from .scannet_dataset import ScanNetDataset

@@ -21,7 +21,7 @@ __all__ = [
     'CocoDataset', 'Kitti2DDataset', 'NuScenesDataset', 'ObjectSample',
     'RandomFlip3D', 'ObjectNoise', 'GlobalRotScale', 'PointShuffle',
     'ObjectRangeFilter', 'PointsRangeFilter', 'Collect3D',
-    'LoadPointsFromFile', 'IndoorPointsColorNormalize', 'IndoorPointSample',
+    'LoadPointsFromFile', 'NormalizePointsColor', 'IndoorPointSample',
     'LoadAnnotations3D', 'IndoorPointsColorJitter', 'IndoorGlobalRotScale',
     'IndoorFlipData', 'SUNRGBDDataset', 'ScanNetDataset', 'Custom3DDataset'
 ]
mmdet3d/datasets/custom_3d.py

@@ -3,7 +3,6 @@ import tempfile
 import mmcv
 import numpy as np
-from mmcv.utils import print_log
 from torch.utils.data import Dataset

 from mmdet.datasets import DATASETS

@@ -19,12 +18,14 @@ class Custom3DDataset(Dataset):
                  pipeline=None,
                  classes=None,
                  modality=None,
+                 filter_empty_gt=True,
                  test_mode=False):
         super().__init__()
         self.data_root = data_root
         self.ann_file = ann_file
         self.test_mode = test_mode
         self.modality = modality
+        self.filter_empty_gt = filter_empty_gt
         self.CLASSES = self.get_classes(classes)
         self.data_infos = self.load_annotations(self.ann_file)

@@ -52,7 +53,7 @@ class Custom3DDataset(Dataset):
         if not self.test_mode:
             annos = self.get_ann_info(index)
             input_dict['ann_info'] = annos
-            if len(annos['gt_bboxes_3d']) == 0:
+            if self.filter_empty_gt and len(annos['gt_bboxes_3d']) == 0:
                 return None
         return input_dict

@@ -67,7 +68,8 @@ class Custom3DDataset(Dataset):
             return None
         self.pre_pipeline(input_dict)
         example = self.pipeline(input_dict)
-        if example is None or len(example['gt_bboxes_3d']._data) == 0:
+        if self.filter_empty_gt and (
+                example is None or len(example['gt_bboxes_3d']._data) == 0):
             return None
         return example

@@ -124,23 +126,20 @@ class Custom3DDataset(Dataset):
             results (list[dict]): List of results.
             metric (str | list[str]): Metrics to be evaluated.
             iou_thr (list[float]): AP IoU thresholds.
         """
         from mmdet3d.core.evaluation import indoor_eval
         assert isinstance(
             results, list), f'Expect results to be list, got {type(results)}.'
         assert len(results) > 0, f'Expect length of results > 0.'
         assert len(results) == len(self.data_infos)
         assert isinstance(
             results[0], dict), \
             f'Expect elements in results to be dict, got {type(results[0])}.'
         gt_annos = [info['annos'] for info in self.data_infos]
         label2cat = {i: cat_id for i, cat_id in enumerate(self.CLASSES)}
-        ret_dict = indoor_eval(gt_annos, results, iou_thr, label2cat)
-        result_str = str()
-        for key, val in ret_dict.items():
-            result_str += f'{key}: {val}\n'
-        mAP_25, mAP_50 = ret_dict['mAP_0.25'], ret_dict['mAP_0.50']
-        result_str += f'mAP(0.25): {mAP_25} mAP(0.50): {mAP_50}'
-        print_log('\n' + result_str, logger=logger)
+        ret_dict = indoor_eval(
+            gt_annos, results, iou_thr, label2cat, logger=logger)
         return ret_dict
mmdet3d/datasets/pipelines/__init__.py

@@ -3,10 +3,11 @@ from .dbsampler import DataBaseSampler, MMDataBaseSampler
 from .formating import DefaultFormatBundle, DefaultFormatBundle3D
 from .indoor_augment import (IndoorFlipData, IndoorGlobalRotScale,
                              IndoorPointsColorJitter)
-from .indoor_loading import (IndoorPointsColorNormalize, LoadAnnotations3D,
-                             LoadPointsFromFile)
+from .indoor_loading import (LoadAnnotations3D, LoadPointsFromFile,
+                             NormalizePointsColor)
 from .indoor_sample import IndoorPointSample
 from .loading import LoadMultiViewImageFromFiles
+from .point_seg_class_mapping import PointSegClassMapping
 from .train_aug import (GlobalRotScale, ObjectNoise, ObjectRangeFilter,
                         ObjectSample, PointShuffle, PointsRangeFilter,
                         RandomFlip3D)

@@ -17,6 +18,6 @@ __all__ = [
     'Compose', 'LoadMultiViewImageFromFiles', 'LoadPointsFromFile',
     'DefaultFormatBundle', 'DefaultFormatBundle3D', 'DataBaseSampler',
     'IndoorGlobalRotScale', 'IndoorPointsColorJitter', 'IndoorFlipData',
-    'MMDataBaseSampler', 'IndoorPointsColorNormalize', 'LoadAnnotations3D',
-    'IndoorPointSample'
+    'MMDataBaseSampler', 'NormalizePointsColor', 'LoadAnnotations3D',
+    'IndoorPointSample', 'PointSegClassMapping'
 ]
mmdet3d/datasets/pipelines/indoor_augment.py

@@ -224,7 +224,7 @@ class IndoorGlobalRotScale(object):
         results['scale_ratio'] = scale_ratio
         results['points'] = points
-        results['gt_bboxes_3d'] = gt_bboxes_3d
+        results['gt_bboxes_3d'] = gt_bboxes_3d.astype(np.float32)
         return results

     def __repr__(self):
mmdet3d/datasets/pipelines/indoor_loading.py

@@ -6,8 +6,8 @@ from mmdet.datasets.pipelines import LoadAnnotations

 @PIPELINES.register_module()
-class IndoorPointsColorNormalize(object):
-    """Indoor points color normalize.
+class NormalizePointsColor(object):
+    """Normalize color of points.

     Normalize color of the points.

@@ -45,9 +45,16 @@ class LoadPointsFromFile(object):
         use_dim (list[int]): Which dimensions of the points to be used.
             Default: [0, 1, 2]. For KITTI dataset, set use_dim=4
             or use_dim=[0, 1, 2, 3] to use the intensity dimension
+        file_client_args (dict): Config dict of file clients, refer to
+            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
+            for more details.
     """

-    def __init__(self, load_dim=6, use_dim=[0, 1, 2], shift_height=False):
+    def __init__(self,
+                 load_dim=6,
+                 use_dim=[0, 1, 2],
+                 shift_height=False,
+                 file_client_args=dict(backend='disk')):
         self.shift_height = shift_height
         if isinstance(use_dim, int):
             use_dim = list(range(use_dim))

@@ -56,13 +63,21 @@ class LoadPointsFromFile(object):
         self.load_dim = load_dim
         self.use_dim = use_dim
+        self.file_client_args = file_client_args.copy()
+        self.file_client = None

     def _load_points(self, pts_filename):
-        mmcv.check_file_exist(pts_filename)
-        if pts_filename.endswith('.npy'):
-            points = np.load(pts_filename)
-        else:
-            points = np.fromfile(pts_filename, dtype=np.float32)
+        if self.file_client is None:
+            self.file_client = mmcv.FileClient(**self.file_client_args)
+        try:
+            pts_bytes = self.file_client.get(pts_filename)
+            points = np.frombuffer(pts_bytes, dtype=np.float32)
+        except ConnectionError:
+            mmcv.check_file_exist(pts_filename)
+            if pts_filename.endswith('.npy'):
+                points = np.load(pts_filename)
+            else:
+                points = np.fromfile(pts_filename, dtype=np.float32)
         return points

     def __call__(self, results):

@@ -113,6 +128,9 @@ class LoadAnnotations3D(LoadAnnotations):
             Defaults to False.
         poly2mask (bool, optional): Whether to convert polygon annotations
             to bitmasks. Defaults to True.
+        file_client_args (dict): Config dict of file clients, refer to
+            https://github.com/open-mmlab/mmcv/blob/master/mmcv/fileio/file_client.py
+            for more details.
     """

     def __init__(self,

@@ -124,8 +142,15 @@ class LoadAnnotations3D(LoadAnnotations):
                  with_label=False,
                  with_mask=False,
                  with_seg=False,
-                 poly2mask=True):
-        super().__init__(with_bbox, with_label, with_mask, with_seg, poly2mask)
+                 poly2mask=True,
+                 file_client_args=dict(backend='disk')):
+        super().__init__(
+            with_bbox,
+            with_label,
+            with_mask,
+            with_seg,
+            poly2mask,
+            file_client_args=file_client_args)
         self.with_bbox_3d = with_bbox_3d
         self.with_label_3d = with_label_3d
         self.with_mask_3d = with_mask_3d

@@ -142,16 +167,35 @@ class LoadAnnotations3D(LoadAnnotations):
     def _load_masks_3d(self, results):
         pts_instance_mask_path = results['ann_info']['pts_instance_mask_path']
-        mmcv.check_file_exist(pts_instance_mask_path)
-        pts_instance_mask = np.fromfile(pts_instance_mask_path, dtype=np.long)
+
+        if self.file_client is None:
+            self.file_client = mmcv.FileClient(**self.file_client_args)
+        try:
+            mask_bytes = self.file_client.get(pts_instance_mask_path)
+            pts_instance_mask = np.frombuffer(mask_bytes, dtype=np.int)
+        except ConnectionError:
+            mmcv.check_file_exist(pts_instance_mask_path)
+            pts_instance_mask = np.fromfile(
+                pts_instance_mask_path, dtype=np.long)
+
         results['pts_instance_mask'] = pts_instance_mask
         results['pts_mask_fields'].append(results['pts_instance_mask'])
         return results

     def _load_semantic_seg_3d(self, results):
         pts_semantic_mask_path = results['ann_info']['pts_semantic_mask_path']
-        mmcv.check_file_exist(pts_semantic_mask_path)
-        pts_semantic_mask = np.fromfile(pts_semantic_mask_path, dtype=np.long)
+
+        if self.file_client is None:
+            self.file_client = mmcv.FileClient(**self.file_client_args)
+        try:
+            mask_bytes = self.file_client.get(pts_semantic_mask_path)
+            # add .copy() to fix read-only bug
+            pts_semantic_mask = np.frombuffer(mask_bytes, dtype=np.int).copy()
+        except ConnectionError:
+            mmcv.check_file_exist(pts_semantic_mask_path)
+            pts_semantic_mask = np.fromfile(
+                pts_semantic_mask_path, dtype=np.long)
+
         results['pts_semantic_mask'] = pts_semantic_mask
         results['pts_seg_fields'].append(results['pts_semantic_mask'])
         return results
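The loading changes above route reads through mmcv's FileClient so one pipeline can serve both a petrel/S3 bucket and local disk, falling back to plain file I/O on ConnectionError. A minimal sketch of that pattern in isolation; the point-cloud path below is hypothetical:

import mmcv
import numpy as np

file_client_args = dict(backend='disk')  # or a petrel backend, as in the configs
file_client = mmcv.FileClient(**file_client_args)

pts_filename = 'data/kitti/velodyne_reduced/000000.bin'  # hypothetical path
try:
    pts_bytes = file_client.get(pts_filename)
    points = np.frombuffer(pts_bytes, dtype=np.float32)
except ConnectionError:
    # Fall back to plain disk reads when the storage backend is unreachable.
    mmcv.check_file_exist(pts_filename)
    points = np.fromfile(pts_filename, dtype=np.float32)

points = points.reshape(-1, 4)  # KITTI points are (x, y, z, intensity)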