OpenDAS / mmdetection3d / Commits

Commit 191288eb, authored Jun 07, 2020 by zhangwenwei
Commit message: Clean voxel encoders
Parent: 27ebcfac
Showing 20 changed files with 498 additions and 914 deletions.
configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py                 +0   -243
configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py             +0   -218
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py  +2   -2
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py                      +2   -2
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py                   +1   -2
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py                             +1   -2
configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py     +1   -1
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py                   +1   -5
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py                      +1   -5
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py                      +2   -2
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py                             +20  -8
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py                                +20  -8
configs/nus/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py                               +206 -0
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py                        +28  -10
mmdet3d/core/bbox/box_np_ops.py                                                    +2   -2
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py                            +2   -4
mmdet3d/models/voxel_encoders/__init__.py                                          +4   -5
mmdet3d/models/voxel_encoders/pillar_encoder.py                                    +40  -184
mmdet3d/models/voxel_encoders/utils.py                                             +13  -31
mmdet3d/models/voxel_encoders/voxel_encoder.py                                     +152 -180
File: configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (deleted, 100644 → 0; shown as of parent 27ebcfac)
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
    type='MVXFasterRCNNV2',
    pts_voxel_layer=dict(
        max_num_points=64,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
        voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
    ),
    pts_voxel_encoder=dict(
        type='HardVFE',
        num_input_features=4,
        num_filters=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    pts_middle_encoder=dict(
        type='PointPillarsScatter',
        in_channels=64,
        output_shape=[400, 400],  # checked from PointCloud3D
    ),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256],
    ),
    pts_neck=dict(
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128],
    ),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[
                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
                [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
                [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
                [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
                [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
                [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
            ],
            sizes=[
                [1.95017717, 4.60718145, 1.72270761],  # car
                [2.4560939, 6.73778078, 2.73004906],  # truck
                [2.87427237, 12.01320693, 3.81509561],  # trailer
                [0.60058911, 1.68452161, 1.27192197],  # bicycle
                [0.66344886, 0.7256437, 1.75748069],  # pedestrian
                [0.39694519, 0.40359262, 1.06232151],  # traffic_cone
                [2.49008838, 0.48578221, 0.98297065],  # barrier
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
    pts=dict(
        assigner=dict(  # for Car
            type='MaxIoUAssigner',
            iou_calculator=dict(type='BboxOverlapsNearest3D'),
            pos_iou_thr=0.6,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        allowed_border=0,
        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    pts=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.2,
        score_thr=0.05,
        min_bbox_size=0,
        max_num=500
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
    ))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
    use_lidar=True,
    use_depth=False,
    use_lidar_intensity=True,
    use_camera=False,
)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    object_rot_range=[0.0, 0.0],
    prepare=dict(),
    classes=class_names,
    sample_groups=dict(
        bus=4,
        trailer=4,
        truck=4,
    ))
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/': 's3://nuscenes/nuscenes/',
        'data/nuscenes/': 's3://nuscenes/nuscenes/'
    }))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.3925, 0.3925],
        scaling_uniform_noise=[0.95, 1.05],
        trans_normal_noise=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='RandomFlip3D', flip_ratio=0),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
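Aside (not part of the commit): while it existed, a config file like the one above was consumed through mmcv's config loader, which executes the Python module and exposes its top-level variables. A minimal sketch, assuming mmcv is installed and the file is still on disk:

    from mmcv import Config

    # Config.fromfile executes the config module; top-level variables such as
    # voxel_size or model become attributes of the returned Config object.
    cfg = Config.fromfile(
        'configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py')
    print(cfg.voxel_size)                    # [0.25, 0.25, 8]
    print(cfg.model.pts_voxel_encoder.type)  # 'HardVFE'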
File: configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py (deleted, 100644 → 0; shown as of parent 27ebcfac)
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z
model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=5,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(
        type='VoxelFeatureExtractorV3',
        num_input_features=4,
        num_filters=[4],
        with_distance=False),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256],
    ),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256],
    ),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=1,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=True),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
    ),
)
# model training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        iou_calculator=dict(type='BboxOverlapsNearest3D'),
        pos_iou_thr=0.6,
        neg_iou_thr=0.45,
        min_pos_iou=0.45,
        ignore_iof_thr=-1),
    allowed_border=0,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    use_rotate_nms=True,
    nms_across_levels=False,
    nms_thr=0.01,
    score_thr=0.3,
    min_bbox_size=0,
    nms_pre=100,
    max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
    use_lidar=False,
    use_lidar_reduced=True,
    use_depth=False,
    use_lidar_intensity=True,
    use_camera=False,
)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    object_rot_range=[0.0, 0.0],
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5),
    ),
    classes=class_names,
    sample_groups=dict(Car=15),
)
file_client_args = dict(
    backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
        scaling_uniform_noise=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
data = dict(
    samples_per_gpu=6,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_train.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# optimizer
lr = 0.0018  # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
File: configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py (view file @ 191288eb)

@@ -28,8 +28,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='DynamicVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,
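This hunk is the config-side half of the rename carried out in pillar_encoder.py and voxel_encoder.py below: every voxel encoder now takes in_channels/feat_channels instead of num_input_features/num_filters. A hypothetical one-off migration helper (not part of the commit; the key table is taken directly from these diffs) could look like:

    # Sketch of an illustrative migration helper (hypothetical, not in the
    # repo): rewrite old-style voxel-encoder config keys to the new names.
    def migrate_voxel_encoder_cfg(cfg: dict) -> dict:
        renames = {
            'num_input_features': 'in_channels',
            'num_filters': 'feat_channels',
        }
        return {renames.get(k, k): v for k, v in cfg.items()}

    old = dict(type='DynamicVFE', num_input_features=4, num_filters=[64, 64])
    new = migrate_voxel_encoder_cfg(old)
    # {'type': 'DynamicVFE', 'in_channels': 4, 'feat_channels': [64, 64]}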
File: configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py (view file @ 191288eb)

@@ -12,8 +12,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='DynamicPillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range,
File: configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py (view file @ 191288eb)

@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range),
     middle_encoder=dict(
File: configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py (view file @ 191288eb)

@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
        voxel_size=voxel_size,
         point_cloud_range=point_cloud_range),
     middle_encoder=dict(
File: configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py (view file @ 191288eb)

@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,
File: configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py (view file @ 191288eb)

@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
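This replacement dict is so much smaller because HardSimpleVFE (introduced in voxel_encoder.py below) takes no constructor arguments: per its new docstring it only averages the values of points in a voxel, and the old VoxelFeatureExtractorV3.__init__ stored but never used its filter arguments. Restating the swap from the hunk above as plain config code:

    # Before this commit: argument-heavy encoder whose arguments were unused.
    voxel_encoder = dict(
        type='VoxelFeatureExtractorV3',
        num_input_features=4,
        num_filters=[4],
        with_distance=False)

    # After this commit: the same behavior, spelled directly.
    voxel_encoder = dict(type='HardSimpleVFE')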
File: configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py (view file @ 191288eb)

@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
File: configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py (view file @ 191288eb)

@@ -10,8 +10,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='PillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         # these two arguments should be consistent with the voxel_generator
         voxel_size=[0.16, 0.16, 4],
File: configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py (view file @ 191288eb)

@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000),  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseEncoder',
         in_channels=4,

@@ -104,9 +100,21 @@ db_sampler = dict(
     classes=class_names,
     sample_groups=dict(Car=15),
 )
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=True,
+        with_label_3d=True,
+        file_client_args=file_client_args),
     dict(type='ObjectSample', db_sampler=db_sampler),
     dict(
         type='ObjectNoise',

@@ -126,7 +134,11 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(
         type='DefaultFormatBundle3D',
File: configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py (view file @ 191288eb)

@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,

@@ -120,13 +120,25 @@ classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
            'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
 img_norm_cfg = dict(
     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://nuscenes/nuscenes/',
+        'data/nuscenes/': 's3://nuscenes/nuscenes/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations',
+        with_bbox=True,
+        with_mask=False,
+        file_client_args=file_client_args),
     dict(
         type='Resize',
-        img_scale=[(1200, 720), (1920, 1080)],
-        multiscale_mode='range',
+        img_scale=(1280, 720),
+        ratio_range=(0.75, 1.25),
         keep_ratio=True),
     dict(type='RandomFlip', flip_ratio=0.5),
     dict(type='Normalize', **img_norm_cfg),

@@ -135,10 +147,10 @@ train_pipeline = [
     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
     dict(
         type='MultiScaleFlipAug',
-        img_scale=(1600, 900),
+        img_scale=(1280, 720),
         flip=False,
         transforms=[
             dict(type='Resize', keep_ratio=True),

@@ -192,6 +204,6 @@ total_epochs = 12
 dist_params = dict(backend='nccl', port=29501)
 log_level = 'INFO'
 work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
-load_from = None
+load_from = './pretrain_mmdet/faster_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_3x-4767dd8e.pth'  # noqa
 resume_from = None
 workflow = [('train', 1)]
File: configs/nus/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py (new file, 0 → 100644; view file @ 191288eb)

# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='FasterRCNN',
    pretrained='open-mmlab://regnetx_3.2gf',
    backbone=dict(
        type='RegNet',
        arch='regnetx_3.2gf',
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        base_channels=32,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[96, 192, 432, 1008],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=10,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        # following the setting of detectron,
        # which improves ~0.2 bbox mAP.
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
           'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
    # The mean and std are used in PyCls when training RegNets
    mean=[103.53, 116.28, 123.675],
    std=[57.375, 57.12, 58.395],
    to_rgb=False)
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/': 's3://nuscenes/nuscenes/',
        'data/nuscenes/': 's3://nuscenes/nuscenes/'
    }))
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=False,
        file_client_args=file_client_args),
    dict(
        type='Resize',
        img_scale=(1280, 720),
        ratio_range=(0.75, 1.25),
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1280, 720),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth'  # noqa
resume_from = None
workflow = [('train', 1)]
File: configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (view file @ 191288eb)

@@ -15,8 +15,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='HardVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,

@@ -85,9 +85,7 @@ model = dict(
             loss_weight=1.0),
         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(
-            type='CrossEntropyLoss',
-            use_sigmoid=False,
-            loss_weight=0.2),
-    ),
-)
+        loss_dir=dict(
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
 # model training and testing settings
 train_cfg = dict(
     pts=dict(

@@ -138,10 +136,23 @@ db_sampler = dict(
         trailer=4,
         truck=4,
     ))
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel',
+#     path_mapping=dict({
+#         './data/nuscenes/': 's3://nuscenes/nuscenes/',
+#         'data/nuscenes/': 's3://nuscenes/nuscenes/'
+#     }))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
         type='GlobalRotScale',

@@ -156,8 +167,15 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='RandomFlip3D', flip_ratio=0),
     dict(
File: mmdet3d/core/bbox/box_np_ops.py (view file @ 191288eb)

@@ -466,8 +466,8 @@ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
 def surface_equ_3d(polygon_surfaces):
     # return [a, b, c], d in ax+by+cz+d=0
     # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
-    surface_vec = polygon_surfaces[:, :, :2, :] - polygon_surfaces[:, :, 1:3, :]
+    surface_vec = polygon_surfaces[:, :, :2, :] - \
+        polygon_surfaces[:, :, 1:3, :]
     # normal_vec: [..., 3]
     normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
     # print(normal_vec.shape, points[..., 0, :].shape)
File: mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py (view file @ 191288eb)

@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module):
                 3,
                 padding=1,
                 norm_cfg=norm_cfg,
-                indice_key=f'rcnn_down0',
-                conv_type='SubMConv3d'))
+                indice_key='rcnn_down0'))
         merge_conv_channel_last = channel
         down_conv_channel_last = merge_conv_channel_last

@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module):
                 3,
                 padding=1,
                 norm_cfg=norm_cfg,
-                indice_key=f'rcnn_down1',
-                conv_type='SubMConv3d'))
+                indice_key='rcnn_down1'))
         down_conv_channel_last = channel
         self.conv_down.add_module(
             'merge_conv',
File: mmdet3d/models/voxel_encoders/__init__.py (view file @ 191288eb)

-from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
-from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
-                            VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
+from .pillar_encoder import PillarFeatureNet
+from .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE

-__all__ = [
-    'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
-    'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
-]
+__all__ = [
+    'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',
+    'DynamicSimpleVFE'
+]
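All of these classes are registered in the VOXEL_ENCODERS registry (see the @VOXEL_ENCODERS.register_module() decorators in the diffs below), which is what lets the configs above refer to them by their type string. A rough sketch of how a config dict becomes a module, assuming mmdet3d's standard registry/builder pattern (the helper name here is illustrative, not the repo's exact builder):

    # Assumption: this mirrors mmdet3d's builder, which looks up cfg['type']
    # in VOXEL_ENCODERS and calls the class with the remaining keys as kwargs.
    from mmdet3d.models.registry import VOXEL_ENCODERS

    def build_voxel_encoder_sketch(cfg):
        cfg = dict(cfg)  # do not mutate the caller's config
        encoder_cls = VOXEL_ENCODERS.get(cfg.pop('type'))
        return encoder_cls(**cfg)

    encoder = build_voxel_encoder_sketch(dict(type='HardSimpleVFE'))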
File: mmdet3d/models/voxel_encoders/pillar_encoder.py (view file @ 191288eb)

@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator
 @VOXEL_ENCODERS.register_module()
 class PillarFeatureNet(nn.Module):
+    """Pillar Feature Net.
+
+    The network prepares the pillar features and performs forward pass
+    through PFNLayers.
+
+    Args:
+        in_channels (int). Number of input features,
+            either x, y, z or x, y, z, r.
+        feat_channels (list[int]). Number of features in each of the
+            N PFNLayers.
+        with_distance (bool). Whether to include Euclidean distance
+            to points.
+        voxel_size (list[float]). Size of voxels, only utilize x and y
+            size.
+        point_cloud_range (list[float]). Point cloud range, only
+            utilizes x and y min.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1),
                  norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int). Number of input features,
-                either x, y, z or x, y, z, r.
-            use_norm (bool). Whether to include BatchNorm.
-            num_filters (list[int]). Number of features in each of the
-                N PFNLayers.
-            with_distance (bool). Whether to include Euclidean distance
-                to points.
-            voxel_size (list[float]). Size of voxels, only utilize x and y
-                size.
-            point_cloud_range (list[float]). Point cloud range, only
-                utilize x and y min.
-        """
         super(PillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 2
+            in_channels += 2
         if with_distance:
-            num_input_features += 1
+            in_channels += 1
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
         # Create PillarFeatureNet layers
-        self.num_input_features = num_input_features
-        num_filters = [num_input_features] + list(num_filters)
+        self.in_channels = in_channels
+        feat_channels = [in_channels] + list(feat_channels)
         pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
+            if i < len(feat_channels) - 2:
                 last_layer = False
             else:
                 last_layer = True

@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module):
             PFNLayer(
                 in_filters,
                 out_filters,
-                use_norm,
+                norm_cfg=norm_cfg,
                 last_layer=last_layer,
                 mode=mode))
         self.pfn_layers = nn.ModuleList(pfn_layers)
@@ -122,9 +121,8 @@ class PillarFeatureNet(nn.Module):
 class DynamicPillarFeatureNet(PillarFeatureNet):

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,

@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
         """
         super(DynamicPillarFeatureNet, self).__init__(
-            num_input_features,
-            use_norm,
-            num_filters,
+            in_channels,
+            feat_channels,
             with_distance,
             with_cluster_center=with_cluster_center,
             with_voxel_center=with_voxel_center,
             voxel_size=voxel_size,
             point_cloud_range=point_cloud_range,
             norm_cfg=norm_cfg,
             mode=mode)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         pfn_layers = []
         # TODO: currently only support one PFNLayer
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
             features = torch.cat([point_feats, feat_per_point], dim=1)

         return voxel_feats, voxel_coors
-
-
-@VOXEL_ENCODERS.register_module()
-class AlignedPillarFeatureNet(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
-                 with_distance=False,
-                 with_cluster_center=True,
-                 with_voxel_center=True,
-                 voxel_size=(0.2, 0.2, 4),
-                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
-                 mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int): Number of input features, either x, y, z
-                or x, y, z, r.
-            use_norm (bool): Whether to include BatchNorm.
-            num_filters (list[int]): Number of features in each of the N
-                PFNLayers.
-            with_distance (bool): Whether to include Euclidean distance to
-                points.
-            voxel_size (list[float]): Size of voxels, only utilize x and y
-                size.
-            point_cloud_range: (list[float]): Point cloud range, only
-                utilize x and y min.
-        """
-        super(AlignedPillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
-        if with_cluster_center:
-            print('Use cluster center')
-            num_input_features += 3
-        if with_voxel_center:
-            print('Use voxel center')
-            num_input_features += 2
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        self._with_cluster_center = with_cluster_center
-        self._with_voxel_center = with_voxel_center
-        # Create PillarFeatureNet layers
-        num_filters = [num_input_features] + list(num_filters)
-        pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
-                last_layer = False
-            else:
-                last_layer = True
-            pfn_layers.append(
-                PFNLayer(
-                    in_filters,
-                    out_filters,
-                    use_norm,
-                    last_layer=last_layer,
-                    mode=mode))
-        self.pfn_layers = nn.ModuleList(pfn_layers)
-
-        # Need pillar (voxel) size and x/y offset in order to
-        # calculate pillar offset
-        self.vx = voxel_size[0]
-        self.vy = voxel_size[1]
-        self.vz = voxel_size[2]
-        self.x_offset = self.vx / 2 + point_cloud_range[0]
-        self.y_offset = self.vy / 2 + point_cloud_range[1]
-        self.z_offset = self.vz / 2 + point_cloud_range[2]
-
-    def forward(self, features, num_points, coors):
-        features_ls = [features]
-        # Find distance of x, y, and z from cluster center
-        if self._with_cluster_center:
-            points_mean = features[:, :, :3].sum(
-                dim=1, keepdim=True) / num_points.type_as(features).view(
-                    -1, 1, 1)
-            f_cluster = features[:, :, :3] - points_mean
-            features_ls.append(f_cluster)
-
-        x_distance = features[:, :, 0] - (
-            coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-            self.x_offset)
-        y_distance = features[:, :, 1] - (
-            coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-            self.y_offset)
-        z_distance = features[:, :, 2] - (
-            coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
-            self.z_offset)
-        normed_x_distance = 1 - torch.abs(x_distance / self.vx)
-        normed_y_distance = 1 - torch.abs(y_distance / self.vy)
-        normed_z_distance = 1 - torch.abs(z_distance / self.vz)
-        x_mask = torch.gt(normed_x_distance, 0).type_as(features)
-        y_mask = torch.gt(normed_y_distance, 0).type_as(features)
-        z_mask = torch.gt(normed_z_distance, 0).type_as(features)
-        nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
-        aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
-            normed_z_distance).mul(nonzero_points_mask)
-
-        # Find distance of x, y, and z from pillar center
-        if self._with_voxel_center:
-            f_center = features[:, :, :2]
-            f_center[:, :, 0] = f_center[:, :, 0] - (
-                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-                self.x_offset)
-            f_center[:, :, 1] = f_center[:, :, 1] - (
-                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-                self.y_offset)
-            features_ls.append(f_center)
-
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features_ls.append(points_dist)
-
-        # Combine together feature decorations
-        features = torch.cat(features_ls, dim=-1)
-        # The feature decorations were calculated without regard to
-        # whether pillar was empty. Need to ensure that
-        # empty pillars remain set to zeros.
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        features *= mask
-        for pfn in self.pfn_layers:
-            if pfn.last_vfe:
-                features = pfn(features, aligned_distance)
-            else:
-                features = pfn(features)
-
-        return features.squeeze()
File: mmdet3d/models/voxel_encoders/utils.py (view file @ 191288eb)

@@ -4,28 +4,15 @@ from torch import nn
 from torch.nn import functional as F


-class Empty(nn.Module):
-
-    def __init__(self, *args, **kwargs):
-        super(Empty, self).__init__()
-
-    def forward(self, *args, **kwargs):
-        if len(args) == 1:
-            return args[0]
-        elif len(args) == 0:
-            return None
-        return args
-
-
 def get_paddings_indicator(actual_num, max_num, axis=0):
     """Create boolean mask by actual number of a padded tensor.

     Args:
-        actual_num ([type]): [description]
-        max_num ([type]): [description]
+        actual_num (torch.Tensor): Actual number of points in each voxel.
+        max_num (int): Max number of points in each voxel

     Returns:
-        [type]: [description]
+        torch.Tensor: Mask indicates which points are valid inside a voxel.
     """
     actual_num = torch.unsqueeze(actual_num, axis + 1)
     # tiled_actual_num: [N, M, 1]
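The rewritten docstring pins down the contract. As an illustration, a re-implementation of that contract behaves like this (a sketch, not necessarily the repo's exact body, which is truncated in this view; its first line does match the one shown above):

    import torch

    def get_paddings_indicator_sketch(actual_num, max_num, axis=0):
        # Mark the first actual_num[i] slots of each padded voxel as valid.
        actual_num = torch.unsqueeze(actual_num, axis + 1)  # [N] -> [N, 1]
        arange = torch.arange(
            max_num, dtype=torch.int, device=actual_num.device)
        shape = [1] * len(actual_num.shape)
        shape[axis + 1] = -1
        return actual_num.int() > arange.view(shape)        # [N, M] bool

    mask = get_paddings_indicator_sketch(torch.tensor([2, 4]), 4)
    # tensor([[ True,  True, False, False],
    #         [ True,  True,  True,  True]])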
@@ -52,13 +39,9 @@ class VFELayer(nn.Module):
         self.cat_max = cat_max
         self.max_out = max_out
         # self.units = int(out_channels / 2)

-        if norm_cfg:
-            norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
-            self.norm = norm_layer
-            self.linear = nn.Linear(in_channels, out_channels, bias=False)
-        else:
-            self.norm = Empty(out_channels)
-            self.linear = nn.Linear(in_channels, out_channels, bias=True)
+        self.norm = build_norm_layer(norm_cfg, out_channels)[1]
+        self.linear = nn.Linear(in_channels, out_channels, bias=False)

     def forward(self, inputs):
         # [K, T, 7] tensordot [7, units] = [K, T, units]
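The [1] index above is not arbitrary: mmcv's build_norm_layer returns a (name, module) pair, where the name is a string identifier and the module is the constructed norm layer. For illustration:

    from mmcv.cnn import build_norm_layer

    # Returns a (name, module) tuple; index [1] keeps just the norm layer.
    name, norm = build_norm_layer(
        dict(type='BN1d', eps=1e-3, momentum=0.01), 64)
    assert norm.num_features == 64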
@@ -89,7 +72,7 @@ class PFNLayer(nn.Module):
     def __init__(self,
                  in_channels,
                  out_channels,
-                 use_norm=True,
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  last_layer=False,
                  mode='max'):
         """ Pillar Feature Net Layer.

@@ -100,9 +83,11 @@ class PFNLayer(nn.Module):
         Args:
             in_channels (int): Number of input channels.
             out_channels (int): Number of output channels.
-            use_norm (bool): Whether to include BatchNorm.
+            norm_cfg (dict): Config dict of normalization layers
             last_layer (bool): If last_layer, there is no concatenation of
                 features.
+            mode (str): Pooling mode to gather features inside voxels.
+                Default to 'max'.
         """
         super().__init__()

@@ -112,13 +97,10 @@ class PFNLayer(nn.Module):
             out_channels = out_channels // 2
         self.units = out_channels
-        if use_norm:
-            self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
-            self.linear = nn.Linear(in_channels, self.units, bias=False)
-        else:
-            self.norm = Empty(self.unints)
-            self.linear = nn.Linear(in_channels, self.units, bias=True)
+        self.norm = build_norm_layer(norm_cfg, self.units)[1]
+        self.linear = nn.Linear(in_channels, self.units, bias=False)

         assert mode in ['max', 'avg']
         self.mode = mode

     def forward(self, inputs, num_voxels=None, aligned_distance=None):
File: mmdet3d/models/voxel_encoders/voxel_encoder.py (view file @ 191288eb)

 import torch
 from mmcv.cnn import build_norm_layer
 from torch import nn
 from torch.nn import functional as F

 from mmdet3d.ops import DynamicScatter
 from .. import builder
 from ..registry import VOXEL_ENCODERS
-from .utils import Empty, VFELayer, get_paddings_indicator
+from .utils import VFELayer, get_paddings_indicator


 @VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractor(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractor, self).__init__()
-        self.name = name
-        assert len(num_filters) == 2
-        num_input_features += 3  # add mean features
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
-        self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
-        if use_norm:
-            self.linear = nn.Linear(num_filters[1], num_filters[1], bias=False)
-            self.norm = nn.BatchNorm1d(num_filters[1], eps=1e-3, momentum=0.01)
-        else:
-            self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
-            self.norm = Empty(num_filters[1])
-
-    def forward(self, features, num_voxels, **kwargs):
-        # features: [concated_num_points, num_voxel_size, 3(4)]
-        # num_voxels: [concated_num_points]
-        # t = time.time()
-        # torch.cuda.synchronize()
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
-        features_relative = features[:, :, :3] - points_mean
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features = torch.cat([features, features_relative, points_dist],
-                                 dim=-1)
-        else:
-            features = torch.cat([features, features_relative], dim=-1)
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        # mask = features.max(dim=2, keepdim=True)[0] != 0
-        # torch.cuda.synchronize()
-        # print("vfe prep forward time", time.time() - t)
-        x = self.vfe1(features)
-        x *= mask
-        x = self.vfe2(x)
-        x *= mask
-        x = self.linear(x)
-        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(
-            0, 2, 1).contiguous()
-        x = F.relu(x)
-        x *= mask
-        # x: [concated_num_points, num_voxel_size, 128]
-        voxelwise = torch.max(x, dim=1)[0]
-        return voxelwise
-
-
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV2(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractorV2, self).__init__()
-        self.name = name
-        assert len(num_filters) > 0
-        num_input_features += 3
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        num_filters = [num_input_features] + num_filters
-        filters_pairs = [[num_filters[i], num_filters[i + 1]]
-                         for i in range(len(num_filters) - 1)]
-        self.vfe_layers = nn.ModuleList(
-            [VFELayer(i, o, use_norm) for i, o in filters_pairs])
-        if use_norm:
-            self.linear = nn.Linear(num_filters[-1], num_filters[-1],
-                                    bias=False)
-            self.norm = nn.BatchNorm1d(num_filters[-1], eps=1e-3,
-                                       momentum=0.01)
-        else:
-            self.linear = nn.Linear(num_filters[-1], num_filters[-1],
-                                    bias=True)
-            self.norm = Empty(num_filters[-1])
-
-    def forward(self, features, num_voxels, **kwargs):
-        # features: [concated_num_points, num_voxel_size, 3(4)]
-        # num_voxels: [concated_num_points]
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
-        features_relative = features[:, :, :3] - points_mean
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features = torch.cat([features, features_relative, points_dist],
-                                 dim=-1)
-        else:
-            features = torch.cat([features, features_relative], dim=-1)
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        for vfe in self.vfe_layers:
-            features = vfe(features)
-            features *= mask
-        features = self.linear(features)
-        features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
-            0, 2, 1).contiguous()
-        features = F.relu(features)
-        features *= mask
-        # x: [concated_num_points, num_voxel_size, 128]
-        voxelwise = torch.max(features, dim=1)[0]
-        return voxelwise
-
-
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV3(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractorV3, self).__init__()
-        self.name = name
+class HardSimpleVFE(nn.Module):
+    """Simple voxel feature encoder used in SECOND.
+
+    It simply averages the values of points in a voxel.
+    """
+
+    def __init__(self):
+        super(HardSimpleVFE, self).__init__()

     def forward(self, features, num_points, coors):
         # features: [concated_num_points, num_voxel_size, 3(4)]
@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module):
 @VOXEL_ENCODERS.register_module()
-class DynamicVFEV3(nn.Module):
+class DynamicSimpleVFE(nn.Module):
+    """Simple dynamic voxel feature encoder used in DV-SECOND.
+
+    It simply averages the values of points in a voxel.
+    But the number of points in a voxel is dynamic and varies.
+
+    Args:
+        voxel_size (tuple[float]): Size of a single voxel
+        point_cloud_range (tuple[float]): Range of the point cloud and voxels
+    """

     def __init__(self,
-                 num_input_features=4,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
-        super(DynamicVFEV3, self).__init__()
+        super(DynamicSimpleVFE, self).__init__()
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

     @torch.no_grad()
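The forward body of DynamicSimpleVFE is collapsed in this view. Since the constructor only builds a DynamicScatter whose trailing True argument requests point averaging, a plausible sketch of the hidden method (an assumption; the lines are not expanded here) is a single scatter call:

    # Assumed shape of the collapsed forward: average per-point features into
    # their voxels and return the voxel features with their coordinates.
    @torch.no_grad()
    def forward(self, features, coors):
        voxel_feats, voxel_coors = self.scatter(features, coors)
        return voxel_feats, voxel_coors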
@@ -172,10 +54,37 @@ class DynamicVFEV3(nn.Module):
 @VOXEL_ENCODERS.register_module()
 class DynamicVFE(nn.Module):
+    """Dynamic Voxel feature encoder used in DV-SECOND.
+
+    It encodes features of voxels and their points. It could also fuse
+    image features into voxel features in a point-wise manner.
+    The number of points inside the voxel varies.
+
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Default False.
+        with_cluster_center (bool): Whether to use the distance to cluster
+            center of points inside a voxel. Default to False.
+        with_voxel_center (bool): Whether to use the distance to center of
+            voxel for each point inside a voxel. Default to False.
+        voxel_size (tuple[float]): Size of a single voxel. Default to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Default to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode when pooling features of points inside a voxel.
+            Available options include 'max' and 'avg'. Default to 'max'.
+        fusion_layer (dict | None): The config dict of fusion layer used in
+            multi-modal detectors. Default to None.
+        return_point_feats (bool): Whether to return the features of each
+            point. Default to False.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                  with_distance=False,
                  with_cluster_center=False,
                  with_voxel_center=False,

@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module):
                  fusion_layer=None,
                  return_point_feats=False):
         super(DynamicVFE, self).__init__()
-        assert len(num_filters) > 0
         assert mode in ['avg', 'max']
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
         if with_distance:
-            num_input_features += 3
-        self.num_input_features = num_input_features
+            in_channels += 3
+        self.in_channels = in_channels
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center

@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module):
         self.point_cloud_range = point_cloud_range
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)

@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module):
         self.fusion_layer = builder.build_fusion_layer(fusion_layer)

     def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
+        """Map voxel features to its corresponding points.
+
+        Args:
+            pts_coors (torch.Tensor): Voxel coordinate of each point.
+            voxel_mean (torch.Tensor): Voxel features to be mapped.
+            voxel_coors (torch.Tensor): Coordinates of valid voxels
+
+        Returns:
+            torch.Tensor: Features or centers of each point.
+        """
         # Step 1: scatter voxel into canvas
         # Calculate necessary things for canvas creation
         canvas_z = int(

@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module):
                 points=None,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxC
-        coors (torch.Tensor): Nx(1+NDim)
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is NxC.
+            coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
+            points (list[torch.Tensor], optional): Raw points used to guide
+                the multi-modality fusion. Defaults to None.
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features
+                and its coordinates. If `return_point_feats` is True, returns
+                features of each point inside voxels.
         """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
@@ -320,10 +252,36 @@ class DynamicVFE(nn.Module):
 @VOXEL_ENCODERS.register_module()
 class HardVFE(nn.Module):
+    """Voxel feature encoder used in DV-SECOND.
+
+    It encodes features of voxels and their points. It could also fuse
+    image features into voxel features in a point-wise manner.
+
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Default False.
+        with_cluster_center (bool): Whether to use the distance to cluster
+            center of points inside a voxel. Default to False.
+        with_voxel_center (bool): Whether to use the distance to center of
+            voxel for each point inside a voxel. Default to False.
+        voxel_size (tuple[float]): Size of a single voxel. Default to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Default to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode when pooling features of points inside a voxel.
+            Available options include 'max' and 'avg'. Default to 'max'.
+        fusion_layer (dict | None): The config dict of fusion layer used in
+            multi-modal detectors. Default to None.
+        return_point_feats (bool): Whether to return the features of each
+            point. Default to False.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                  with_distance=False,
                  with_cluster_center=False,
                  with_voxel_center=False,

@@ -334,14 +292,14 @@ class HardVFE(nn.Module):
                  fusion_layer=None,
                  return_point_feats=False):
         super(HardVFE, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
         if with_distance:
-            num_input_features += 3
-        self.num_input_features = num_input_features
+            in_channels += 3
+        self.in_channels = in_channels
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center

@@ -357,16 +315,16 @@ class HardVFE(nn.Module):
         self.point_cloud_range = point_cloud_range
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             # TODO: pass norm_cfg to VFE
             # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
-            if i == (len(num_filters) - 2):
+            if i == (len(feat_channels) - 2):
                 cat_max = False
                 max_out = True
                 if fusion_layer:

@@ -394,9 +352,20 @@ class HardVFE(nn.Module):
                 coors,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxMxC
-        coors (torch.Tensor): Nx(1+NDim)
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is MxNxC.
+            num_points (torch.Tensor): Number of points in each voxel.
+            coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features
+                and its coordinates. If `return_point_feats` is True, returns
+                features of each point inside voxels.
         """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
@@ -438,19 +407,29 @@ class HardVFE(nn.Module):
         for i, vfe in enumerate(self.vfe_layers):
             voxel_feats = vfe(voxel_feats)
-            if torch.isnan(voxel_feats).any():
-                import pdb
-                pdb.set_trace()

         if (self.fusion_layer is not None and img_feats is not None):
             voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
                                                 coors, img_feats, img_meta)
-        if torch.isnan(voxel_feats).any():
-            import pdb
-            pdb.set_trace()

         return voxel_feats

     def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
                          img_meta):
+        """Fuse image and point features with mask.
+
+        Args:
+            features (torch.Tensor): Features of voxel, usually it is the
+                values of points in voxels.
+            mask (torch.Tensor): Mask indicates valid features in each voxel.
+            voxel_feats (torch.Tensor): Features of voxels.
+            coors (torch.Tensor): Coordinates of each single voxel.
+            img_feats (list[torch.Tensor]): Multi-scale feature maps of image.
+            img_meta (list(dict)): Meta information of image and points.
+
+        Returns:
+            torch.Tensor: Fused features of each voxel.
+        """
         # the features consist of a batch of points
         batch_size = coors[-1, 0] + 1
         points = []

@@ -459,20 +438,13 @@ class HardVFE(nn.Module):
             points.append(features[single_mask][mask[single_mask]])

         point_feats = voxel_feats[mask]
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()
         point_feats = self.fusion_layer(img_feats, points, point_feats,
                                         img_meta)
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()

         voxel_canvas = voxel_feats.new_zeros(
             size=(voxel_feats.size(0), voxel_feats.size(1),
                   point_feats.size(-1)))
         voxel_canvas[mask] = point_feats
         out = torch.max(voxel_canvas, dim=1)[0]
-        if torch.isnan(out).any():
-            import pdb
-            pdb.set_trace()

         return out