OpenDAS / mmdetection3d · Commits

Commit 191288eb: Clean voxel encoders
Authored Jun 07, 2020 by zhangwenwei
Parent: 27ebcfac
Showing 20 changed files with 498 additions and 914 deletions (+498, -914):
configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (+0, -243)
configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py (+0, -218)
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py (+2, -2)
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py (+2, -2)
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py (+1, -2)
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py (+1, -2)
configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py (+1, -1)
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py (+1, -5)
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py (+1, -5)
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py (+2, -2)
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py (+20, -8)
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py (+20, -8)
configs/nus/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py (+206, -0)
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (+28, -10)
mmdet3d/core/bbox/box_np_ops.py (+2, -2)
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py (+2, -4)
mmdet3d/models/voxel_encoders/__init__.py (+4, -5)
mmdet3d/models/voxel_encoders/pillar_encoder.py (+40, -184)
mmdet3d/models/voxel_encoders/utils.py (+13, -31)
mmdet3d/models/voxel_encoders/voxel_encoder.py (+152, -180)
configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (deleted, 100644 → 0; content as of parent 27ebcfac):

# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
    type='MVXFasterRCNNV2',
    pts_voxel_layer=dict(
        max_num_points=64,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
        voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
    ),
    pts_voxel_encoder=dict(
        type='HardVFE',
        num_input_features=4,
        num_filters=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    pts_middle_encoder=dict(
        type='PointPillarsScatter',
        in_channels=64,
        output_shape=[400, 400],  # checked from PointCloud3D
    ),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256],
    ),
    pts_neck=dict(
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128],
    ),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[
                [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
                [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
                [-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
                [-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
                [-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
                [-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
                [-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
            ],
            sizes=[
                [1.95017717, 4.60718145, 1.72270761],  # car
                [2.4560939, 6.73778078, 2.73004906],  # truck
                [2.87427237, 12.01320693, 3.81509561],  # trailer
                [0.60058911, 1.68452161, 1.27192197],  # bicycle
                [0.66344886, 0.7256437, 1.75748069],  # pedestrian
                [0.39694519, 0.40359262, 1.06232151],  # traffic_cone
                [2.49008838, 0.48578221, 0.98297065],  # barrier
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
    pts=dict(
        assigner=dict(  # for Car
            type='MaxIoUAssigner',
            iou_calculator=dict(type='BboxOverlapsNearest3D'),
            pos_iou_thr=0.6,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        allowed_border=0,
        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    pts=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.2,
        score_thr=0.05,
        min_bbox_size=0,
        max_num=500
        # soft-nms is also supported for rcnn testing
        # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
    ))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
    use_lidar=True,
    use_depth=False,
    use_lidar_intensity=True,
    use_camera=False,
)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    object_rot_range=[0.0, 0.0],
    prepare=dict(),
    classes=class_names,
    sample_groups=dict(
        bus=4,
        trailer=4,
        truck=4,
    ))
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/': 's3://nuscenes/nuscenes/',
        'data/nuscenes/': 's3://nuscenes/nuscenes/'
    }))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.3925, 0.3925],
        scaling_uniform_noise=[0.95, 1.05],
        trans_normal_noise=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='RandomFlip3D', flip_ratio=0),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
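Configs in this style are plain Python files consumed through mmcv's Config loader. A minimal usage sketch, assuming the mmcv 1.x-era Config API this repository builds on (the path refers to the file deleted above, so it only resolves on a checkout of parent 27ebcfac):

# Load and inspect an mmdetection3d-style config with mmcv (sketch).
from mmcv import Config

cfg = Config.fromfile(
    'configs/fileclient/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py')
print(cfg.model.type)                    # 'MVXFasterRCNNV2'
print(cfg.model.pts_voxel_encoder.type)  # 'HardVFE'
print(cfg.data.samples_per_gpu)          # 4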
configs/fileclient/hv_second_secfpn_6x8_80e_fileclient_kitti-3d-car.py (deleted, 100644 → 0; content as of parent 27ebcfac):

# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z
model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=5,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(
        type='VoxelFeatureExtractorV3',
        num_input_features=4,
        num_filters=[4],
        with_distance=False),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256],
    ),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256],
    ),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=1,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=True),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
    ),
)
# model training and testing settings
train_cfg = dict(
    assigner=dict(
        type='MaxIoUAssigner',
        iou_calculator=dict(type='BboxOverlapsNearest3D'),
        pos_iou_thr=0.6,
        neg_iou_thr=0.45,
        min_pos_iou=0.45,
        ignore_iof_thr=-1),
    allowed_border=0,
    pos_weight=-1,
    debug=False)
test_cfg = dict(
    use_rotate_nms=True,
    nms_across_levels=False,
    nms_thr=0.01,
    score_thr=0.3,
    min_bbox_size=0,
    nms_pre=100,
    max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
    use_lidar=False,
    use_lidar_reduced=True,
    use_depth=False,
    use_lidar_intensity=True,
    use_camera=False,
)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    object_rot_range=[0.0, 0.0],
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5),
    ),
    classes=class_names,
    sample_groups=dict(Car=15),
)
file_client_args = dict(
    backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
        rot_uniform_noise=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
        scaling_uniform_noise=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        load_dim=4,
        use_dim=4,
        file_client_args=file_client_args),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]
data = dict(
    samples_per_gpu=6,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_train.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=train_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=False),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'kitti_infos_val.pkl',
        split='training',
        pts_prefix='velodyne_reduced',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True))
# optimizer
lr = 0.0018  # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
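The cyclic lr_config and momentum_config above are expressed as ratios of the base values. A back-of-the-envelope sketch, assuming mmcv's cyclic-hook convention that target_ratio multiplies the base lr (and, for AdamW, the first beta plays the role of momentum):

# Plain-Python arithmetic for the one-cycle schedule above (a sketch under
# the stated assumptions; mmcv's hooks do the per-iteration interpolation).
lr = 0.0018
peak_lr = lr * 10     # 0.018, reached after step_ratio_up = 40% of the cycle
final_lr = lr * 1e-4  # 1.8e-07 by the end of training

momentum = 0.95       # betas=(0.95, 0.99), first beta assumed modulated
low_momentum = momentum * (0.85 / 0.95)  # 0.85 at the lr peak
print(peak_lr, final_lr, low_momentum)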
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py:

@@ -28,8 +28,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='DynamicVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py:

@@ -12,8 +12,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='DynamicPillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range,
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py:

@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range),
     middle_encoder=dict(
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py:

@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
        voxel_size=voxel_size,
        point_cloud_range=point_cloud_range),
    middle_encoder=dict(
configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py:

@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py:

@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py:

@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py:

@@ -10,8 +10,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='PillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         # these two arguments should be consistent with the voxel_generator
         voxel_size=[0.16, 0.16, 4],
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py:

@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000),  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseEncoder',
         in_channels=4,

@@ -104,9 +100,21 @@ db_sampler = dict(
     classes=class_names,
     sample_groups=dict(Car=15),
 )
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=True,
+        with_label_3d=True,
+        file_client_args=file_client_args),
     dict(type='ObjectSample', db_sampler=db_sampler),
     dict(
         type='ObjectNoise',

@@ -126,7 +134,11 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(
         type='DefaultFormatBundle3D',
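The new file_client_args plumbing lets the load transforms fetch data through a pluggable backend; the commented-out petrel variant rewrites local path prefixes into object-store URIs. A hypothetical helper (not an mmcv API) sketching that rewrite:

# Sketch of what a petrel-style path_mapping does to a local dataset path.
def map_path(path: str, path_mapping: dict) -> str:
    # Replace each configured prefix with its remote target.
    for prefix, target in path_mapping.items():
        if prefix in path:
            path = path.replace(prefix, target)
    return path

print(map_path('data/kitti/kitti_infos_train.pkl',
               {'data': 's3://kitti_data/'}))
# -> 's3://kitti_data//kitti/kitti_infos_train.pkl'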
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py:

@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,

@@ -120,13 +120,25 @@ classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
            'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
 img_norm_cfg = dict(
     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://nuscenes/nuscenes/',
+        'data/nuscenes/': 's3://nuscenes/nuscenes/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations',
+        with_bbox=True,
+        with_mask=False,
+        file_client_args=file_client_args),
     dict(
         type='Resize',
-        img_scale=[(1200, 720), (1920, 1080)],
-        multiscale_mode='range',
+        img_scale=(1280, 720),
+        ratio_range=(0.75, 1.25),
         keep_ratio=True),
     dict(type='RandomFlip', flip_ratio=0.5),
     dict(type='Normalize', **img_norm_cfg),

@@ -135,10 +147,10 @@ train_pipeline = [
     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
     dict(
         type='MultiScaleFlipAug',
-        img_scale=(1600, 900),
+        img_scale=(1280, 720),
         flip=False,
         transforms=[
             dict(type='Resize', keep_ratio=True),

@@ -192,6 +204,6 @@ total_epochs = 12
 dist_params = dict(backend='nccl', port=29501)
 log_level = 'INFO'
 work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
-load_from = None
+load_from = './pretrain_mmdet/faster_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_3x-4767dd8e.pth'  # noqa
 resume_from = None
 workflow = [('train', 1)]
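Quick arithmetic on the Resize change above, assuming mmdet's convention that ratio_range multiplies the base img_scale: the new single-scale-plus-ratio form spans roughly the endpoints the old two-scale range covered.

# Sketch: bounds of the sampled training scales under the new settings.
base = (1280, 720)
low = tuple(int(s * 0.75) for s in base)   # (960, 540)
high = tuple(int(s * 1.25) for s in base)  # (1600, 900), the old test scale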
configs/nus/faster_rcnn_regnet-3gf_fpn_2x8_1x_nus.py (new file, 0 → 100644):

# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
    type='FasterRCNN',
    pretrained='open-mmlab://regnetx_3.2gf',
    backbone=dict(
        type='RegNet',
        arch='regnetx_3.2gf',
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        base_channels=32,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[96, 192, 432, 1008],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=10,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.7,
            neg_iou_thr=0.3,
            min_pos_iou=0.3,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=256,
            pos_fraction=0.5,
            neg_pos_ub=-1,
            add_gt_as_proposals=False),
        allowed_border=-1,
        pos_weight=-1,
        debug=False),
    rpn_proposal=dict(
        nms_across_levels=False,
        nms_pre=2000,
        # following the setting of detectron,
        # which improves ~0.2 bbox mAP.
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        assigner=dict(
            type='MaxIoUAssigner',
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
            type='RandomSampler',
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
            add_gt_as_proposals=True),
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rpn=dict(
        nms_across_levels=False,
        nms_pre=1000,
        nms_post=1000,
        max_num=1000,
        nms_thr=0.7,
        min_bbox_size=0),
    rcnn=dict(
        score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
    # soft-nms is also supported for rcnn testing
    # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
           'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
    # The mean and std is used in PyCls when training RegNets
    mean=[103.53, 116.28, 123.675],
    std=[57.375, 57.12, 58.395],
    to_rgb=False)
file_client_args = dict(
    backend='petrel',
    path_mapping=dict({
        './data/nuscenes/': 's3://nuscenes/nuscenes/',
        'data/nuscenes/': 's3://nuscenes/nuscenes/'
    }))
train_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='LoadAnnotations',
        with_bbox=True,
        with_mask=False,
        file_client_args=file_client_args),
    dict(
        type='Resize',
        img_scale=(1280, 720),
        ratio_range=(0.75, 1.25),
        keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
    dict(type='LoadImageFromFile', file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1280, 720),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img']),
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_train.coco.json',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline),
    test=dict(
        type=dataset_type,
        classes=classes,
        ann_file=data_root + 'nuscenes_infos_val.coco.json',
        pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth'  # noqa
resume_from = None
workflow = [('train', 1)]
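The lr_config in this new file uses linear warmup. A plain-Python sketch of the schedule it requests, assuming mmcv's linear warmup interpolates from lr * warmup_ratio up to the base lr:

# Sketch of the warmup ramp; mmcv's LrUpdaterHook does this per iteration.
base_lr, warmup_iters, warmup_ratio = 0.02, 1000, 1.0 / 1000

def lr_at_iter(i):
    if i < warmup_iters:
        k = i / warmup_iters
        return base_lr * (warmup_ratio * (1 - k) + k)  # linear ramp
    return base_lr  # then step decay at epochs 8 and 11

assert abs(lr_at_iter(0) - 2e-5) < 1e-12
assert lr_at_iter(warmup_iters) == base_lr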
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py:

@@ -15,8 +15,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='HardVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,

@@ -85,9 +85,7 @@ model = dict(
             loss_weight=1.0),
         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
-    ),
-)
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
 # model training and testing settings
 train_cfg = dict(
     pts=dict(

@@ -138,10 +136,23 @@ db_sampler = dict(
         trailer=4,
         truck=4,
     ))
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel',
+#     path_mapping=dict({
+#         './data/nuscenes/': 's3://nuscenes/nuscenes/',
+#         'data/nuscenes/': 's3://nuscenes/nuscenes/'
+#     }))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
         type='GlobalRotScale',

@@ -156,8 +167,15 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='RandomFlip3D', flip_ratio=0),
     dict(
mmdet3d/core/bbox/box_np_ops.py:

@@ -466,8 +466,8 @@ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
 def surface_equ_3d(polygon_surfaces):
     # return [a, b, c], d in ax+by+cz+d=0
     # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
-    surface_vec = polygon_surfaces[:, :, :2, :] - polygon_surfaces[:, :,
-                                                                   1:3, :]
+    surface_vec = polygon_surfaces[:, :, :2, :] - \
+        polygon_surfaces[:, :, 1:3, :]
     # normal_vec: [..., 3]
     normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
     # print(normal_vec.shape, points[..., 0, :].shape)
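The surface_equ_3d change is only a line-wrap fix, but the underlying math (plane coefficients from the first three vertices of each surface) is easy to verify. A small numpy example with hypothetical points, where d completes ax + by + cz + d = 0:

# Sketch (not the library function): plane equation from three vertices.
import numpy as np

p0, p1, p2 = (np.array([0., 0., 1.]), np.array([1., 0., 1.]),
              np.array([1., 1., 1.]))
normal = np.cross(p0 - p1, p1 - p2)  # [a, b, c], here (0, 0, 1)
d = -normal.dot(p0)                  # -1 for the z = 1 plane
# Any point on that plane satisfies the equation:
assert np.isclose(normal.dot(np.array([5., -2., 1.])) + d, 0)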
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py:

@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module):
                 3,
                 3,
                 padding=1,
                 norm_cfg=norm_cfg,
-                indice_key=f'rcnn_down0',
-                conv_type='SubMConv3d'))
+                indice_key='rcnn_down0'))
         merge_conv_channel_last = channel
         down_conv_channel_last = merge_conv_channel_last

@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module):
                 3,
                 3,
                 padding=1,
                 norm_cfg=norm_cfg,
-                indice_key=f'rcnn_down1',
-                conv_type='SubMConv3d'))
+                indice_key='rcnn_down1'))
         down_conv_channel_last = channel
         self.conv_down.add_module('merge_conv',
mmdet3d/models/voxel_encoders/__init__.py:

-from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
-from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
-                            VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
+from .pillar_encoder import PillarFeatureNet
+from .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE

 __all__ = [
-    'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
-    'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
+    'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',
+    'DynamicSimpleVFE'
 ]
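The export changes summarize the renames this commit performs. A hypothetical migration map (not part of mmdet3d) for porting older configs by hand:

# Renames applied by this commit, collected from the diffs above.
RENAMED_ENCODERS = {
    'VoxelFeatureExtractorV3': 'HardSimpleVFE',  # constructor args dropped
    'DynamicVFEV3': 'DynamicSimpleVFE',          # num_input_features dropped
}
RENAMED_ARGS = {
    # for PillarFeatureNet, DynamicPillarFeatureNet, HardVFE, DynamicVFE
    'num_input_features': 'in_channels',
    'num_filters': 'feat_channels',
    # use_norm is superseded by norm_cfg=dict(type='BN1d', ...)
}
# VoxelFeatureExtractor, VoxelFeatureExtractorV2 and AlignedPillarFeatureNet
# are deleted outright, with no drop-in replacement.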
mmdet3d/models/voxel_encoders/pillar_encoder.py:

@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator
 @VOXEL_ENCODERS.register_module()
 class PillarFeatureNet(nn.Module):
+    """Pillar Feature Net.
+
+    The network prepares the pillar features and performs forward pass
+    through PFNLayers.
+
+    Args:
+        in_channels (int). Number of input features,
+            either x, y, z or x, y, z, r.
+        feat_channels (list[int]). Number of features in each of the
+            N PFNLayers.
+        with_distance (bool). Whether to include Euclidean distance
+            to points.
+        voxel_size (list[float]). Size of voxels, only utilize x and y
+            size.
+        point_cloud_range (list[float]). Point cloud range, only
+            utilizes x and y min.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1),
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int). Number of input features,
-                either x, y, z or x, y, z, r.
-            use_norm (bool). Whether to include BatchNorm.
-            num_filters (list[int]). Number of features in each of the
-                N PFNLayers.
-            with_distance (bool). Whether to include Euclidean distance
-                to points.
-            voxel_size (list[float]). Size of voxels, only utilize x and y
-                size.
-            point_cloud_range (list[float]). Point cloud range, only
-                utilize x and y min.
-        """
         super(PillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 2
+            in_channels += 2
         if with_distance:
-            num_input_features += 1
+            in_channels += 1
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
         # Create PillarFeatureNet layers
-        self.num_input_features = num_input_features
-        num_filters = [num_input_features] + list(num_filters)
+        self.in_channels = in_channels
+        feat_channels = [in_channels] + list(feat_channels)
         pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
+            if i < len(feat_channels) - 2:
                 last_layer = False
             else:
                 last_layer = True

@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module):
                 PFNLayer(
                     in_filters,
                     out_filters,
-                    use_norm,
+                    norm_cfg=norm_cfg,
                     last_layer=last_layer,
                     mode=mode))
         self.pfn_layers = nn.ModuleList(pfn_layers)

@@ -122,9 +121,8 @@ class PillarFeatureNet(nn.Module):
 class DynamicPillarFeatureNet(PillarFeatureNet):

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,

@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
         """
         super(DynamicPillarFeatureNet, self).__init__(
-            num_input_features,
-            use_norm,
-            num_filters,
+            in_channels,
+            feat_channels,
             with_distance,
             with_cluster_center=with_cluster_center,
             with_voxel_center=with_voxel_center,
             voxel_size=voxel_size,
             point_cloud_range=point_cloud_range,
+            norm_cfg=norm_cfg,
             mode=mode)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         pfn_layers = []
         # TODO: currently only support one PFNLayer
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)

@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
             features = torch.cat([point_feats, feat_per_point], dim=1)

         return voxel_feats, voxel_coors
-
-
-@VOXEL_ENCODERS.register_module()
-class AlignedPillarFeatureNet(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
-                 with_distance=False,
-                 with_cluster_center=True,
-                 with_voxel_center=True,
-                 voxel_size=(0.2, 0.2, 4),
-                 point_cloud_range=(0, -40, -3, 70.4, 40, 1),
-                 mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int): Number of input features, either x, y, z
-                or x, y, z, r.
-            use_norm (bool): Whether to include BatchNorm.
-            num_filters (list[int]): Number of features in each of the N
-                PFNLayers.
-            with_distance (bool): Whether to include Euclidean distance to
-                points.
-            voxel_size (list[float]): Size of voxels, only utilize x and y
-                size.
-            point_cloud_range: (list[float]): Point cloud range, only
-                utilize x and y min.
-        """
-        super(AlignedPillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
-        if with_cluster_center:
-            print('Use cluster center')
-            num_input_features += 3
-        if with_voxel_center:
-            print('Use voxel center')
-            num_input_features += 2
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        self._with_cluster_center = with_cluster_center
-        self._with_voxel_center = with_voxel_center
-        # Create PillarFeatureNet layers
-        num_filters = [num_input_features] + list(num_filters)
-        pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
-                last_layer = False
-            else:
-                last_layer = True
-            pfn_layers.append(
-                PFNLayer(
-                    in_filters,
-                    out_filters,
-                    use_norm,
-                    last_layer=last_layer,
-                    mode=mode))
-        self.pfn_layers = nn.ModuleList(pfn_layers)
-
-        # Need pillar (voxel) size and x/y offset in order to
-        # calculate pillar offset
-        self.vx = voxel_size[0]
-        self.vy = voxel_size[1]
-        self.vz = voxel_size[2]
-        self.x_offset = self.vx / 2 + point_cloud_range[0]
-        self.y_offset = self.vy / 2 + point_cloud_range[1]
-        self.z_offset = self.vz / 2 + point_cloud_range[2]
-
-    def forward(self, features, num_points, coors):
-        features_ls = [features]
-        # Find distance of x, y, and z from cluster center
-        if self._with_cluster_center:
-            points_mean = features[:, :, :3].sum(
-                dim=1, keepdim=True) / num_points.type_as(features).view(
-                    -1, 1, 1)
-            f_cluster = features[:, :, :3] - points_mean
-            features_ls.append(f_cluster)
-
-        x_distance = features[:, :, 0] - (
-            coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-            self.x_offset)
-        y_distance = features[:, :, 1] - (
-            coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-            self.y_offset)
-        z_distance = features[:, :, 2] - (
-            coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
-            self.z_offset)
-        normed_x_distance = 1 - torch.abs(x_distance / self.vx)
-        normed_y_distance = 1 - torch.abs(y_distance / self.vy)
-        normed_z_distance = 1 - torch.abs(z_distance / self.vz)
-        x_mask = torch.gt(normed_x_distance, 0).type_as(features)
-        y_mask = torch.gt(normed_y_distance, 0).type_as(features)
-        z_mask = torch.gt(normed_z_distance, 0).type_as(features)
-        nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
-        aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
-            normed_z_distance).mul(nonzero_points_mask)
-
-        # Find distance of x, y, and z from pillar center
-        if self._with_voxel_center:
-            f_center = features[:, :, :2]
-            f_center[:, :, 0] = f_center[:, :, 0] - (
-                coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
-                self.x_offset)
-            f_center[:, :, 1] = f_center[:, :, 1] - (
-                coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
-                self.y_offset)
-            features_ls.append(f_center)
-
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features_ls.append(points_dist)
-
-        # Combine together feature decorations
-        features = torch.cat(features_ls, dim=-1)
-        # The feature decorations were calculated without regard to
-        # whether pillar was empty. Need to ensure that
-        # empty pillars remain set to zeros.
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_points, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        features *= mask
-
-        for pfn in self.pfn_layers:
-            if pfn.last_vfe:
-                features = pfn(features, aligned_distance)
-            else:
-                features = pfn(features)
-
-        return features.squeeze()
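The pillar encoders above "decorate" raw points with offsets from the pillar's cluster center before the PFN layers run. A standalone torch sketch with toy shapes (not library code) of that decoration:

# Cluster-center decoration, as in the forward passes shown above.
import torch

features = torch.zeros(2, 5, 4)      # [num_pillars, max_points, (x, y, z, r)]
features[0, :5] = torch.randn(5, 4)  # pillar 0: 5 valid points
features[1, :3] = torch.randn(3, 4)  # pillar 1: 3 valid points, rest zero-padded
num_points = torch.tensor([5, 3])

# Padded rows are zero, so summing and dividing by the valid count yields the
# mean over valid points only, exactly the trick the library code uses.
points_mean = features[:, :, :3].sum(dim=1, keepdim=True) / \
    num_points.type_as(features).view(-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean           # offset to cluster center
decorated = torch.cat([features, f_cluster], dim=-1)   # 4 + 3 = 7 channels
# (The real code then re-zeroes decorations on padded rows via
# get_paddings_indicator before running the PFN layers.)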
mmdet3d/models/voxel_encoders/utils.py:

@@ -4,28 +4,15 @@ from torch import nn
 from torch.nn import functional as F


-class Empty(nn.Module):
-
-    def __init__(self, *args, **kwargs):
-        super(Empty, self).__init__()
-
-    def forward(self, *args, **kwargs):
-        if len(args) == 1:
-            return args[0]
-        elif len(args) == 0:
-            return None
-        return args
-
-
 def get_paddings_indicator(actual_num, max_num, axis=0):
     """Create boolean mask by actual number of a padded tensor.

     Args:
-        actual_num ([type]): [description]
-        max_num ([type]): [description]
+        actual_num (torch.Tensor): Actual number of points in each voxel.
+        max_num (int): Max number of points in each voxel

     Returns:
-        [type]: [description]
+        torch.Tensor: Mask indicates which points are valid inside a voxel.
     """
     actual_num = torch.unsqueeze(actual_num, axis + 1)
     # tiled_actual_num: [N, M, 1]

@@ -52,13 +39,9 @@ class VFELayer(nn.Module):
         self.cat_max = cat_max
         self.max_out = max_out
         # self.units = int(out_channels / 2)

-        if norm_cfg:
-            norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
-            self.norm = norm_layer
-            self.linear = nn.Linear(in_channels, out_channels, bias=False)
-        else:
-            self.norm = Empty(out_channels)
-            self.linear = nn.Linear(in_channels, out_channels, bias=True)
+        self.norm = build_norm_layer(norm_cfg, out_channels)[1]
+        self.linear = nn.Linear(in_channels, out_channels, bias=False)

     def forward(self, inputs):
         # [K, T, 7] tensordot [7, units] = [K, T, units]

@@ -89,7 +72,7 @@ class PFNLayer(nn.Module):
     def __init__(self,
                  in_channels,
                  out_channels,
-                 use_norm=True,
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  last_layer=False,
                  mode='max'):
         """ Pillar Feature Net Layer.

@@ -100,9 +83,11 @@ class PFNLayer(nn.Module):
         Args:
             in_channels (int): Number of input channels.
             out_channels (int): Number of output channels.
-            use_norm (bool): Whether to include BatchNorm.
+            norm_cfg (dict): Config dict of normalization layers
             last_layer (bool): If last_layer, there is no concatenation of
                 features.
+            mode (str): Pooling model to gather features inside voxels.
+                Default to 'max'.
         """
         super().__init__()

@@ -112,13 +97,10 @@ class PFNLayer(nn.Module):
             out_channels = out_channels // 2
         self.units = out_channels

-        if use_norm:
-            self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
-            self.linear = nn.Linear(in_channels, self.units, bias=False)
-        else:
-            self.norm = Empty(self.unints)
-            self.linear = nn.Linear(in_channels, self.units, bias=True)
+        self.norm = build_norm_layer(norm_cfg, self.units)[1]
+        self.linear = nn.Linear(in_channels, self.units, bias=False)

+        assert mode in ['max', 'avg']
         self.mode = mode

     def forward(self, inputs, num_voxels=None, aligned_distance=None):
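The improved get_paddings_indicator docstring now states what the mask means. A small torch sketch of the equivalent computation:

# The mask the new docstring describes: True where a padded slot is a real
# point. (Equivalent logic; the library broadcasts via unsqueeze instead.)
import torch

actual_num = torch.tensor([3, 1, 4])  # valid points per voxel
max_num = 4                           # padded second dimension
mask = torch.arange(max_num).view(1, -1) < actual_num.view(-1, 1)
# tensor([[ True,  True,  True, False],
#         [ True, False, False, False],
#         [ True,  True,  True,  True]])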
mmdet3d/models/voxel_encoders/voxel_encoder.py
View file @
191288eb
import
torch
import
torch
from
mmcv.cnn
import
build_norm_layer
from
mmcv.cnn
import
build_norm_layer
from
torch
import
nn
from
torch
import
nn
from
torch.nn
import
functional
as
F
from
mmdet3d.ops
import
DynamicScatter
from
mmdet3d.ops
import
DynamicScatter
from
..
import
builder
from
..
import
builder
from
..registry
import
VOXEL_ENCODERS
from
..registry
import
VOXEL_ENCODERS
from
.utils
import
Empty
,
VFELayer
,
get_paddings_indicator
from
.utils
import
VFELayer
,
get_paddings_indicator
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractor(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractor, self).__init__()
-        self.name = name
-        assert len(num_filters) == 2
-        num_input_features += 3  # add mean features
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
-        self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
-        if use_norm:
-            self.linear = nn.Linear(num_filters[1], num_filters[1], bias=False)
-            self.norm = nn.BatchNorm1d(num_filters[1], eps=1e-3, momentum=0.01)
-        else:
-            self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
-            self.norm = Empty(num_filters[1])
-
-    def forward(self, features, num_voxels, **kwargs):
-        # features: [concated_num_points, num_voxel_size, 3(4)]
-        # num_voxels: [concated_num_points]
-        # t = time.time()
-        # torch.cuda.synchronize()
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
-        features_relative = features[:, :, :3] - points_mean
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features = torch.cat([features, features_relative, points_dist],
-                                 dim=-1)
-        else:
-            features = torch.cat([features, features_relative], dim=-1)
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        # mask = features.max(dim=2, keepdim=True)[0] != 0
-        # torch.cuda.synchronize()
-        # print("vfe prep forward time", time.time() - t)
-        x = self.vfe1(features)
-        x *= mask
-        x = self.vfe2(x)
-        x *= mask
-        x = self.linear(x)
-        x = self.norm(x.permute(0, 2, 1).contiguous()).permute(
-            0, 2, 1).contiguous()
-        x = F.relu(x)
-        x *= mask
-        # x: [concated_num_points, num_voxel_size, 128]
-        voxelwise = torch.max(x, dim=1)[0]
-        return voxelwise
+@VOXEL_ENCODERS.register_module()
+class HardSimpleVFE(nn.Module):
+    """Simple voxel feature encoder used in SECOND.
+
+    It simply averages the values of points in a voxel.
+    """
+
+    def __init__(self):
+        super(HardSimpleVFE, self).__init__()
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV2(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractorV2, self).__init__()
-        self.name = name
-        assert len(num_filters) > 0
-        num_input_features += 3
-        if with_distance:
-            num_input_features += 1
-        self._with_distance = with_distance
-        num_filters = [num_input_features] + num_filters
-        filters_pairs = [[num_filters[i], num_filters[i + 1]]
-                         for i in range(len(num_filters) - 1)]
-        self.vfe_layers = nn.ModuleList(
-            [VFELayer(i, o, use_norm) for i, o in filters_pairs])
-        if use_norm:
-            self.linear = nn.Linear(num_filters[-1], num_filters[-1], bias=False)
-            self.norm = nn.BatchNorm1d(num_filters[-1], eps=1e-3, momentum=0.01)
-        else:
-            self.linear = nn.Linear(num_filters[-1], num_filters[-1], bias=True)
-            self.norm = Empty(num_filters[-1])
-
-    def forward(self, features, num_voxels, **kwargs):
-        # features: [concated_num_points, num_voxel_size, 3(4)]
-        # num_voxels: [concated_num_points]
-        points_mean = features[:, :, :3].sum(
-            dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
-        features_relative = features[:, :, :3] - points_mean
-        if self._with_distance:
-            points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
-            features = torch.cat([features, features_relative, points_dist],
-                                 dim=-1)
-        else:
-            features = torch.cat([features, features_relative], dim=-1)
-        voxel_count = features.shape[1]
-        mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
-        mask = torch.unsqueeze(mask, -1).type_as(features)
-        for vfe in self.vfe_layers:
-            features = vfe(features)
-            features *= mask
-        features = self.linear(features)
-        features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
-            0, 2, 1).contiguous()
-        features = F.relu(features)
-        features *= mask
-        # x: [concated_num_points, num_voxel_size, 128]
-        voxelwise = torch.max(features, dim=1)[0]
-        return voxelwise
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV3(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractorV3, self).__init__()
-        self.name = name
-
     def forward(self, features, num_points, coors):
         # features: [concated_num_points, num_voxel_size, 3(4)]
...
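The body of the new `HardSimpleVFE.forward` is collapsed in this hunk. Based on the context lines above (padded per-voxel point features plus a per-voxel point count), a minimal sketch of the averaging it performs could look like the following; the function name and the exact slicing are assumptions for illustration, not the committed code.

import torch


def hard_simple_vfe_forward(features, num_points, coors):
    # features: [num_voxels, max_points, C] zero-padded point features
    # num_points: [num_voxels] count of valid points in each voxel
    # coors is accepted for interface parity but unused in this sketch.
    # Padded rows are zeros, so summing over the point dimension and
    # dividing by the true count yields the per-voxel mean.
    points_mean = features.sum(dim=1) / num_points.type_as(features).view(-1, 1)
    return points_mean.contiguous()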
@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module):
 @VOXEL_ENCODERS.register_module()
-class DynamicVFEV3(nn.Module):
+class DynamicSimpleVFE(nn.Module):
+    """Simple dynamic voxel feature encoder used in DV-SECOND.
+
+    It simply averages the values of points in a voxel.
+    But the number of points in a voxel is dynamic and varies.
+
+    Args:
+        voxel_size (tuple[float]): Size of a single voxel.
+        point_cloud_range (tuple[float]): Range of the point cloud and voxels.
+    """

     def __init__(self,
-                 num_input_features=4,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
-        super(DynamicVFEV3, self).__init__()
+        super(DynamicSimpleVFE, self).__init__()
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

     @torch.no_grad()
...
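`DynamicScatter` is the mmdet3d op that reduces an unordered set of (point, voxel-coordinate) pairs into per-voxel features; the trailing `True` requests mean pooling. The collapsed `forward` presumably just delegates to it, roughly as sketched below (an assumption, since the hunk is folded).

@torch.no_grad()
def forward(self, features, coors):
    # features: [N, C] per-point features; coors: [N, 4] (batch_idx, z, y, x)
    # self.scatter was built as DynamicScatter(voxel_size, point_cloud_range,
    # True), i.e. it averages all points that fall into the same voxel and
    # returns the coordinates of the non-empty voxels alongside.
    voxel_feats, voxel_coors = self.scatter(features, coors)
    return voxel_feats, voxel_coors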
@@ -172,10 +54,37 @@ class DynamicVFEV3(nn.Module):
 @VOXEL_ENCODERS.register_module()
 class DynamicVFE(nn.Module):
+    """Dynamic voxel feature encoder used in DV-SECOND.
+
+    It encodes features of voxels and their points. It can also fuse
+    image features into voxel features in a point-wise manner.
+    The number of points inside the voxel varies.
+
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Defaults to False.
+        with_cluster_center (bool): Whether to use the distance to the cluster
+            center of points inside a voxel. Defaults to False.
+        with_voxel_center (bool): Whether to use the distance to the center of
+            the voxel for each point inside a voxel. Defaults to False.
+        voxel_size (tuple[float]): Size of a single voxel. Defaults to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Defaults to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode when pooling features of points inside a voxel.
+            Available options include 'max' and 'avg'. Defaults to 'max'.
+        fusion_layer (dict | None): The config dict of the fusion layer used
+            in multi-modal detectors. Defaults to None.
+        return_point_feats (bool): Whether to return the features of each
+            point. Defaults to False.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                  with_distance=False,
                  with_cluster_center=False,
                  with_voxel_center=False,
...
@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module):
                  fusion_layer=None,
                  return_point_feats=False):
         super(DynamicVFE, self).__init__()
-        assert len(num_filters) > 0
+        assert mode in ['avg', 'max']
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
         if with_distance:
-            num_input_features += 3
+            in_channels += 3
-        self.num_input_features = num_input_features
+        self.in_channels = in_channels
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
...
@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module):
         self.point_cloud_range = point_cloud_range
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
...
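The net effect of these renames shows up wherever a config builds this encoder: `num_input_features` becomes `in_channels` and `num_filters` becomes `feat_channels`. A hypothetical config snippet (channel values illustrative, not taken from this commit) would now read:

pts_voxel_encoder = dict(
    type='DynamicVFE',
    in_channels=4,            # formerly num_input_features
    feat_channels=[64, 64],   # formerly num_filters
    with_distance=False,
    with_cluster_center=True,
    with_voxel_center=True,
    voxel_size=(0.2, 0.2, 4),
    point_cloud_range=(0, -40, -3, 70.4, 40, 1))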
@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module):
             self.fusion_layer = builder.build_fusion_layer(fusion_layer)

     def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
+        """Map voxel features to their corresponding points.
+
+        Args:
+            pts_coors (torch.Tensor): Voxel coordinate of each point.
+            voxel_mean (torch.Tensor): Voxel features to be mapped.
+            voxel_coors (torch.Tensor): Coordinates of valid voxels.
+
+        Returns:
+            torch.Tensor: Features or centers of each point.
+        """
         # Step 1: scatter voxel into canvas
         # Calculate necessary things for canvas creation
         canvas_z = int(
...
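The scatter/gather in the collapsed part of this method can be pictured as follows: flatten every voxel coordinate into an index over a dense canvas, write each voxel's feature at its index, then read the canvas back at every point's voxel index. A simplified sketch under that assumption (single batch; the real code also folds the batch index into the canvas index, and `canvas_shape` is a hypothetical helper argument):

def map_voxel_center_to_point(pts_coors, voxel_mean, voxel_coors, canvas_shape):
    # canvas_shape: (canvas_z, canvas_y, canvas_x), derived from
    # point_cloud_range / voxel_size as the lines above compute.
    cz, cy, cx = canvas_shape
    canvas = voxel_mean.new_zeros(cz * cy * cx, voxel_mean.size(1))
    # Step 1: scatter voxel features into the canvas.
    v_idx = (voxel_coors[:, 1] * cy * cx + voxel_coors[:, 2] * cx +
             voxel_coors[:, 3]).long()
    canvas[v_idx] = voxel_mean
    # Step 2: gather the canvas at every point's voxel index.
    p_idx = (pts_coors[:, 1] * cy * cx + pts_coors[:, 2] * cx +
             pts_coors[:, 3]).long()
    return canvas[p_idx]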
@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module):
                 points=None,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxC
-        coors (torch.Tensor): Nx(1+NDim)
-        """
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is NxC.
+            coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
+            points (list[torch.Tensor], optional): Raw points used to guide
+                the multi-modality fusion. Defaults to None.
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features
+                and their coordinates. If `return_point_feats` is True,
+                returns the features of each point inside the voxels.
+        """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
...
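With dynamic voxelization there is no fixed points-per-voxel dimension: the encoder consumes one row per point and emits one row per non-empty voxel. A hypothetical call, shapes only; `dynamic_vfe` stands for an already-built DynamicVFE instance:

import torch

# N raw points, 4 features each; dynamic voxelization upstream has already
# assigned every point a voxel coordinate (batch_idx, z, y, x).
features = torch.randn(10000, 4)
coors = torch.randint(0, 40, (10000, 4))
voxel_feats, voxel_coors = dynamic_vfe(features, coors)
# voxel_feats: [M, feat_channels[-1]] for the M non-empty voxels
# voxel_coors: [M, 4]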
@@ -320,10 +252,36 @@ class DynamicVFE(nn.Module):
 @VOXEL_ENCODERS.register_module()
 class HardVFE(nn.Module):
+    """Voxel feature encoder used in DV-SECOND.
+
+    It encodes features of voxels and their points. It can also fuse
+    image features into voxel features in a point-wise manner.
+
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Defaults to False.
+        with_cluster_center (bool): Whether to use the distance to the cluster
+            center of points inside a voxel. Defaults to False.
+        with_voxel_center (bool): Whether to use the distance to the center of
+            the voxel for each point inside a voxel. Defaults to False.
+        voxel_size (tuple[float]): Size of a single voxel. Defaults to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Defaults to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode when pooling features of points inside a voxel.
+            Available options include 'max' and 'avg'. Defaults to 'max'.
+        fusion_layer (dict | None): The config dict of the fusion layer used
+            in multi-modal detectors. Defaults to None.
+        return_point_feats (bool): Whether to return the features of each
+            point. Defaults to False.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                  with_distance=False,
                  with_cluster_center=False,
                  with_voxel_center=False,
...
@@ -334,14 +292,14 @@ class HardVFE(nn.Module):
                  fusion_layer=None,
                  return_point_feats=False):
         super(HardVFE, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
         if with_distance:
-            num_input_features += 3
+            in_channels += 3
-        self.num_input_features = num_input_features
+        self.in_channels = in_channels
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
...
@@ -357,16 +315,16 @@ class HardVFE(nn.Module):
         self.point_cloud_range = point_cloud_range
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             # TODO: pass norm_cfg to VFE
             # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
-            if i == (len(num_filters) - 2):
+            if i == (len(feat_channels) - 2):
                 cat_max = False
                 max_out = True
                 if fusion_layer:
...
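The `in_filters *= 2` above suggests that every intermediate VFE layer concatenates the voxel-wise max back onto each point feature, doubling the channel count, while the last layer (`cat_max = False`, `max_out = True`) emits only the pooled voxel feature. A small worked example of that bookkeeping, assuming `in_channels` has grown to 10 after the center/distance expansions (the doubling rationale is an inference from this hunk, not stated in the commit):

in_channels = 10                        # e.g. 4 raw + 3 cluster + 3 voxel center
feat_channels = [in_channels] + [64, 128]
for i in range(len(feat_channels) - 1):
    in_filters = feat_channels[i] * (2 if i > 0 else 1)  # 10, then 128
    out_filters = feat_channels[i + 1]                   # 64, then 128
    last = i == len(feat_channels) - 2  # last layer: max_out=True, cat_max=False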
@@ -394,9 +352,20 @@ class HardVFE(nn.Module):
                 coors,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxMxC
-        coors (torch.Tensor): Nx(1+NDim)
-        """
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is MxNxC.
+            num_points (torch.Tensor): Number of points in each voxel.
+            coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features
+                and their coordinates. If `return_point_feats` is True,
+                returns the features of each point inside the voxels.
+        """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
...
@@ -438,19 +407,29 @@ class HardVFE(nn.Module):
         for i, vfe in enumerate(self.vfe_layers):
             voxel_feats = vfe(voxel_feats)
-        if torch.isnan(voxel_feats).any():
-            import pdb
-            pdb.set_trace()
         if (self.fusion_layer is not None and img_feats is not None):
             voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
                                                 coors, img_feats, img_meta)
-        if torch.isnan(voxel_feats).any():
-            import pdb
-            pdb.set_trace()
         return voxel_feats
     def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
                          img_meta):
+        """Fuse image and point features with a mask.
+
+        Args:
+            features (torch.Tensor): Features of voxels, usually the values
+                of the points in the voxels.
+            mask (torch.Tensor): Mask indicating the valid features in each
+                voxel.
+            voxel_feats (torch.Tensor): Features of voxels.
+            coors (torch.Tensor): Coordinates of each single voxel.
+            img_feats (list[torch.Tensor]): Multi-scale feature maps of image.
+            img_meta (list(dict)): Meta information of image and points.
+
+        Returns:
+            torch.Tensor: Fused features of each voxel.
+        """
         # the features consist of a batch of points
         batch_size = coors[-1, 0] + 1
         points = []
...
@@ -459,20 +438,13 @@ class HardVFE(nn.Module):
             points.append(features[single_mask][mask[single_mask]])

         point_feats = voxel_feats[mask]
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()
         point_feats = self.fusion_layer(img_feats, points, point_feats,
                                         img_meta)
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()
         voxel_canvas = voxel_feats.new_zeros(
             size=(voxel_feats.size(0), voxel_feats.size(1),
                   point_feats.size(-1)))
         voxel_canvas[mask] = point_feats
         out = torch.max(voxel_canvas, dim=1)[0]
-        if torch.isnan(out).any():
-            import pdb
-            pdb.set_trace()
         return out
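The canvas trick at the end of `fusion_with_mask` is worth a toy illustration: the fused per-point features are scattered back into a padded [M, N, C] tensor through the boolean mask, and a max over the point dimension yields one fused feature per voxel. The shapes below are invented for the demo:

import torch

M, N, C = 2, 3, 4                            # voxels, max points, channels
mask = torch.tensor([[True, True, False],    # voxel 0 holds 2 valid points
                     [True, False, False]])  # voxel 1 holds 1
point_feats = torch.randn(int(mask.sum()), C)  # fused features of valid points
voxel_canvas = point_feats.new_zeros(M, N, C)
voxel_canvas[mask] = point_feats             # scatter back into padded layout
out = torch.max(voxel_canvas, dim=1)[0]      # [M, C] one feature per voxel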