Commit 191288eb authored by zhangwenwei

Clean voxel encoders

parent 27ebcfac
# model settings
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]
class_names = [
'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]
model = dict(
type='MVXFasterRCNNV2',
pts_voxel_layer=dict(
max_num_points=64, # max_points_per_voxel
point_cloud_range=point_cloud_range, # velodyne coordinates, x, y, z
voxel_size=voxel_size,
        max_voxels=(30000, 40000),  # (training, testing) max_voxels
),
pts_voxel_encoder=dict(
type='HardVFE',
num_input_features=4,
num_filters=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
pts_middle_encoder=dict(
type='PointPillarsScatter',
in_channels=64,
output_shape=[400, 400], # checked from PointCloud3D
),
pts_backbone=dict(
type='SECOND',
in_channels=64,
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
layer_nums=[3, 5, 5],
layer_strides=[2, 2, 2],
out_channels=[64, 128, 256],
),
pts_neck=dict(
type='SECONDFPN',
norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
in_channels=[64, 128, 256],
upsample_strides=[1, 2, 4],
out_channels=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DHead',
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
[-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
[-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
[-49.6, -49.6, -1.68526504, 49.6, 49.6, -1.68526504],
[-49.6, -49.6, -1.67339111, 49.6, 49.6, -1.67339111],
[-49.6, -49.6, -1.61785072, 49.6, 49.6, -1.61785072],
[-49.6, -49.6, -1.80984986, 49.6, 49.6, -1.80984986],
[-49.6, -49.6, -1.763965, 49.6, 49.6, -1.763965],
],
sizes=[
[1.95017717, 4.60718145, 1.72270761], # car
[2.4560939, 6.73778078, 2.73004906], # truck
[2.87427237, 12.01320693, 3.81509561], # trailer
[0.60058911, 1.68452161, 1.27192197], # bicycle
[0.66344886, 0.7256437, 1.75748069], # pedestrian
[0.39694519, 0.40359262, 1.06232151], # traffic_cone
[2.49008838, 0.48578221, 0.98297065], # barrier
],
custom_values=[0, 0],
rotations=[0, 1.57],
reshape_out=True),
assigner_per_size=False,
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
# model training and testing settings
train_cfg = dict(
pts=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
allowed_border=0,
code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
pos_weight=-1,
debug=False))
test_cfg = dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_pre=1000,
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_num=500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
input_modality = dict(
use_lidar=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(),
classes=class_names,
sample_groups=dict(
bus=4,
trailer=4,
truck=4,
))
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.3925, 0.3925],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0, 0, 0]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=5,
use_dim=5,
file_client_args=file_client_args),
dict(
type='LoadPointsFromMultiSweeps',
sweeps_num=10,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='RandomFlip3D', flip_ratio=0),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_train.pkl',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'nuscenes_infos_val.pkl',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[20, 23])
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 24
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d'
load_from = None
resume_from = None
workflow = [('train', 1)]
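As a quick sanity check (a sketch, not repo code), the BEV grid implied by voxel_size and point_cloud_range above matches the PointPillarsScatter output_shape=[400, 400]:

# Sketch: derive the scatter canvas from the settings above.
voxel_size = [0.25, 0.25, 8]
point_cloud_range = [-50, -50, -5, 50, 50, 3]

nx = round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0])  # 400
ny = round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1])  # 400
nz = round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size[2])  # 1, i.e. pillars
assert [ny, nx] == [400, 400] and nz == 1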
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='VoxelNet',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000),  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseEncoder',
in_channels=4,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256],
),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256],
),
bbox_head=dict(
type='Anchor3DHead',
num_classes=1,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
),
)
# model training and testing settings
train_cfg = dict(
assigner=dict(
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False)
test_cfg = dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False,
)
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
classes=class_names,
sample_groups=dict(Car=15),
)
file_client_args = dict(
backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(
type='LoadAnnotations3D',
with_bbox_3d=True,
with_label_3d=True,
file_client_args=file_client_args),
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]
test_pipeline = [
dict(
type='LoadPointsFromFile',
load_dim=4,
use_dim=4,
file_client_args=file_client_args),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points']),
]
data = dict(
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=train_pipeline,
modality=input_modality,
classes=class_names,
test_mode=False),
val=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True),
test=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pts_prefix='velodyne_reduced',
pipeline=test_pipeline,
modality=input_modality,
classes=class_names,
test_mode=True))
# optimizer
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/sec_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
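Similarly, a hedged sketch (not repo code) of where the SparseEncoder's sparse_shape=[41, 1600, 1408] comes from; the configured shape adds one extra cell along z on top of the raw grid:

voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

nx = round((point_cloud_range[3] - point_cloud_range[0]) / voxel_size[0])  # 1408
ny = round((point_cloud_range[4] - point_cloud_range[1]) / voxel_size[1])  # 1600
nz = round((point_cloud_range[5] - point_cloud_range[2]) / voxel_size[2])  # 40
assert [nz + 1, ny, nx] == [41, 1600, 1408]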
@@ -28,8 +28,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='DynamicVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,
...
@@ -12,8 +12,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='DynamicPillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range,
...
@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
         voxel_size=voxel_size,
         point_cloud_range=point_cloud_range),
     middle_encoder=dict(
...
@@ -11,8 +11,7 @@ model = dict(
         max_voxels=(-1, -1),  # (training, testing) max_voxels
     ),
     voxel_encoder=dict(
-        type='DynamicVFEV3',
-        num_input_features=4,
+        type='DynamicSimpleVFE',
        voxel_size=voxel_size,
         point_cloud_range=point_cloud_range),
     middle_encoder=dict(
...
@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,
...
@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
...
@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000)  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseUNet',
         in_channels=4,
...
@@ -10,8 +10,8 @@ model = dict(
     ),
     voxel_encoder=dict(
         type='PillarFeatureNet',
-        num_input_features=4,
-        num_filters=[64],
+        in_channels=4,
+        feat_channels=[64],
         with_distance=False,
         # these two arguments should be consistent with the voxel_generator
         voxel_size=[0.16, 0.16, 4],
...
@@ -10,11 +10,7 @@ model = dict(
         voxel_size=voxel_size,
         max_voxels=(16000, 40000),  # (training, testing) max_voxels
     ),
-    voxel_encoder=dict(
-        type='VoxelFeatureExtractorV3',
-        num_input_features=4,
-        num_filters=[4],
-        with_distance=False),
+    voxel_encoder=dict(type='HardSimpleVFE'),
     middle_encoder=dict(
         type='SparseEncoder',
         in_channels=4,
@@ -104,9 +100,21 @@ db_sampler = dict(
     classes=class_names,
     sample_groups=dict(Car=15),
 )
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations3D',
+        with_bbox_3d=True,
+        with_label_3d=True,
+        file_client_args=file_client_args),
     dict(type='ObjectSample', db_sampler=db_sampler),
     dict(
         type='ObjectNoise',
@@ -126,7 +134,11 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=4,
+        use_dim=4,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(
         type='DefaultFormatBundle3D',
...
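The file_client_args switch above lets the same pipeline read from local disk or from a petrel/s3 bucket. A minimal sketch of how a loading step can consume it, assuming mmcv's FileClient API of this era; load_points is a hypothetical helper, not repo code:

import numpy as np
from mmcv.fileio import FileClient

file_client_args = dict(backend='disk')
client = FileClient(**file_client_args)  # 'petrel' would route reads to s3

def load_points(pts_filename, load_dim=4):
    pts_bytes = client.get(pts_filename)  # raw bytes from the chosen backend
    return np.frombuffer(pts_bytes, dtype=np.float32).reshape(-1, load_dim)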
@@ -2,7 +2,7 @@
 norm_cfg = dict(type='BN', requires_grad=False)
 model = dict(
     type='FasterRCNN',
-    pretrained=('open-mmlab://resnet50_caffe_bgr'),
+    pretrained=('open-mmlab://detectron2/resnet50_caffe'),
     backbone=dict(
         type='ResNet',
         depth=50,
@@ -120,13 +120,25 @@ classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
            'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
 img_norm_cfg = dict(
     mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://nuscenes/nuscenes/',
+        'data/nuscenes/': 's3://nuscenes/nuscenes/'
+    }))
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
+    dict(
+        type='LoadAnnotations',
+        with_bbox=True,
+        with_mask=False,
+        file_client_args=file_client_args),
     dict(
         type='Resize',
-        img_scale=[(1200, 720), (1920, 1080)],
-        multiscale_mode='range',
+        img_scale=(1280, 720),
+        ratio_range=(0.75, 1.25),
         keep_ratio=True),
     dict(type='RandomFlip', flip_ratio=0.5),
     dict(type='Normalize', **img_norm_cfg),
@@ -135,10 +147,10 @@ train_pipeline = [
     dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', file_client_args=file_client_args),
     dict(
         type='MultiScaleFlipAug',
-        img_scale=(1600, 900),
+        img_scale=(1280, 720),
         flip=False,
         transforms=[
             dict(type='Resize', keep_ratio=True),
@@ -192,6 +204,6 @@ total_epochs = 12
 dist_params = dict(backend='nccl', port=29501)
 log_level = 'INFO'
 work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
-load_from = None
+load_from = './pretrain_mmdet/faster_r50_fpn_detectron2-caffe_freezeBN_l1-loss_roialign-v2_3x-4767dd8e.pth'  # noqa
 resume_from = None
 workflow = [('train', 1)]
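The Resize change above replaces the two-endpoint multiscale_mode='range' form with one base img_scale jittered by ratio_range. A hedged sketch of the sampling this implies (sample_scale is illustrative, mirroring mmdet's random-ratio behavior):

import random

def sample_scale(img_scale=(1280, 720), ratio_range=(0.75, 1.25)):
    # One base scale times a uniform ratio, so the effective training
    # scales span (960, 540) to (1600, 900).
    ratio = random.uniform(*ratio_range)
    return int(img_scale[0] * ratio), int(img_scale[1] * ratio)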
# model settings
norm_cfg = dict(type='BN', requires_grad=False)
model = dict(
type='FasterRCNN',
pretrained='open-mmlab://regnetx_3.2gf',
backbone=dict(
type='RegNet',
arch='regnetx_3.2gf',
out_indices=(0, 1, 2, 3),
frozen_stages=1,
base_channels=32,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch'),
neck=dict(
type='FPN',
in_channels=[96, 192, 432, 1008],
out_channels=256,
num_outs=5),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_generator=dict(
type='AnchorGenerator',
scales=[8],
ratios=[0.5, 1.0, 2.0],
strides=[4, 8, 16, 32, 64]),
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0]),
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
roi_head=dict(
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=7, sample_num=0),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=dict(
type='Shared2FCBBoxHead',
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=10,
bbox_coder=dict(
type='DeltaXYWHBBoxCoder',
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2]),
reg_class_agnostic=False,
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=-1,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
# following the setting of detectron,
# which improves ~0.2 bbox mAP.
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
pos_weight=-1,
debug=False))
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.05, nms=dict(type='nms', iou_thr=0.5), max_per_img=100)
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/nuscenes/'
classes = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
'motorcycle', 'pedestrian', 'traffic_cone', 'barrier')
img_norm_cfg = dict(
    # The mean and std are used in PyCls when training RegNets
mean=[103.53, 116.28, 123.675],
std=[57.375, 57.12, 58.395],
to_rgb=False)
file_client_args = dict(
backend='petrel',
path_mapping=dict({
'./data/nuscenes/': 's3://nuscenes/nuscenes/',
'data/nuscenes/': 's3://nuscenes/nuscenes/'
}))
train_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='LoadAnnotations',
with_bbox=True,
with_mask=False,
file_client_args=file_client_args),
dict(
type='Resize',
img_scale=(1280, 720),
ratio_range=(0.75, 1.25),
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(
type='MultiScaleFlipAug',
img_scale=(1280, 720),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_train.coco.json',
pipeline=train_pipeline),
val=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline),
test=dict(
type=dataset_type,
classes=classes,
ann_file=data_root + 'nuscenes_infos_val.coco.json',
pipeline=test_pipeline))
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 1000,
step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
evaluation = dict(interval=1)
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl', port=29501)
log_level = 'INFO'
work_dir = './work_dirs/faster_rcnn_r50_fpn_1x'
load_from = './pretrain_mmdet/mask_rcnn_regnetx-3GF_fpn_mstrain_3x_coco_box-AP-43.1_mask-AP-38.7-e003695a.pth' # noqa
resume_from = None
workflow = [('train', 1)]
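A minimal usage sketch for a config like the one above, assuming mmdet 2.x-era APIs (the config path is hypothetical):

from mmcv import Config
from mmdet.models import build_detector

cfg = Config.fromfile('configs/faster_rcnn_regnetx-3.2GF_fpn_1x_nus.py')  # hypothetical path
model = build_detector(cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)
model.init_weights()  # backbone weights come from cfg.model['pretrained']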
@@ -15,8 +15,8 @@ model = dict(
     ),
     pts_voxel_encoder=dict(
         type='HardVFE',
-        num_input_features=4,
-        num_filters=[64, 64],
+        in_channels=4,
+        feat_channels=[64, 64],
         with_distance=False,
         voxel_size=voxel_size,
         with_cluster_center=True,
@@ -85,9 +85,7 @@ model = dict(
             loss_weight=1.0),
         loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
-    ),
-)
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)))
 # model training and testing settings
 train_cfg = dict(
     pts=dict(
@@ -138,10 +136,23 @@ db_sampler = dict(
         trailer=4,
         truck=4,
     ))
+file_client_args = dict(backend='disk')
+# file_client_args = dict(
+#     backend='petrel',
+#     path_mapping=dict({
+#         './data/nuscenes/': 's3://nuscenes/nuscenes/',
+#         'data/nuscenes/': 's3://nuscenes/nuscenes/'
+#     }))
 train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
         type='GlobalRotScale',
@@ -156,8 +167,15 @@ train_pipeline = [
     dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
 ]
 test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=5, use_dim=5),
-    dict(type='LoadPointsFromMultiSweeps', sweeps_num=10),
+    dict(
+        type='LoadPointsFromFile',
+        load_dim=5,
+        use_dim=5,
+        file_client_args=file_client_args),
+    dict(
+        type='LoadPointsFromMultiSweeps',
+        sweeps_num=10,
+        file_client_args=file_client_args),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='RandomFlip3D', flip_ratio=0),
     dict(
...
@@ -466,8 +466,8 @@ def get_frustum(bbox_image, C, near_clip=0.001, far_clip=100):
 def surface_equ_3d(polygon_surfaces):
     # return [a, b, c], d in ax+by+cz+d=0
     # polygon_surfaces: [num_polygon, num_surfaces, num_points_of_polygon, 3]
-    surface_vec = polygon_surfaces[:, :, :2, :] - polygon_surfaces[:, :,
-                                                                   1:3, :]
+    surface_vec = polygon_surfaces[:, :, :2, :] - \
+        polygon_surfaces[:, :, 1:3, :]
     # normal_vec: [..., 3]
     normal_vec = np.cross(surface_vec[:, :, 0, :], surface_vec[:, :, 1, :])
     # print(normal_vec.shape, points[..., 0, :].shape)
...
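For context, a self-contained sketch of the plane-equation math that surface_equ_3d vectorizes; the single-surface helper below is illustrative only:

import numpy as np

def surface_equ_3d_single(surface_points):
    # surface_points: (3, 3), three vertices of one planar surface.
    surface_vec = surface_points[:2, :] - surface_points[1:3, :]
    normal_vec = np.cross(surface_vec[0, :], surface_vec[1, :])  # [a, b, c]
    d = -np.dot(normal_vec, surface_points[0, :])
    return normal_vec, d

normal, d = surface_equ_3d_single(
    np.array([[0., 0., 1.], [1., 0., 1.], [0., 1., 1.]]))
# the plane z = 1 gives normal [0, 0, 1] and d = -1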
@@ -121,8 +121,7 @@ class PartA2BboxHead(nn.Module):
                     3,
                     padding=1,
                     norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down0',
-                    conv_type='SubMConv3d'))
+                    indice_key='rcnn_down0'))
         merge_conv_channel_last = channel
         down_conv_channel_last = merge_conv_channel_last
@@ -135,8 +134,7 @@ class PartA2BboxHead(nn.Module):
                     3,
                     padding=1,
                     norm_cfg=norm_cfg,
-                    indice_key=f'rcnn_down1',
-                    conv_type='SubMConv3d'))
+                    indice_key='rcnn_down1'))
         down_conv_channel_last = channel
         self.conv_down.add_module('merge_conv',
...
-from .pillar_encoder import AlignedPillarFeatureNet, PillarFeatureNet
-from .voxel_encoder import (DynamicVFE, VoxelFeatureExtractor,
-                            VoxelFeatureExtractorV2, VoxelFeatureExtractorV3)
+from .pillar_encoder import PillarFeatureNet
+from .voxel_encoder import DynamicSimpleVFE, DynamicVFE, HardSimpleVFE, HardVFE

 __all__ = [
-    'PillarFeatureNet', 'AlignedPillarFeatureNet', 'VoxelFeatureExtractor',
-    'DynamicVFE', 'VoxelFeatureExtractorV2', 'VoxelFeatureExtractorV3'
+    'PillarFeatureNet', 'HardVFE', 'DynamicVFE', 'HardSimpleVFE',
+    'DynamicSimpleVFE'
 ]
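The import/__all__ change tracks the encoder renames in the VOXEL_ENCODERS registry. A minimal sketch of the registry pattern involved, assuming mmcv's Registry/build_from_cfg utilities; the stub class stands in for the real nn.Module:

from mmcv.utils import Registry, build_from_cfg

VOXEL_ENCODERS = Registry('voxel_encoder')

@VOXEL_ENCODERS.register_module()
class HardSimpleVFE:  # stub standing in for the real encoder
    def __init__(self):
        pass

# Configs select encoders by class name, which is why renaming
# VoxelFeatureExtractorV3 touches every voxel_encoder=dict(type=...) above.
encoder = build_from_cfg(dict(type='HardSimpleVFE'), VOXEL_ENCODERS)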
@@ -9,55 +9,54 @@ from .utils import PFNLayer, get_paddings_indicator

 @VOXEL_ENCODERS.register_module()
 class PillarFeatureNet(nn.Module):
+    """Pillar Feature Net.
+
+    The network prepares the pillar features and performs forward pass
+    through PFNLayers.
+
+    Args:
+        in_channels (int): Number of input features,
+            either x, y, z or x, y, z, r.
+        feat_channels (list[int]): Number of features in each of the
+            N PFNLayers.
+        with_distance (bool): Whether to include Euclidean distance
+            to points.
+        voxel_size (list[float]): Size of voxels, only utilize x and y
+            size.
+        point_cloud_range (list[float]): Point cloud range, only
+            utilizes x and y min.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1),
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  mode='max'):
-        """ Pillar Feature Net.
-        The network prepares the pillar features and performs forward pass
-        through PFNLayers.
-        Args:
-            num_input_features (int). Number of input features,
-                either x, y, z or x, y, z, r.
-            use_norm (bool). Whether to include BatchNorm.
-            num_filters (list[int]). Number of features in each of the
-                N PFNLayers.
-            with_distance (bool). Whether to include Euclidean distance
-                to points.
-            voxel_size (list[float]). Size of voxels, only utilize x and y
-                size.
-            point_cloud_range (list[float]). Point cloud range, only
-                utilize x and y min.
-        """
         super(PillarFeatureNet, self).__init__()
-        assert len(num_filters) > 0
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 2
+            in_channels += 2
         if with_distance:
-            num_input_features += 1
+            in_channels += 1
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
         # Create PillarFeatureNet layers
-        self.num_input_features = num_input_features
-        num_filters = [num_input_features] + list(num_filters)
+        self.in_channels = in_channels
+        feat_channels = [in_channels] + list(feat_channels)
         pfn_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
-            if i < len(num_filters) - 2:
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
+            if i < len(feat_channels) - 2:
                 last_layer = False
             else:
                 last_layer = True
@@ -65,7 +64,7 @@ class PillarFeatureNet(nn.Module):
                 PFNLayer(
                     in_filters,
                     out_filters,
-                    use_norm,
+                    norm_cfg=norm_cfg,
                     last_layer=last_layer,
                     mode=mode))
         self.pfn_layers = nn.ModuleList(pfn_layers)
@@ -122,9 +121,8 @@ class PillarFeatureNet(nn.Module):
 class DynamicPillarFeatureNet(PillarFeatureNet):

     def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=(64, ),
+                 in_channels=4,
+                 feat_channels=(64, ),
                  with_distance=False,
                  with_cluster_center=True,
                  with_voxel_center=True,
@@ -138,23 +136,23 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
         """
         super(DynamicPillarFeatureNet, self).__init__(
-            num_input_features,
-            use_norm,
-            num_filters,
+            in_channels,
+            feat_channels,
             with_distance,
             with_cluster_center=with_cluster_center,
             with_voxel_center=with_voxel_center,
             voxel_size=voxel_size,
             point_cloud_range=point_cloud_range,
+            norm_cfg=norm_cfg,
             mode=mode)
-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         pfn_layers = []
         # TODO: currently only support one PFNLayer
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
@@ -235,145 +233,3 @@ class DynamicPillarFeatureNet(PillarFeatureNet):
             features = torch.cat([point_feats, feat_per_point], dim=1)
         return voxel_feats, voxel_coors
@VOXEL_ENCODERS.register_module()
class AlignedPillarFeatureNet(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=(64, ),
with_distance=False,
with_cluster_center=True,
with_voxel_center=True,
voxel_size=(0.2, 0.2, 4),
point_cloud_range=(0, -40, -3, 70.4, 40, 1),
mode='max'):
""" Pillar Feature Net.
The network prepares the pillar features and performs forward pass
through PFNLayers.
Args:
num_input_features (int): Number of input features, either x, y, z
or x, y, z, r.
use_norm (bool): Whether to include BatchNorm.
num_filters (list[int]): Number of features in each of the N
PFNLayers.
with_distance (bool): Whether to include Euclidean distance to
points.
voxel_size (list[float]): Size of voxels, only utilize x and y
size.
point_cloud_range: (list[float]): Point cloud range, only
utilize x and y min.
"""
super(AlignedPillarFeatureNet, self).__init__()
assert len(num_filters) > 0
if with_cluster_center:
print('Use cluster center')
num_input_features += 3
if with_voxel_center:
print('Use voxel center')
num_input_features += 2
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center
# Create PillarFeatureNet layers
num_filters = [num_input_features] + list(num_filters)
pfn_layers = []
for i in range(len(num_filters) - 1):
in_filters = num_filters[i]
out_filters = num_filters[i + 1]
if i < len(num_filters) - 2:
last_layer = False
else:
last_layer = True
pfn_layers.append(
PFNLayer(
in_filters,
out_filters,
use_norm,
last_layer=last_layer,
mode=mode))
self.pfn_layers = nn.ModuleList(pfn_layers)
# Need pillar (voxel) size and x/y offset in order to
# calculate pillar offset
self.vx = voxel_size[0]
self.vy = voxel_size[1]
self.vz = voxel_size[2]
self.x_offset = self.vx / 2 + point_cloud_range[0]
self.y_offset = self.vy / 2 + point_cloud_range[1]
self.z_offset = self.vz / 2 + point_cloud_range[2]
def forward(self, features, num_points, coors):
features_ls = [features]
# Find distance of x, y, and z from cluster center
if self._with_cluster_center:
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_points.type_as(features).view(
-1, 1, 1)
f_cluster = features[:, :, :3] - points_mean
features_ls.append(f_cluster)
x_distance = features[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
y_distance = features[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
z_distance = features[:, :, 2] - (
coors[:, 1].type_as(features).unsqueeze(1) * self.vz +
self.z_offset)
normed_x_distance = 1 - torch.abs(x_distance / self.vx)
normed_y_distance = 1 - torch.abs(y_distance / self.vy)
normed_z_distance = 1 - torch.abs(z_distance / self.vz)
x_mask = torch.gt(normed_x_distance, 0).type_as(features)
y_mask = torch.gt(normed_y_distance, 0).type_as(features)
z_mask = torch.gt(normed_z_distance, 0).type_as(features)
nonzero_points_mask = x_mask.mul(y_mask).mul(z_mask)
aligned_distance = normed_x_distance.mul(normed_y_distance).mul(
normed_z_distance).mul(nonzero_points_mask)
# Find distance of x, y, and z from pillar center
if self._with_voxel_center:
f_center = features[:, :, :2]
f_center[:, :, 0] = f_center[:, :, 0] - (
coors[:, 3].type_as(features).unsqueeze(1) * self.vx +
self.x_offset)
f_center[:, :, 1] = f_center[:, :, 1] - (
coors[:, 2].type_as(features).unsqueeze(1) * self.vy +
self.y_offset)
features_ls.append(f_center)
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features_ls.append(points_dist)
# Combine together feature decorations
features = torch.cat(features_ls, dim=-1)
# The feature decorations were calculated without regard to
# whether pillar was empty. Need to ensure that
# empty pillars remain set to zeros.
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_points, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
features *= mask
for pfn in self.pfn_layers:
if pfn.last_vfe:
features = pfn(features, aligned_distance)
else:
features = pfn(features)
return features.squeeze()
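A quick check (a sketch, not repo code) of how PillarFeatureNet grows its input channels with the decoration flags used in the configs above:

def decorated_in_channels(in_channels=4,
                          with_cluster_center=True,
                          with_voxel_center=True,
                          with_distance=False):
    if with_cluster_center:
        in_channels += 3  # offsets to the mean of points in the pillar
    if with_voxel_center:
        in_channels += 2  # x/y offsets to the pillar center
    if with_distance:
        in_channels += 1  # Euclidean distance to the origin
    return in_channels

assert decorated_in_channels() == 9  # the classic PointPillars setting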
@@ -4,28 +4,15 @@ from torch import nn
 from torch.nn import functional as F


-class Empty(nn.Module):
-
-    def __init__(self, *args, **kwargs):
-        super(Empty, self).__init__()
-
-    def forward(self, *args, **kwargs):
-        if len(args) == 1:
-            return args[0]
-        elif len(args) == 0:
-            return None
-        return args
-
-
 def get_paddings_indicator(actual_num, max_num, axis=0):
     """Create boolean mask by actual number of a padded tensor.

     Args:
-        actual_num ([type]): [description]
-        max_num ([type]): [description]
+        actual_num (torch.Tensor): Actual number of points in each voxel.
+        max_num (int): Max number of points in each voxel.

     Returns:
-        [type]: [description]
+        torch.Tensor: Mask indicates which points are valid inside a voxel.
     """
     actual_num = torch.unsqueeze(actual_num, axis + 1)
     # tiled_actual_num: [N, M, 1]
@@ -52,13 +39,9 @@ class VFELayer(nn.Module):
         self.cat_max = cat_max
         self.max_out = max_out
         # self.units = int(out_channels / 2)
-        if norm_cfg:
-            norm_name, norm_layer = build_norm_layer(norm_cfg, out_channels)
-            self.norm = norm_layer
-            self.linear = nn.Linear(in_channels, out_channels, bias=False)
-        else:
-            self.norm = Empty(out_channels)
-            self.linear = nn.Linear(in_channels, out_channels, bias=True)
+
+        self.norm = build_norm_layer(norm_cfg, out_channels)[1]
+        self.linear = nn.Linear(in_channels, out_channels, bias=False)

     def forward(self, inputs):
         # [K, T, 7] tensordot [7, units] = [K, T, units]
@@ -89,7 +72,7 @@ class PFNLayer(nn.Module):
     def __init__(self,
                  in_channels,
                  out_channels,
-                 use_norm=True,
+                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                  last_layer=False,
                  mode='max'):
         """ Pillar Feature Net Layer.
@@ -100,9 +83,11 @@
         Args:
             in_channels (int): Number of input channels.
             out_channels (int): Number of output channels.
-            use_norm (bool): Whether to include BatchNorm.
+            norm_cfg (dict): Config dict of normalization layers.
             last_layer (bool): If last_layer, there is no concatenation of
                 features.
+            mode (str): Pooling mode to gather features inside voxels.
+                Default to 'max'.
         """

         super().__init__()
@@ -112,13 +97,10 @@
             out_channels = out_channels // 2
         self.units = out_channels

-        if use_norm:
-            self.norm = nn.BatchNorm1d(self.units, eps=1e-3, momentum=0.01)
-            self.linear = nn.Linear(in_channels, self.units, bias=False)
-        else:
-            self.norm = Empty(self.unints)
-            self.linear = nn.Linear(in_channels, self.units, bias=True)
+        self.norm = build_norm_layer(norm_cfg, self.units)[1]
+        self.linear = nn.Linear(in_channels, self.units, bias=False)

+        assert mode in ['max', 'avg']
         self.mode = mode

     def forward(self, inputs, num_voxels=None, aligned_distance=None):
...
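A minimal, self-contained sketch of what get_paddings_indicator computes in the usual axis=0 case (an illustrative reimplementation, not the repo function):

import torch

def paddings_indicator(actual_num, max_num):
    # actual_num: (N,) valid-point counts per voxel.
    idx = torch.arange(
        max_num, dtype=actual_num.dtype, device=actual_num.device).view(1, -1)
    return actual_num.view(-1, 1) > idx  # (N, max_num) True for real points

mask = paddings_indicator(torch.tensor([3, 1]), max_num=4)
# tensor([[ True,  True,  True, False],
#         [ True, False, False, False]])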
 import torch
 from mmcv.cnn import build_norm_layer
 from torch import nn
-from torch.nn import functional as F

 from mmdet3d.ops import DynamicScatter
 from .. import builder
 from ..registry import VOXEL_ENCODERS
-from .utils import Empty, VFELayer, get_paddings_indicator
+from .utils import VFELayer, get_paddings_indicator


 @VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractor(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
+class HardSimpleVFE(nn.Module):
+    """Simple voxel feature encoder used in SECOND.
+
+    It simply averages the values of points in a voxel.
+    """
super(VoxelFeatureExtractor, self).__init__()
self.name = name
assert len(num_filters) == 2
num_input_features += 3 # add mean features
if with_distance:
num_input_features += 1
self._with_distance = with_distance
self.vfe1 = VFELayer(num_input_features, num_filters[0], use_norm)
self.vfe2 = VFELayer(num_filters[0], num_filters[1], use_norm)
if use_norm:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=False)
self.norm = nn.BatchNorm1d(num_filters[1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(num_filters[1], num_filters[1], bias=True)
self.norm = Empty(num_filters[1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
# t = time.time()
# torch.cuda.synchronize()
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
# mask = features.max(dim=2, keepdim=True)[0] != 0
# torch.cuda.synchronize()
# print("vfe prep forward time", time.time() - t)
x = self.vfe1(features)
x *= mask
x = self.vfe2(x)
x *= mask
x = self.linear(x)
x = self.norm(x.permute(0, 2, 1).contiguous()).permute(0, 2,
1).contiguous()
x = F.relu(x)
x *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(x, dim=1)[0]
return voxelwise
+    def __init__(self):
+        super(HardSimpleVFE, self).__init__()

-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV2(nn.Module):
def __init__(self,
num_input_features=4,
use_norm=True,
num_filters=[32, 128],
with_distance=False,
name='VoxelFeatureExtractor'):
super(VoxelFeatureExtractorV2, self).__init__()
self.name = name
assert len(num_filters) > 0
num_input_features += 3
if with_distance:
num_input_features += 1
self._with_distance = with_distance
num_filters = [num_input_features] + num_filters
filters_pairs = [[num_filters[i], num_filters[i + 1]]
for i in range(len(num_filters) - 1)]
self.vfe_layers = nn.ModuleList(
[VFELayer(i, o, use_norm) for i, o in filters_pairs])
if use_norm:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=False)
self.norm = nn.BatchNorm1d(
num_filters[-1], eps=1e-3, momentum=0.01)
else:
self.linear = nn.Linear(
num_filters[-1], num_filters[-1], bias=True)
self.norm = Empty(num_filters[-1])
def forward(self, features, num_voxels, **kwargs):
# features: [concated_num_points, num_voxel_size, 3(4)]
# num_voxels: [concated_num_points]
points_mean = features[:, :, :3].sum(
dim=1, keepdim=True) / num_voxels.type_as(features).view(-1, 1, 1)
features_relative = features[:, :, :3] - points_mean
if self._with_distance:
points_dist = torch.norm(features[:, :, :3], 2, 2, keepdim=True)
features = torch.cat([features, features_relative, points_dist],
dim=-1)
else:
features = torch.cat([features, features_relative], dim=-1)
voxel_count = features.shape[1]
mask = get_paddings_indicator(num_voxels, voxel_count, axis=0)
mask = torch.unsqueeze(mask, -1).type_as(features)
for vfe in self.vfe_layers:
features = vfe(features)
features *= mask
features = self.linear(features)
features = self.norm(features.permute(0, 2, 1).contiguous()).permute(
0, 2, 1).contiguous()
features = F.relu(features)
features *= mask
# x: [concated_num_points, num_voxel_size, 128]
voxelwise = torch.max(features, dim=1)[0]
return voxelwise
-@VOXEL_ENCODERS.register_module()
-class VoxelFeatureExtractorV3(nn.Module):
-
-    def __init__(self,
-                 num_input_features=4,
-                 use_norm=True,
-                 num_filters=[32, 128],
-                 with_distance=False,
-                 name='VoxelFeatureExtractor'):
-        super(VoxelFeatureExtractorV3, self).__init__()
-        self.name = name

     def forward(self, features, num_points, coors):
         # features: [concated_num_points, num_voxel_size, 3(4)]
@@ -153,13 +27,21 @@ class VoxelFeatureExtractorV3(nn.Module):

 @VOXEL_ENCODERS.register_module()
-class DynamicVFEV3(nn.Module):
+class DynamicSimpleVFE(nn.Module):
+    """Simple dynamic voxel feature encoder used in DV-SECOND.
+
+    It simply averages the values of points in a voxel.
+    But the number of points in a voxel is dynamic and varies.
+
+    Args:
+        voxel_size (tuple[float]): Size of a single voxel
+        point_cloud_range (tuple[float]): Range of the point cloud and voxels
+    """

     def __init__(self,
-                 num_input_features=4,
                  voxel_size=(0.2, 0.2, 4),
                  point_cloud_range=(0, -40, -3, 70.4, 40, 1)):
-        super(DynamicVFEV3, self).__init__()
+        super(DynamicSimpleVFE, self).__init__()
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

     @torch.no_grad()
@@ -172,10 +54,37 @@

 @VOXEL_ENCODERS.register_module()
 class DynamicVFE(nn.Module):
+    """Dynamic voxel feature encoder used in DV-SECOND.
+
+    It encodes features of voxels and their points. It could also fuse
+    image feature into voxel features in a point-wise manner.
+    The number of points inside the voxel varies.
+
+    Args:
+        in_channels (int): Input channels of VFE. Defaults to 4.
+        feat_channels (list(int)): Channels of features in VFE.
+        with_distance (bool): Whether to use the L2 distance of points to the
+            origin point. Default False.
+        with_cluster_center (bool): Whether to use the distance to cluster
+            center of points inside a voxel. Default to False.
+        with_voxel_center (bool): Whether to use the distance to center of
+            voxel for each point inside a voxel. Default to False.
+        voxel_size (tuple[float]): Size of a single voxel. Default to
+            (0.2, 0.2, 4).
+        point_cloud_range (tuple[float]): The range of points or voxels.
+            Default to (0, -40, -3, 70.4, 40, 1).
+        norm_cfg (dict): Config dict of normalization layers.
+        mode (str): The mode when pooling features of points inside a voxel.
+            Available options include 'max' and 'avg'. Default to 'max'.
+        fusion_layer (dict | None): The config dict of fusion layer used in
+            multi-modal detectors. Default to None.
+        return_point_feats (bool): Whether to return the features of each
+            point. Default to False.
+    """

     def __init__(self,
-                 num_input_features=4,
-                 num_filters=[],
+                 in_channels=4,
+                 feat_channels=[],
                  with_distance=False,
                  with_cluster_center=False,
                  with_voxel_center=False,
@@ -186,14 +95,15 @@ class DynamicVFE(nn.Module):
                  fusion_layer=None,
                  return_point_feats=False):
         super(DynamicVFE, self).__init__()
-        assert len(num_filters) > 0
+        assert mode in ['avg', 'max']
+        assert len(feat_channels) > 0
         if with_cluster_center:
-            num_input_features += 3
+            in_channels += 3
         if with_voxel_center:
-            num_input_features += 3
+            in_channels += 3
         if with_distance:
-            num_input_features += 3
+            in_channels += 3
-        self.num_input_features = num_input_features
+        self.in_channels = in_channels
         self._with_distance = with_distance
         self._with_cluster_center = with_cluster_center
         self._with_voxel_center = with_voxel_center
@@ -209,11 +119,11 @@ class DynamicVFE(nn.Module):
         self.point_cloud_range = point_cloud_range
         self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)

-        num_filters = [self.num_input_features] + list(num_filters)
+        feat_channels = [self.in_channels] + list(feat_channels)
         vfe_layers = []
-        for i in range(len(num_filters) - 1):
-            in_filters = num_filters[i]
-            out_filters = num_filters[i + 1]
+        for i in range(len(feat_channels) - 1):
+            in_filters = feat_channels[i]
+            out_filters = feat_channels[i + 1]
             if i > 0:
                 in_filters *= 2
             norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
@@ -232,6 +142,16 @@ class DynamicVFE(nn.Module):
             self.fusion_layer = builder.build_fusion_layer(fusion_layer)

     def map_voxel_center_to_point(self, pts_coors, voxel_mean, voxel_coors):
+        """Map voxel features to its corresponding points.
+
+        Args:
+            pts_coors (torch.Tensor): Voxel coordinate of each point.
+            voxel_mean (torch.Tensor): Voxel features to be mapped.
+            voxel_coors (torch.Tensor): Coordinates of valid voxels.
+
+        Returns:
+            torch.Tensor: Features or centers of each point.
+        """
         # Step 1: scatter voxel into canvas
         # Calculate necessary things for canvas creation
         canvas_z = int(
@@ -269,9 +189,21 @@ class DynamicVFE(nn.Module):
                 points=None,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxC
-        coors (torch.Tensor): Nx(1+NDim)
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is NxC.
+            coors (torch.Tensor): Coordinates of voxels, shape is Nx(1+NDim).
+            points (list[torch.Tensor], optional): Raw points used to guide the
+                multi-modality fusion. Defaults to None.
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features and
+                its coordinates. If `return_point_feats` is True, returns
+                features of each point inside voxels.
         """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
@VOXEL_ENCODERS.register_module() @VOXEL_ENCODERS.register_module()
class HardVFE(nn.Module): class HardVFE(nn.Module):
"""Voxel feature encoder used in DV-SECOND
It encodes features of voxels and their points. It could also fuse
image feature into voxel features in a point-wise manner.
Args:
in_channels (int): Input channels of VFE. Defaults to 4.
feat_channels (list(int)): Channels of features in VFE.
with_distance (bool): Whether to use the L2 distance of points to the
origin point. Default False.
with_cluster_center (bool): Whether to use the distance to cluster
center of points inside a voxel. Default to False.
with_voxel_center (bool): Whether to use the distance to center of
voxel for each points inside a voxel. Default to False.
voxel_size (tuple[float]): Size of a single voxel. Default to
(0.2, 0.2, 4).
point_cloud_range (tuple[float]): The range of points or voxels.
Default to (0, -40, -3, 70.4, 40, 1).
norm_cfg (dict): Config dict of normalization layers.
mode (str): The mode when pooling features of points inside a voxel.
Available options include 'max' and 'avg'. Default to 'max'.
fusion_layer (dict | None): The config dict of fusion layer used in
multi-modal detectors. Default to None.
return_point_feats (bool): Whether to return the features of each
points. Default to False.
"""
def __init__(self, def __init__(self,
num_input_features=4, in_channels=4,
num_filters=[], feat_channels=[],
with_distance=False, with_distance=False,
with_cluster_center=False, with_cluster_center=False,
with_voxel_center=False, with_voxel_center=False,
...@@ -334,14 +292,14 @@ class HardVFE(nn.Module): ...@@ -334,14 +292,14 @@ class HardVFE(nn.Module):
fusion_layer=None, fusion_layer=None,
return_point_feats=False): return_point_feats=False):
super(HardVFE, self).__init__() super(HardVFE, self).__init__()
assert len(num_filters) > 0 assert len(feat_channels) > 0
if with_cluster_center: if with_cluster_center:
num_input_features += 3 in_channels += 3
if with_voxel_center: if with_voxel_center:
num_input_features += 3 in_channels += 3
if with_distance: if with_distance:
num_input_features += 3 in_channels += 3
self.num_input_features = num_input_features self.in_channels = in_channels
self._with_distance = with_distance self._with_distance = with_distance
self._with_cluster_center = with_cluster_center self._with_cluster_center = with_cluster_center
self._with_voxel_center = with_voxel_center self._with_voxel_center = with_voxel_center
...@@ -357,16 +315,16 @@ class HardVFE(nn.Module): ...@@ -357,16 +315,16 @@ class HardVFE(nn.Module):
self.point_cloud_range = point_cloud_range self.point_cloud_range = point_cloud_range
self.scatter = DynamicScatter(voxel_size, point_cloud_range, True) self.scatter = DynamicScatter(voxel_size, point_cloud_range, True)
num_filters = [self.num_input_features] + list(num_filters) feat_channels = [self.in_channels] + list(feat_channels)
vfe_layers = [] vfe_layers = []
for i in range(len(num_filters) - 1): for i in range(len(feat_channels) - 1):
in_filters = num_filters[i] in_filters = feat_channels[i]
out_filters = num_filters[i + 1] out_filters = feat_channels[i + 1]
if i > 0: if i > 0:
in_filters *= 2 in_filters *= 2
# TODO: pass norm_cfg to VFE # TODO: pass norm_cfg to VFE
# norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters) # norm_name, norm_layer = build_norm_layer(norm_cfg, out_filters)
if i == (len(num_filters) - 2): if i == (len(feat_channels) - 2):
cat_max = False cat_max = False
max_out = True max_out = True
if fusion_layer: if fusion_layer:
@@ -394,9 +352,20 @@ class HardVFE(nn.Module):
                 coors,
                 img_feats=None,
                 img_meta=None):
-        """
-        features (torch.Tensor): NxMxC
-        coors (torch.Tensor): Nx(1+NDim)
+        """Forward function.
+
+        Args:
+            features (torch.Tensor): Features of voxels, shape is MxNxC.
+            num_points (torch.Tensor): Number of points in each voxel.
+            coors (torch.Tensor): Coordinates of voxels, shape is Mx(1+NDim).
+            img_feats (list[torch.Tensor], optional): Image features used for
+                multi-modality fusion. Defaults to None.
+            img_meta (dict, optional): [description]. Defaults to None.
+
+        Returns:
+            tuple: If `return_point_feats` is False, returns voxel features and
+                its coordinates. If `return_point_feats` is True, returns
+                features of each point inside voxels.
         """
         features_ls = [features]
         # Find distance of x, y, and z from cluster center
@@ -438,19 +407,29 @@ class HardVFE(nn.Module):
         for i, vfe in enumerate(self.vfe_layers):
             voxel_feats = vfe(voxel_feats)
-            if torch.isnan(voxel_feats).any():
-                import pdb
-                pdb.set_trace()

         if (self.fusion_layer is not None and img_feats is not None):
             voxel_feats = self.fusion_with_mask(features, mask, voxel_feats,
                                                 coors, img_feats, img_meta)
-            if torch.isnan(voxel_feats).any():
-                import pdb
-                pdb.set_trace()

         return voxel_feats

     def fusion_with_mask(self, features, mask, voxel_feats, coors, img_feats,
                          img_meta):
+        """Fuse image and point features with mask.
+
+        Args:
+            features (torch.Tensor): Features of voxel, usually it is the
+                values of points in voxels.
+            mask (torch.Tensor): Mask indicates valid features in each voxel.
+            voxel_feats (torch.Tensor): Features of voxels.
+            coors (torch.Tensor): Coordinates of each single voxel.
+            img_feats (list[torch.Tensor]): Multi-scale feature maps of image.
+            img_meta (list(dict)): Meta information of image and points.
+
+        Returns:
+            torch.Tensor: Fused features of each voxel.
+        """
         # the features consist of a batch of points
         batch_size = coors[-1, 0] + 1
         points = []
@@ -459,20 +438,13 @@ class HardVFE(nn.Module):
             points.append(features[single_mask][mask[single_mask]])

         point_feats = voxel_feats[mask]
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()
         point_feats = self.fusion_layer(img_feats, points, point_feats,
                                         img_meta)
-        if torch.isnan(point_feats).any():
-            import pdb
-            pdb.set_trace()

         voxel_canvas = voxel_feats.new_zeros(
             size=(voxel_feats.size(0), voxel_feats.size(1),
                   point_feats.size(-1)))
         voxel_canvas[mask] = point_feats
         out = torch.max(voxel_canvas, dim=1)[0]
-        if torch.isnan(out).any():
-            import pdb
-            pdb.set_trace()
         return out
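The forward bodies of the simple encoders are collapsed in the diff above; as a hedged sketch, the per-voxel averaging HardSimpleVFE performs amounts to:

import torch

def hard_simple_vfe_forward(features, num_points):
    # features: (N, M, C) zero-padded points per voxel; num_points: (N,).
    points_sum = features.sum(dim=1, keepdim=False)
    points_mean = points_sum / num_points.type_as(features).view(-1, 1)
    return points_mean.contiguous()

feats = torch.zeros(2, 3, 4)
feats[0, :2] = 1.0  # voxel 0 holds two valid points
out = hard_simple_vfe_forward(feats, torch.tensor([2, 1]))  # out[0] == 1.0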