Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenPCDet
Commits
a991105c
Unverified
Commit
a991105c
authored
Jan 05, 2022
by
Shaoshuai Shi
Committed by
GitHub
Jan 05, 2022
Browse files
Release the codes of PV-RCNN++, update OpenPCDet to v0.5.2
Release the codes of PV-RCNN++, update OpenPCDet to v0.5.2
parents
1483517a
b6fbf07f
Changes
26
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
1317 additions
and
1 deletion
+1317
-1
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool.cpp
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool.cpp
+203
-0
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.cu
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.cu
+486
-0
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.h
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.h
+71
-0
setup.py
setup.py
+3
-1
tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml
tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml
+277
-0
tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml
tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml
+277
-0
No files found.
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool.cpp
0 → 100644
View file @
a991105c
/*
Vector-pool aggregation based local feature aggregation for point cloud.
PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
https://arxiv.org/abs/2102.00463
Written by Shaoshuai Shi
All Rights Reserved 2020.
*/
#include <torch/serialize/tensor.h>
#include <vector>
#include <THC/THC.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "vector_pool_gpu.h"
extern
THCState
*
state
;
// Input-validation helpers for the tensor wrappers below.
// Each macro prints the offending expression with file/line to stderr and terminates
// the process with exit(-1) when the check fails.
// All of them expand to a single `do { ... } while (0)` statement so they are safe inside
// an un-braced if/else, and the argument is parenthesised so any expression can be passed.

// Fails unless tensor `x` reports is_cuda().
#define CHECK_CUDA(x) do { \
    if (!(x).is_cuda()) { \
        fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \
        exit(-1); \
    } \
} while (0)

// Fails unless tensor `x` reports is_contiguous().
#define CHECK_CONTIGUOUS(x) do { \
    if (!(x).is_contiguous()) { \
        fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \
        exit(-1); \
    } \
} while (0)

// Runs CHECK_CUDA followed by CHECK_CONTIGUOUS on `x` as one statement.
#define CHECK_INPUT(x) do { \
    CHECK_CUDA(x); \
    CHECK_CONTIGUOUS(x); \
} while (0)
int query_stacked_local_neighbor_idxs_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
    at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
    at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cumsum_tensor,
    int avg_length_of_neighbor_idxs, float max_neighbour_distance, int nsample, int neighbor_type){
    // Tensor-level entry point: validates the tensors, extracts raw pointers and sizes,
    // and forwards everything to query_stacked_local_neighbor_idxs_kernel_launcher_stack.
    //
    // support_xyz_tensor:         (N1 + N2 ..., 3) xyz coordinates of the features
    // xyz_batch_cnt_tensor:       (batch_size), [N1, N2, ...]
    // new_xyz_tensor:             (M1 + M2 ..., 3) centers of the query
    // new_xyz_batch_cnt_tensor:   (batch_size), [M1, M2, ...]
    // stack_neighbor_idxs_tensor: output buffer receiving the stacked neighbor indices
    // start_len_tensor:           output, two ints per center
    // cumsum_tensor:              running counter shared with the kernel
    // nsample:                    find all (-1), find limited number (>0)
    // neighbor_type:              1: ball, others: cube
    //
    // Always returns 0. A failed check terminates the process inside CHECK_INPUT.
    CHECK_INPUT(support_xyz_tensor);
    CHECK_INPUT(xyz_batch_cnt_tensor);
    CHECK_INPUT(new_xyz_tensor);
    CHECK_INPUT(new_xyz_batch_cnt_tensor);
    CHECK_INPUT(stack_neighbor_idxs_tensor);
    CHECK_INPUT(start_len_tensor);
    CHECK_INPUT(cumsum_tensor);

    // Raw views of the tensor storage.
    const float *support_xyz_ptr = support_xyz_tensor.data<float>();
    const int *xyz_batch_cnt_ptr = xyz_batch_cnt_tensor.data<int>();
    const float *new_xyz_ptr = new_xyz_tensor.data<float>();
    const int *new_xyz_batch_cnt_ptr = new_xyz_batch_cnt_tensor.data<int>();
    int *stack_neighbor_idxs_ptr = stack_neighbor_idxs_tensor.data<int>();
    int *start_len_ptr = start_len_tensor.data<int>();
    int *cumsum_ptr = cumsum_tensor.data<int>();

    // Sizes are read from the leading dimension of the count / center tensors.
    const int num_batches = xyz_batch_cnt_tensor.size(0);
    const int num_centers = new_xyz_tensor.size(0);

    query_stacked_local_neighbor_idxs_kernel_launcher_stack(
        support_xyz_ptr, xyz_batch_cnt_ptr, new_xyz_ptr, new_xyz_batch_cnt_ptr,
        stack_neighbor_idxs_ptr, start_len_ptr, cumsum_ptr,
        avg_length_of_neighbor_idxs, max_neighbour_distance,
        num_batches, num_centers, nsample, neighbor_type
    );
    return 0;
}
int query_three_nn_by_stacked_local_idxs_wrapper_stack(at::Tensor support_xyz_tensor,
    at::Tensor new_xyz_tensor, at::Tensor new_xyz_grid_centers_tensor,
    at::Tensor new_xyz_grid_idxs_tensor, at::Tensor new_xyz_grid_dist2_tensor,
    at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor,
    int M, int num_total_grids){
    // Tensor-level entry point: validates the tensors, extracts raw pointers and forwards
    // them to query_three_nn_by_stacked_local_idxs_kernel_launcher_stack.
    //
    // support_xyz_tensor:          (N1 + N2 ..., 3) xyz coordinates of the features
    // new_xyz_tensor:              (M1 + M2 ..., 3) centers of the ball query
    // new_xyz_grid_centers_tensor: (M1 + M2 ..., num_total_grids, 3) center of each grid
    // new_xyz_grid_idxs_tensor:    (M1 + M2 ..., num_total_grids, 3) output, three-nn indices
    // new_xyz_grid_dist2_tensor:   (M1 + M2 ..., num_total_grids, 3) output, squared three-nn distances
    // stack_neighbor_idxs_tensor:  (max_length_of_neighbor_idxs)
    // start_len_tensor:            (M1 + M2, 2) [start_offset, neighbor_length]
    //
    // Always returns 0. A failed check terminates the process inside CHECK_INPUT.
    CHECK_INPUT(support_xyz_tensor);
    CHECK_INPUT(new_xyz_tensor);
    CHECK_INPUT(new_xyz_grid_centers_tensor);
    CHECK_INPUT(new_xyz_grid_idxs_tensor);
    CHECK_INPUT(new_xyz_grid_dist2_tensor);
    CHECK_INPUT(stack_neighbor_idxs_tensor);
    CHECK_INPUT(start_len_tensor);

    // Read-only inputs.
    const float *support_xyz_ptr = support_xyz_tensor.data<float>();
    const float *new_xyz_ptr = new_xyz_tensor.data<float>();
    const float *grid_centers_ptr = new_xyz_grid_centers_tensor.data<float>();
    // Outputs written by the kernel.
    int *grid_idxs_ptr = new_xyz_grid_idxs_tensor.data<int>();
    float *grid_dist2_ptr = new_xyz_grid_dist2_tensor.data<float>();
    // Neighbor lists: passed to the launcher as non-const pointers, as in the prototype usage.
    int *stack_neighbor_idxs_ptr = stack_neighbor_idxs_tensor.data<int>();
    int *start_len_ptr = start_len_tensor.data<int>();

    query_three_nn_by_stacked_local_idxs_kernel_launcher_stack(
        support_xyz_ptr, new_xyz_ptr, grid_centers_ptr,
        grid_idxs_ptr, grid_dist2_ptr,
        stack_neighbor_idxs_ptr, start_len_ptr,
        M, num_total_grids
    );
    return 0;
}
int vector_pool_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
    at::Tensor support_features_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
    at::Tensor new_features_tensor, at::Tensor new_local_xyz_tensor,
    at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor,
    int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int use_xyz,
    int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){
    // Tensor-level entry point of the vector-pool forward pass: validates the tensors,
    // derives the sizes from their shapes and forwards to vector_pool_kernel_launcher_stack.
    //
    // support_xyz_tensor:       (N1 + N2 ..., 3) xyz coordinates of the features
    // support_features_tensor:  (N1 + N2 ..., C)
    // xyz_batch_cnt_tensor:     (batch_size), [N1, N2, ...]
    // new_xyz_tensor:           (M1 + M2 ..., 3) centers of new positions
    // new_features_tensor:      (M1 + M2 ..., C) output
    // new_local_xyz_tensor:     output, written only when use_xyz is set
    // new_xyz_batch_cnt_tensor: (batch_size), [M1, M2, ...]
    // point_cnt_of_grid_tensor: (M1 + M2 ..., num_total_grids)
    // grouped_idxs_tensor:      (num_max_sum_points, 3)
    // num_grid_x/y/z:           number of grids in each local area centered at new_xyz
    // use_xyz:                  whether to calculate new_local_xyz
    // neighbor_type:            1: ball, others: cube
    // pooling_type:             0: avg_pool, 1: random choice
    //
    // Returns the counter value reported by the launcher (cum_sum).
    CHECK_INPUT(support_xyz_tensor);
    CHECK_INPUT(support_features_tensor);
    CHECK_INPUT(xyz_batch_cnt_tensor);
    CHECK_INPUT(new_xyz_tensor);
    CHECK_INPUT(new_xyz_batch_cnt_tensor);
    CHECK_INPUT(new_features_tensor);
    CHECK_INPUT(new_local_xyz_tensor);
    CHECK_INPUT(point_cnt_of_grid_tensor);
    CHECK_INPUT(grouped_idxs_tensor);

    // Raw views of the tensor storage.
    const float *support_xyz_ptr = support_xyz_tensor.data<float>();
    const float *support_features_ptr = support_features_tensor.data<float>();
    const int *xyz_batch_cnt_ptr = xyz_batch_cnt_tensor.data<int>();
    const float *new_xyz_ptr = new_xyz_tensor.data<float>();
    const int *new_xyz_batch_cnt_ptr = new_xyz_batch_cnt_tensor.data<int>();
    float *new_features_ptr = new_features_tensor.data<float>();
    float *new_local_xyz_ptr = new_local_xyz_tensor.data<float>();
    int *point_cnt_of_grid_ptr = point_cnt_of_grid_tensor.data<int>();
    int *grouped_idxs_ptr = grouped_idxs_tensor.data<int>();

    // Problem sizes, taken from the tensor shapes.
    const int num_support = support_xyz_tensor.size(0);
    const int num_batches = xyz_batch_cnt_tensor.size(0);
    const int num_centers = new_xyz_tensor.size(0);
    const int channels_out = new_features_tensor.size(1);
    const int channels_in = support_features_tensor.size(1);
    const int total_grids = point_cnt_of_grid_tensor.size(1);

    const int grouped_total = vector_pool_kernel_launcher_stack(
        support_xyz_ptr, support_features_ptr, xyz_batch_cnt_ptr,
        new_xyz_ptr, new_features_ptr, new_local_xyz_ptr, new_xyz_batch_cnt_ptr,
        point_cnt_of_grid_ptr, grouped_idxs_ptr,
        num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance,
        num_batches, num_support, num_centers, channels_in, channels_out, total_grids,
        use_xyz, num_max_sum_points, nsample, neighbor_type, pooling_type
    );
    return grouped_total;
}
int vector_pool_grad_wrapper_stack(at::Tensor grad_new_features_tensor,
    at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor,
    at::Tensor grad_support_features_tensor)
{
    // Tensor-level entry point of the vector-pool backward pass: validates the tensors,
    // derives the sizes from their shapes and forwards to vector_pool_grad_kernel_launcher_stack.
    //
    // grad_new_features_tensor:     (M1 + M2 ..., C_out)
    // point_cnt_of_grid_tensor:     (M1 + M2 ..., num_total_grids)
    // grouped_idxs_tensor:          (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
    // grad_support_features_tensor: (N1 + N2 ..., C_in) output, accumulated into by the kernel
    //
    // Always returns 1. A failed check terminates the process inside CHECK_INPUT.
    CHECK_INPUT(grad_new_features_tensor);
    CHECK_INPUT(point_cnt_of_grid_tensor);
    CHECK_INPUT(grouped_idxs_tensor);
    CHECK_INPUT(grad_support_features_tensor);

    // Problem sizes, taken from the tensor shapes.
    const int num_centers = grad_new_features_tensor.size(0);
    const int channels_out = grad_new_features_tensor.size(1);
    const int num_support = grad_support_features_tensor.size(0);
    const int channels_in = grad_support_features_tensor.size(1);
    const int total_grids = point_cnt_of_grid_tensor.size(1);
    const int num_grouped = grouped_idxs_tensor.size(0);

    // Raw views of the tensor storage.
    const float *grad_new_features_ptr = grad_new_features_tensor.data<float>();
    const int *point_cnt_of_grid_ptr = point_cnt_of_grid_tensor.data<int>();
    const int *grouped_idxs_ptr = grouped_idxs_tensor.data<int>();
    float *grad_support_features_ptr = grad_support_features_tensor.data<float>();

    vector_pool_grad_kernel_launcher_stack(
        grad_new_features_ptr, point_cnt_of_grid_ptr, grouped_idxs_ptr, grad_support_features_ptr,
        num_support, num_centers, channels_out, channels_in, total_grids, num_grouped
    );
    return 1;
}
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.cu
0 → 100644
View file @
a991105c
/*
Vector-pool aggregation based local feature aggregation for point cloud.
PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
https://arxiv.org/abs/2102.00463
Written by Shaoshuai Shi
All Rights Reserved 2020.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "vector_pool_gpu.h"
#include "cuda_utils.h"
__global__ void query_three_nn_by_stacked_local_idxs_kernel(
    const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers,
    int *new_xyz_grid_idxs, float *new_xyz_grid_dist2,
    const int *stack_neighbor_idxs, const int *start_len,
    int M, int num_total_grids){
    // For one (center, grid) pair, scans the center's precomputed neighbor list and records
    // the three neighbors closest to the grid center (squared Euclidean distance).
    //
    // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
    // new_xyz: (M1 + M2 ..., 3) centers of the ball query (not read by this kernel)
    // new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
    // new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn
    // new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn
    // stack_neighbor_idxs: (max_length_of_neighbor_idxs)
    // start_len: (M1 + M2, 2) [start_offset, neighbor_length]
    //
    // Thread mapping: blockIdx.y selects the grid, the x dimension selects the center.
    const int grid_idx = blockIdx.y;
    const int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= M || grid_idx >= num_total_grids) return;

    // The three per-grid arrays share the same (pt, grid, 3) layout, hence one offset.
    const int slot_offset = pt_idx * num_total_grids * 3 + grid_idx * 3;
    const float *grid_center = new_xyz_grid_centers + slot_offset;
    int *out_idxs = new_xyz_grid_idxs + slot_offset;
    float *out_dist2 = new_xyz_grid_dist2 + slot_offset;

    // Neighbor list of this center: [start_offset, neighbor_length].
    const int *center_start_len = start_len + pt_idx * 2;
    const int *neighbors = stack_neighbor_idxs + center_start_len[0];
    const int neighbor_length = center_start_len[1];

    const float center_x = grid_center[0];
    const float center_y = grid_center[1];
    const float center_z = grid_center[2];

    // Running top-3, ordered nearest first. An index of -1 means "slot not filled yet".
    double nearest_d0 = 1e40, nearest_d1 = 1e40, nearest_d2 = 1e40;
    int nearest_i0 = -1, nearest_i1 = -1, nearest_i2 = -1;

    for (int k = 0; k < neighbor_length; k++){
        const int cand_idx = neighbors[k];
        const float *cand = support_xyz + cand_idx * 3;
        const float x = cand[0];
        const float y = cand[1];
        const float z = cand[2];

        const float d = (center_x - x) * (center_x - x) + (center_y - y) * (center_y - y) + (center_z - z) * (center_z - z);

        // Insert the candidate at its rank and shift the worse entries down.
        // Strict comparisons: on equal distance the earlier candidate keeps its rank.
        if (d < nearest_d0) {
            nearest_d2 = nearest_d1; nearest_i2 = nearest_i1;
            nearest_d1 = nearest_d0; nearest_i1 = nearest_i0;
            nearest_d0 = d; nearest_i0 = cand_idx;
        }
        else if (d < nearest_d1) {
            nearest_d2 = nearest_d1; nearest_i2 = nearest_i1;
            nearest_d1 = d; nearest_i1 = cand_idx;
        }
        else if (d < nearest_d2) {
            nearest_d2 = d; nearest_i2 = cand_idx;
        }
    }

    // Fewer than three neighbors: unfilled slots repeat the nearest one.
    // With an empty list all three indices stay -1.
    if (nearest_i1 == -1){
        nearest_i1 = nearest_i0;
        nearest_d1 = nearest_d0;
    }
    if (nearest_i2 == -1){
        nearest_i2 = nearest_i0;
        nearest_d2 = nearest_d0;
    }

    out_dist2[0] = nearest_d0;
    out_dist2[1] = nearest_d1;
    out_dist2[2] = nearest_d2;
    out_idxs[0] = nearest_i0;
    out_idxs[1] = nearest_i1;
    out_idxs[2] = nearest_i2;
}
int query_three_nn_by_stacked_local_idxs_kernel_launcher_stack(
    const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers,
    int *new_xyz_grid_idxs, float *new_xyz_grid_dist2,
    const int *stack_neighbor_idxs, const int *start_len,
    int M, int num_total_grids){
    // Launches query_three_nn_by_stacked_local_idxs_kernel over an (M, num_total_grids) problem.
    //
    // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
    // new_xyz: (M1 + M2 ..., 3) centers of the ball query
    // new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
    // new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn
    // new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn
    // stack_neighbor_idxs: (max_length_of_neighbor_idxs)
    // start_len: (M1 + M2, 2) [start_offset, neighbor_length]
    //
    // Returns 0. A launch error is printed to stderr and terminates the process.

    // Empty problem: a grid dimension of zero is an invalid launch configuration and would
    // make the error check below abort the process, so there is simply nothing to launch.
    if (M <= 0 || num_total_grids <= 0) return 0;

    dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), num_total_grids);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    query_three_nn_by_stacked_local_idxs_kernel<<<blocks, threads>>>(
        support_xyz, new_xyz, new_xyz_grid_centers,
        new_xyz_grid_idxs, new_xyz_grid_dist2, stack_neighbor_idxs, start_len,
        M, num_total_grids
    );
    // cudaDeviceSynchronize();  // for using printf in kernel function

    const cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
    return 0;
}
__global__ void query_stacked_local_neighbor_idxs_kernel(
    const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt,
    int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs,
    float max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type){
    // One thread per center: collects the support points of the same batch sample that lie
    // within max_neighbour_distance of the center, reserves a slice of stack_neighbor_idxs
    // through an atomicAdd on cumsum, and stores the (global) neighbor indices there.
    //
    // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
    // xyz_batch_cnt: (batch_size), [N1, N2, ...]
    // new_xyz: (M1 + M2 ..., 3) centers of the ball query
    // new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
    // stack_neighbor_idxs: (max_length_of_neighbor_idxs)
    // start_len: (M1 + M2, 2) [start_offset, neighbor_length]
    // cumsum: (1), max offset of current data in stack_neighbor_idxs
    // max_neighbour_distance: float
    // nsample: find all (-1), find limited number(>0)
    // neighbor_type: 1: ball, others: cube
    const int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= M) return;

    // Locate the batch sample this center belongs to by walking the per-sample counts.
    int bs_idx = 0;
    int pt_end = new_xyz_batch_cnt[0];
    while (bs_idx + 1 < batch_size && pt_idx >= pt_end){
        ++bs_idx;
        pt_end += new_xyz_batch_cnt[bs_idx];
    }

    // Index of the first support point of that sample in the stacked support array.
    int xyz_batch_start_idx = 0;
    for (int b = 0; b < bs_idx; b++) xyz_batch_start_idx += xyz_batch_cnt[b];

    const float *batch_xyz = support_xyz + xyz_batch_start_idx * 3;
    const float *center = new_xyz + pt_idx * 3;
    int *center_start_len = start_len + pt_idx * 2;

    const float new_x = center[0];
    const float new_y = center[1];
    const float new_z = center[2];
    const int n = xyz_batch_cnt[bs_idx];

    const float radius2 = max_neighbour_distance * max_neighbour_distance;

    // Per-thread staging buffer; at most this many neighbors are kept for one center.
    const int kMaxLocalNeighbors = 1000;
    int temp_idxs[kMaxLocalNeighbors];
    int sample_cnt = 0;

    for (int k = 0; k < n; ++k) {
        const float local_x = batch_xyz[k * 3 + 0] - new_x;
        const float local_y = batch_xyz[k * 3 + 1] - new_y;
        const float local_z = batch_xyz[k * 3 + 2] - new_z;

        bool outside;
        if (neighbor_type == 1){
            // ball
            outside = local_x * local_x + local_y * local_y + local_z * local_z > radius2;
        }
        else{
            // voxel
            outside = (fabs(local_x) > max_neighbour_distance) |
                      (fabs(local_y) > max_neighbour_distance) |
                      (fabs(local_z) > max_neighbour_distance);
        }
        if (outside) continue;

        // Stop scanning once the staging buffer is full.
        if (sample_cnt >= kMaxLocalNeighbors) break;
        temp_idxs[sample_cnt] = k;
        sample_cnt++;

        if (nsample > 0 && sample_cnt >= nsample) break;
    }

    // Reserve a slice of the stacked output; the old counter value is this center's start.
    const int start_offset = atomicAdd(cumsum, sample_cnt);
    center_start_len[0] = start_offset;
    center_start_len[1] = sample_cnt;

    // Capacity of stack_neighbor_idxs as assumed by this kernel. The counter keeps growing
    // past it, but nothing is written beyond the capacity.
    const int max_thresh = avg_length_of_neighbor_idxs * M;
    if (start_offset >= max_thresh) return;

    int num_to_write = sample_cnt;
    if (start_offset + sample_cnt >= max_thresh) num_to_write = max_thresh - start_offset;

    int *out = stack_neighbor_idxs + start_offset;
    for (int k = 0; k < num_to_write; k++){
        // Convert the per-sample index into an index into the stacked support array.
        out[k] = temp_idxs[k] + xyz_batch_start_idx;
    }
}
int query_stacked_local_neighbor_idxs_kernel_launcher_stack(
    const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt,
    int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs,
    float max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type){
    // Launches query_stacked_local_neighbor_idxs_kernel with one thread per center.
    //
    // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
    // xyz_batch_cnt: (batch_size), [N1, N2, ...]
    // new_xyz: (M1 + M2 ..., 3) centers of the ball query
    // new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
    // stack_neighbor_idxs: (max_length_of_neighbor_idxs)
    // start_len: (M1 + M2, 2) [start_offset, neighbor_length]
    // cumsum: (1), max offset of current data in stack_neighbor_idxs
    // max_neighbour_distance: float
    // nsample: find all (-1), find limited number(>0)
    // neighbor_type: 1: ball, others: cube
    //
    // Returns 0. A launch error is printed to stderr and terminates the process.

    // No centers: a grid dimension of zero is an invalid launch configuration and would make
    // the error check below abort the process, so there is simply nothing to launch.
    if (M <= 0) return 0;

    dim3 blocks(DIVUP(M, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    query_stacked_local_neighbor_idxs_kernel<<<blocks, threads>>>(
        support_xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt,
        stack_neighbor_idxs, start_len, cumsum, avg_length_of_neighbor_idxs,
        max_neighbour_distance, batch_size, M, nsample, neighbor_type
    );
    // cudaDeviceSynchronize();  // for using printf in kernel function

    const cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
    return 0;
}
__global__ void vector_pool_kernel_stack(
    const float *support_xyz, const float *support_features, const int *xyz_batch_cnt,
    const float *new_xyz, float *new_features, float *new_local_xyz, const int *new_xyz_batch_cnt,
    int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance,
    int batch_size, int M, int num_c_in, int num_c_out,
    int num_c_each_grid, int num_total_grids, int *point_cnt_of_grid, int *grouped_idxs,
    int use_xyz, float grid_size_x, float grid_size_y, float grid_size_z,
    int *cum_sum, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){
    // One thread per center: scans the support points of the center's batch sample, assigns
    // every point inside the neighborhood to a sub-grid, and pools its features into that
    // sub-grid's slice of new_features. Each accepted point is also appended to grouped_idxs
    // through an atomicAdd on cum_sum.
    //
    // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
    // support_features: (N1 + N2 ..., C)
    // xyz_batch_cnt: (batch_size), [N1, N2, ...]
    // new_xyz: (M1 + M2 ..., 3) centers of the ball query
    // new_features: (M1 + M2 ..., C), C = num_total_grids * num_c_each_grid
    // new_local_xyz: (M1 + M2 ..., 3 * num_total_grids)
    // new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
    // num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz
    // point_cnt_of_grid: (M1 + M2 ..., num_total_grids)
    // grouped_idxs: (num_max_sum_points, 3)[idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
    // use_xyz: whether to calculate new_local_xyz
    // neighbor_type: 1: ball, others: cube
    // pooling_type: 0: avg_pool, 1: random choice
    //
    // In avg_pool mode this kernel only accumulates sums and counts; no division happens here.
    const int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (pt_idx >= M) return;

    // Locate the batch sample this center belongs to by walking the per-sample counts.
    int bs_idx = 0;
    int pt_end = new_xyz_batch_cnt[0];
    while (bs_idx + 1 < batch_size && pt_idx >= pt_end){
        ++bs_idx;
        pt_end += new_xyz_batch_cnt[bs_idx];
    }

    // Index of the first support point of that sample in the stacked support arrays.
    int xyz_batch_start_idx = 0;
    for (int b = 0; b < bs_idx; b++) xyz_batch_start_idx += xyz_batch_cnt[b];

    // Inputs restricted to this sample, outputs restricted to this center's row.
    const float *batch_xyz = support_xyz + xyz_batch_start_idx * 3;
    const float *batch_features = support_features + xyz_batch_start_idx * num_c_in;
    const float *center = new_xyz + pt_idx * 3;
    float *center_features = new_features + pt_idx * num_c_out;
    int *center_point_cnt = point_cnt_of_grid + pt_idx * num_total_grids;
    float *center_local_xyz = new_local_xyz + pt_idx * 3 * num_total_grids;

    const float new_x = center[0];
    const float new_y = center[1];
    const float new_z = center[2];
    const int n = xyz_batch_cnt[bs_idx];
    const float radius2 = max_neighbour_distance * max_neighbour_distance;

    const bool is_avg_pool = (pooling_type == 0);
    const bool is_random_pool = (pooling_type == 1);

    int sample_cnt = 0;
    for (int k = 0; k < n; ++k) {
        const float local_x = batch_xyz[k * 3 + 0] - new_x;
        const float local_y = batch_xyz[k * 3 + 1] - new_y;
        const float local_z = batch_xyz[k * 3 + 2] - new_z;

        bool outside;
        if (neighbor_type == 1){
            // ball
            outside = local_x * local_x + local_y * local_y + local_z * local_z > radius2;
        }
        else{
            // voxel
            outside = (fabs(local_x) > max_neighbour_distance) |
                      (fabs(local_y) > max_neighbour_distance) |
                      (fabs(local_z) > max_neighbour_distance);
        }
        if (outside) continue;

        // Sub-grid containing the point; the flattened index is clamped into range.
        const int grid_idx_x = floorf((local_x + max_neighbour_distance) / grid_size_x);
        const int grid_idx_y = floorf((local_y + max_neighbour_distance) / grid_size_y);
        const int grid_idx_z = floorf((local_z + max_neighbour_distance) / grid_size_z);
        int grid_idx = grid_idx_x * num_grid_y * num_grid_z + grid_idx_y * num_grid_z + grid_idx_z;
        grid_idx = min(max(grid_idx, 0), num_total_grids - 1);

        if (is_random_pool){
            // random choose one within sub-voxel: only the first point that lands in a
            // sub-grid is used, later ones are ignored.
            if (center_point_cnt[grid_idx] != 0) continue;
        }
        else if (!is_avg_pool){
            // Any other pooling_type does nothing with the point.
            continue;
        }

        center_point_cnt[grid_idx]++;

        // avg pooling accumulates (+=), random choice overwrites (=). Input channel i maps
        // to slot i % num_c_each_grid of the sub-grid's feature slice.
        float *grid_features = center_features + grid_idx * num_c_each_grid;
        const float *point_features = batch_features + k * num_c_in;
        for (int i = 0; i < num_c_in; i++){
            const int dst = i % num_c_each_grid;
            if (is_avg_pool) grid_features[dst] += point_features[i];
            else grid_features[dst] = point_features[i];
        }

        if (use_xyz){
            float *grid_xyz = center_local_xyz + grid_idx * 3;
            if (is_avg_pool){
                grid_xyz[0] += local_x;
                grid_xyz[1] += local_y;
                grid_xyz[2] += local_z;
            }
            else{
                grid_xyz[0] = local_x;
                grid_xyz[1] = local_y;
                grid_xyz[2] = local_z;
            }
        }

        // Reserve a row of grouped_idxs. When the table is full the counter still advances
        // (continue to statistics the max number of points) but nothing is recorded and
        // sample_cnt is not increased.
        const int slot = atomicAdd(cum_sum, 1);
        if (slot >= num_max_sum_points) continue;

        int *record = grouped_idxs + slot * 3;
        record[0] = xyz_batch_start_idx + k;
        record[1] = pt_idx;
        record[2] = grid_idx;

        sample_cnt++;
        const bool reached_nsample = nsample > 0 && sample_cnt >= nsample;
        // Random choice additionally stops once as many points as sub-grids were recorded.
        if (reached_nsample || (is_random_pool && sample_cnt >= num_total_grids)) break;
    }
}
int vector_pool_kernel_launcher_stack(
    const float *support_xyz, const float *support_features, const int *xyz_batch_cnt,
    const float *new_xyz, float *new_features, float *new_local_xyz, const int *new_xyz_batch_cnt,
    int *point_cnt_of_grid, int *grouped_idxs,
    int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance,
    int batch_size, int N, int M, int num_c_in, int num_c_out, int num_total_grids,
    int use_xyz, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){
    // Launches vector_pool_kernel_stack with one thread per center and returns the final
    // value of the device-side counter that the kernel increments for every pooled point.
    //
    // support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
    // support_features: (N1 + N2 ..., C)
    // xyz_batch_cnt: (batch_size), [N1, N2, ...]
    // new_xyz: (M1 + M2 ..., 3) centers of the ball query
    // new_features: (M1 + M2 ..., C)
    // new_local_xyz: (M1 + M2 ..., 3 * num_total_grids)
    // new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
    // num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz
    // use_xyz: whether to calculate new_local_xyz
    // grouped_idxs: (num_max_sum_points, 3)[idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
    // neighbor_type: 1: ball, others: cube
    // pooling_type: 0: avg_pool, 1: random choice
    //
    // The returned count may exceed num_max_sum_points: the kernel keeps counting after
    // grouped_idxs is full. A CUDA error is printed to stderr and terminates the process.

    // Empty problem: nothing is pooled. This also avoids the integer division by zero below
    // and a zero-sized grid, which is an invalid launch configuration.
    if (M <= 0 || num_total_grids <= 0) return 0;

    const int num_c_each_grid = num_c_out / num_total_grids;
    // The local area spans [-max_neighbour_distance, max_neighbour_distance] on every axis.
    const float grid_size_x = max_neighbour_distance * 2 / num_grid_x;
    const float grid_size_y = max_neighbour_distance * 2 / num_grid_y;
    const float grid_size_z = max_neighbour_distance * 2 / num_grid_z;

    dim3 blocks(DIVUP(M, THREADS_PER_BLOCK));  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    // Device-side counter, zero-initialised from the host.
    int cum_sum = 0;
    int *p_cum_sum = NULL;
    cudaError_t err = cudaMalloc((void**)&p_cum_sum, sizeof(int));
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA malloc failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
    cudaMemcpy(p_cum_sum, &cum_sum, sizeof(int), cudaMemcpyHostToDevice);

    vector_pool_kernel_stack<<<blocks, threads>>>(
        support_xyz, support_features, xyz_batch_cnt,
        new_xyz, new_features, new_local_xyz, new_xyz_batch_cnt,
        num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance,
        batch_size, M, num_c_in, num_c_out,
        num_c_each_grid, num_total_grids, point_cnt_of_grid, grouped_idxs,
        use_xyz, grid_size_x, grid_size_y, grid_size_z, p_cum_sum, num_max_sum_points,
        nsample, neighbor_type, pooling_type
    );

    cudaMemcpy(&cum_sum, p_cum_sum, sizeof(int), cudaMemcpyDeviceToHost);
    // cudaDeviceSynchronize();  // for using printf in kernel function

    // Capture the error state before cleanup, then release the counter so repeated calls
    // do not leak device memory.
    err = cudaGetLastError();
    cudaFree(p_cum_sum);

    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
    return cum_sum;
}
__global__ void vector_pool_grad_kernel_stack(const float *grad_new_features,
    const int *point_cnt_of_grid, const int *grouped_idxs,
    float *grad_support_features, int N, int M, int num_c_out, int num_c_in,
    int num_c_each_grid, int num_total_grids, int num_max_sum_points){
    // Backward pass of the vector pool: for one grouped_idxs row and one input channel,
    // adds the matching output gradient, divided by the number of points pooled into that
    // sub-grid, to the gradient of the support feature.
    //
    // grad_new_features: (M1 + M2 ..., C_out)
    // point_cnt_of_grid: (M1 + M2 ..., num_total_grids)
    // grouped_idxs: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
    // grad_support_features: (N1 + N2 ..., C_in)
    //
    // Thread mapping: blockIdx.y selects the input channel, the x dimension selects the row.
    const int channel_idx = blockIdx.y;
    const int index = blockIdx.x * blockDim.x + threadIdx.x;
    if (index >= num_max_sum_points || channel_idx >= num_c_in) return;

    const int *record = grouped_idxs + index * 3;
    const int support_idx = record[0];
    const int center_idx = record[1];
    const int grid_idx = record[2];

    const int pts_in_grid = point_cnt_of_grid[center_idx * num_total_grids + grid_idx];

    float *dst = grad_support_features + support_idx * num_c_in + channel_idx;
    const float *grid_grad = grad_new_features + center_idx * num_c_out + grid_idx * num_c_each_grid;

    // Same channel folding as the forward pass: input channel i maps to i % num_c_each_grid.
    const int folded_channel = channel_idx % num_c_each_grid;
    // The count is clamped to at least 1 so the division is always defined.
    const float scale = 1 / fmaxf(float(pts_in_grid), 1.0);

    // Several rows can refer to the same support point, hence the atomic accumulation.
    atomicAdd(dst, grid_grad[folded_channel] * scale);
}
void vector_pool_grad_kernel_launcher_stack(
    const float *grad_new_features, const int *point_cnt_of_grid, const int *grouped_idxs,
    float *grad_support_features, int N, int M, int num_c_out, int num_c_in, int num_total_grids,
    int num_max_sum_points){
    // Launches vector_pool_grad_kernel_stack over a (num_max_sum_points, num_c_in) problem.
    //
    // grad_new_features: (M1 + M2 ..., C_out)
    // point_cnt_of_grid: (M1 + M2 ..., num_total_grids)
    // grouped_idxs: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
    // grad_support_features: (N1 + N2 ..., C_in)
    //
    // A launch error is printed to stderr and terminates the process.

    // Nothing was grouped (or there are no channels / grids): there is no gradient to
    // scatter. Returning here avoids the integer division by zero below and a zero-sized
    // grid, which is an invalid launch configuration and would abort the process.
    if (num_max_sum_points <= 0 || num_c_in <= 0 || num_total_grids <= 0) return;

    const int num_c_each_grid = num_c_out / num_total_grids;

    dim3 blocks(DIVUP(num_max_sum_points, THREADS_PER_BLOCK), num_c_in);  // blockIdx.x(col), blockIdx.y(row)
    dim3 threads(THREADS_PER_BLOCK);

    vector_pool_grad_kernel_stack<<<blocks, threads>>>(
        grad_new_features, point_cnt_of_grid, grouped_idxs, grad_support_features,
        N, M, num_c_out, num_c_in, num_c_each_grid, num_total_grids, num_max_sum_points
    );
    // cudaDeviceSynchronize();  // for using printf in kernel function

    const cudaError_t err = cudaGetLastError();
    if (cudaSuccess != err) {
        fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
        exit(-1);
    }
}
\ No newline at end of file
pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.h
0 → 100644
View file @
a991105c
/*
Vector-pool aggregation based local feature aggregation for point cloud.
PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
https://arxiv.org/abs/2102.00463
Written by Shaoshuai Shi
All Rights Reserved 2020.
*/
#ifndef _STACK_VECTOR_POOL_GPU_H
#define _STACK_VECTOR_POOL_GPU_H
#include <torch/serialize/tensor.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
int
query_stacked_local_neighbor_idxs_kernel_launcher_stack
(
const
float
*
support_xyz
,
const
int
*
xyz_batch_cnt
,
const
float
*
new_xyz
,
const
int
*
new_xyz_batch_cnt
,
int
*
stack_neighbor_idxs
,
int
*
start_len
,
int
*
cumsum
,
int
avg_length_of_neighbor_idxs
,
float
max_neighbour_distance
,
int
batch_size
,
int
M
,
int
nsample
,
int
neighbor_type
);
int
query_stacked_local_neighbor_idxs_wrapper_stack
(
at
::
Tensor
support_xyz_tensor
,
at
::
Tensor
xyz_batch_cnt_tensor
,
at
::
Tensor
new_xyz_tensor
,
at
::
Tensor
new_xyz_batch_cnt_tensor
,
at
::
Tensor
stack_neighbor_idxs_tensor
,
at
::
Tensor
start_len_tensor
,
at
::
Tensor
cumsum_tensor
,
int
avg_length_of_neighbor_idxs
,
float
max_neighbour_distance
,
int
nsample
,
int
neighbor_type
);
int
query_three_nn_by_stacked_local_idxs_kernel_launcher_stack
(
const
float
*
support_xyz
,
const
float
*
new_xyz
,
const
float
*
new_xyz_grid_centers
,
int
*
new_xyz_grid_idxs
,
float
*
new_xyz_grid_dist2
,
const
int
*
stack_neighbor_idxs
,
const
int
*
start_len
,
int
M
,
int
num_total_grids
);
int
query_three_nn_by_stacked_local_idxs_wrapper_stack
(
at
::
Tensor
support_xyz_tensor
,
at
::
Tensor
new_xyz_tensor
,
at
::
Tensor
new_xyz_grid_centers_tensor
,
at
::
Tensor
new_xyz_grid_idxs_tensor
,
at
::
Tensor
new_xyz_grid_dist2_tensor
,
at
::
Tensor
stack_neighbor_idxs_tensor
,
at
::
Tensor
start_len_tensor
,
int
M
,
int
num_total_grids
);
int
vector_pool_wrapper_stack
(
at
::
Tensor
support_xyz_tensor
,
at
::
Tensor
xyz_batch_cnt_tensor
,
at
::
Tensor
support_features_tensor
,
at
::
Tensor
new_xyz_tensor
,
at
::
Tensor
new_xyz_batch_cnt_tensor
,
at
::
Tensor
new_features_tensor
,
at
::
Tensor
new_local_xyz
,
at
::
Tensor
point_cnt_of_grid_tensor
,
at
::
Tensor
grouped_idxs_tensor
,
int
num_grid_x
,
int
num_grid_y
,
int
num_grid_z
,
float
max_neighbour_distance
,
int
use_xyz
,
int
num_max_sum_points
,
int
nsample
,
int
neighbor_type
,
int
pooling_type
);
int
vector_pool_kernel_launcher_stack
(
const
float
*
support_xyz
,
const
float
*
support_features
,
const
int
*
xyz_batch_cnt
,
const
float
*
new_xyz
,
float
*
new_features
,
float
*
new_local_xyz
,
const
int
*
new_xyz_batch_cnt
,
int
*
point_cnt_of_grid
,
int
*
grouped_idxs
,
int
num_grid_x
,
int
num_grid_y
,
int
num_grid_z
,
float
max_neighbour_distance
,
int
batch_size
,
int
N
,
int
M
,
int
num_c_in
,
int
num_c_out
,
int
num_total_grids
,
int
use_xyz
,
int
num_max_sum_points
,
int
nsample
,
int
neighbor_type
,
int
pooling_type
);
int
vector_pool_grad_wrapper_stack
(
at
::
Tensor
grad_new_features_tensor
,
at
::
Tensor
point_cnt_of_grid_tensor
,
at
::
Tensor
grouped_idxs_tensor
,
at
::
Tensor
grad_support_features_tensor
);
void
vector_pool_grad_kernel_launcher_stack
(
const
float
*
grad_new_features
,
const
int
*
point_cnt_of_grid
,
const
int
*
grouped_idxs
,
float
*
grad_support_features
,
int
N
,
int
M
,
int
num_c_out
,
int
num_c_in
,
int
num_total_grids
,
int
num_max_sum_points
);
#endif
setup.py
View file @
a991105c
...
...
@@ -28,7 +28,7 @@ def write_version_to_file(version, target_file):
if
__name__
==
'__main__'
:
version
=
'0.5.
1
+%s'
%
get_git_commit_number
()
version
=
'0.5.
2
+%s'
%
get_git_commit_number
()
write_version_to_file
(
version
,
'pcdet/version.py'
)
setup
(
...
...
@@ -97,6 +97,8 @@ if __name__ == '__main__':
'src/interpolate_gpu.cu'
,
'src/voxel_query.cpp'
,
'src/voxel_query_gpu.cu'
,
'src/vector_pool.cpp'
,
'src/vector_pool_gpu.cu'
],
),
make_cuda_ext
(
...
...
tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml
0 → 100644
View file @
a991105c
CLASS_NAMES
:
[
'
Vehicle'
,
'
Pedestrian'
,
'
Cyclist'
]
DATA_CONFIG
:
_BASE_CONFIG_
:
cfgs/dataset_configs/waymo_dataset.yaml
MODEL
:
NAME
:
PVRCNNPlusPlus
VFE
:
NAME
:
MeanVFE
BACKBONE_3D
:
NAME
:
VoxelBackBone8x
MAP_TO_BEV
:
NAME
:
HeightCompression
NUM_BEV_FEATURES
:
256
BACKBONE_2D
:
NAME
:
BaseBEVBackbone
LAYER_NUMS
:
[
5
,
5
]
LAYER_STRIDES
:
[
1
,
2
]
NUM_FILTERS
:
[
128
,
256
]
UPSAMPLE_STRIDES
:
[
1
,
2
]
NUM_UPSAMPLE_FILTERS
:
[
256
,
256
]
DENSE_HEAD
:
NAME
:
CenterHead
CLASS_AGNOSTIC
:
False
CLASS_NAMES_EACH_HEAD
:
[
[
'
Vehicle'
,
'
Pedestrian'
,
'
Cyclist'
]
]
SHARED_CONV_CHANNEL
:
64
USE_BIAS_BEFORE_NORM
:
True
NUM_HM_CONV
:
2
SEPARATE_HEAD_CFG
:
HEAD_ORDER
:
[
'
center'
,
'
center_z'
,
'
dim'
,
'
rot'
]
HEAD_DICT
:
{
'
center'
:
{
'
out_channels'
:
2
,
'
num_conv'
:
2
},
'
center_z'
:
{
'
out_channels'
:
1
,
'
num_conv'
:
2
},
'
dim'
:
{
'
out_channels'
:
3
,
'
num_conv'
:
2
},
'
rot'
:
{
'
out_channels'
:
2
,
'
num_conv'
:
2
},
}
TARGET_ASSIGNER_CONFIG
:
FEATURE_MAP_STRIDE
:
8
NUM_MAX_OBJS
:
500
GAUSSIAN_OVERLAP
:
0.1
MIN_RADIUS
:
2
LOSS_CONFIG
:
LOSS_WEIGHTS
:
{
'
cls_weight'
:
1.0
,
'
loc_weight'
:
2.0
,
'
code_weights'
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
}
POST_PROCESSING
:
SCORE_THRESH
:
0.1
POST_CENTER_LIMIT_RANGE
:
[
-75.2
,
-75.2
,
-2
,
75.2
,
75.2
,
4
]
MAX_OBJ_PER_SAMPLE
:
500
NMS_CONFIG
:
NMS_TYPE
:
nms_gpu
NMS_THRESH
:
0.7
NMS_PRE_MAXSIZE
:
4096
NMS_POST_MAXSIZE
:
500
PFE
:
NAME
:
VoxelSetAbstraction
POINT_SOURCE
:
raw_points
NUM_KEYPOINTS
:
4096
NUM_OUTPUT_FEATURES
:
90
SAMPLE_METHOD
:
SPC
SPC_SAMPLING
:
NUM_SECTORS
:
6
SAMPLE_RADIUS_WITH_ROI
:
1.6
FEATURES_SOURCE
:
[
'
bev'
,
'
x_conv3'
,
'
x_conv4'
,
'
raw_points'
]
SA_LAYER
:
raw_points
:
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
local_interpolation
NUM_REDUCED_CHANNELS
:
2
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
32
]
FILTER_NEIGHBOR_WITH_ROI
:
True
RADIUS_OF_NEIGHBOR_WITH_ROI
:
2.4
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
2
,
2
,
2
]
MAX_NEIGHBOR_DISTANCE
:
0.2
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
32
,
32
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
0.4
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
32
,
32
]
x_conv3
:
DOWNSAMPLE_FACTOR
:
4
INPUT_CHANNELS
:
64
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
local_interpolation
NUM_REDUCED_CHANNELS
:
32
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
128
]
FILTER_NEIGHBOR_WITH_ROI
:
True
RADIUS_OF_NEIGHBOR_WITH_ROI
:
4.0
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
1.2
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
2.4
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
x_conv4
:
DOWNSAMPLE_FACTOR
:
8
INPUT_CHANNELS
:
64
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
local_interpolation
NUM_REDUCED_CHANNELS
:
32
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
128
]
FILTER_NEIGHBOR_WITH_ROI
:
True
RADIUS_OF_NEIGHBOR_WITH_ROI
:
6.4
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
2.4
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
4.8
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
POINT_HEAD
:
NAME
:
PointHeadSimple
CLS_FC
:
[
256
,
256
]
CLASS_AGNOSTIC
:
True
USE_POINT_FEATURES_BEFORE_FUSION
:
True
TARGET_CONFIG
:
GT_EXTRA_WIDTH
:
[
0.2
,
0.2
,
0.2
]
LOSS_CONFIG
:
LOSS_REG
:
smooth-l1
LOSS_WEIGHTS
:
{
'
point_cls_weight'
:
1.0
,
}
ROI_HEAD
:
NAME
:
PVRCNNHead
CLASS_AGNOSTIC
:
True
SHARED_FC
:
[
256
,
256
]
CLS_FC
:
[
256
,
256
]
REG_FC
:
[
256
,
256
]
DP_RATIO
:
0.3
NMS_CONFIG
:
TRAIN
:
NMS_TYPE
:
nms_gpu
MULTI_CLASSES_NMS
:
False
NMS_PRE_MAXSIZE
:
9000
NMS_POST_MAXSIZE
:
512
NMS_THRESH
:
0.8
TEST
:
NMS_TYPE
:
nms_gpu
MULTI_CLASSES_NMS
:
False
NMS_PRE_MAXSIZE
:
1024
NMS_POST_MAXSIZE
:
100
NMS_THRESH
:
0.7
SCORE_THRESH
:
0.1
# NMS_PRE_MAXSIZE: 4096
# NMS_POST_MAXSIZE: 500
# NMS_THRESH: 0.85
ROI_GRID_POOL
:
GRID_SIZE
:
6
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
voxel_random_choice
NUM_REDUCED_CHANNELS
:
30
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
128
]
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
0.8
NEIGHBOR_NSAMPLE
:
32
POST_MLPS
:
[
64
,
64
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
1.6
NEIGHBOR_NSAMPLE
:
32
POST_MLPS
:
[
64
,
64
]
TARGET_CONFIG
:
BOX_CODER
:
ResidualCoder
ROI_PER_IMAGE
:
128
FG_RATIO
:
0.5
SAMPLE_ROI_BY_EACH_CLASS
:
True
CLS_SCORE_TYPE
:
roi_iou
CLS_FG_THRESH
:
0.75
CLS_BG_THRESH
:
0.25
CLS_BG_THRESH_LO
:
0.1
HARD_BG_RATIO
:
0.8
REG_FG_THRESH
:
0.55
LOSS_CONFIG
:
CLS_LOSS
:
BinaryCrossEntropy
REG_LOSS
:
smooth-l1
CORNER_LOSS_REGULARIZATION
:
True
LOSS_WEIGHTS
:
{
'
rcnn_cls_weight'
:
1.0
,
'
rcnn_reg_weight'
:
1.0
,
'
rcnn_corner_weight'
:
1.0
,
'
code_weights'
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
}
POST_PROCESSING
:
RECALL_THRESH_LIST
:
[
0.3
,
0.5
,
0.7
]
SCORE_THRESH
:
0.1
OUTPUT_RAW_SCORE
:
False
EVAL_METRIC
:
waymo
NMS_CONFIG
:
MULTI_CLASSES_NMS
:
False
NMS_TYPE
:
nms_gpu
NMS_THRESH
:
0.7
NMS_PRE_MAXSIZE
:
4096
NMS_POST_MAXSIZE
:
500
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
2
NUM_EPOCHS
:
30
OPTIMIZER
:
adam_onecycle
LR
:
0.01
WEIGHT_DECAY
:
0.001
MOMENTUM
:
0.9
MOMS
:
[
0.95
,
0.85
]
PCT_START
:
0.4
DIV_FACTOR
:
10
DECAY_STEP_LIST
:
[
35
,
45
]
LR_DECAY
:
0.1
LR_CLIP
:
0.0000001
LR_WARMUP
:
False
WARMUP_EPOCH
:
1
GRAD_NORM_CLIP
:
10
\ No newline at end of file
tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml
0 → 100644
View file @
a991105c
CLASS_NAMES
:
[
'
Vehicle'
,
'
Pedestrian'
,
'
Cyclist'
]
DATA_CONFIG
:
_BASE_CONFIG_
:
cfgs/dataset_configs/waymo_dataset.yaml
MODEL
:
NAME
:
PVRCNNPlusPlus
VFE
:
NAME
:
MeanVFE
BACKBONE_3D
:
NAME
:
VoxelResBackBone8x
MAP_TO_BEV
:
NAME
:
HeightCompression
NUM_BEV_FEATURES
:
256
BACKBONE_2D
:
NAME
:
BaseBEVBackbone
LAYER_NUMS
:
[
5
,
5
]
LAYER_STRIDES
:
[
1
,
2
]
NUM_FILTERS
:
[
128
,
256
]
UPSAMPLE_STRIDES
:
[
1
,
2
]
NUM_UPSAMPLE_FILTERS
:
[
256
,
256
]
DENSE_HEAD
:
NAME
:
CenterHead
CLASS_AGNOSTIC
:
False
CLASS_NAMES_EACH_HEAD
:
[
[
'
Vehicle'
,
'
Pedestrian'
,
'
Cyclist'
]
]
SHARED_CONV_CHANNEL
:
64
USE_BIAS_BEFORE_NORM
:
True
NUM_HM_CONV
:
2
SEPARATE_HEAD_CFG
:
HEAD_ORDER
:
[
'
center'
,
'
center_z'
,
'
dim'
,
'
rot'
]
HEAD_DICT
:
{
'
center'
:
{
'
out_channels'
:
2
,
'
num_conv'
:
2
},
'
center_z'
:
{
'
out_channels'
:
1
,
'
num_conv'
:
2
},
'
dim'
:
{
'
out_channels'
:
3
,
'
num_conv'
:
2
},
'
rot'
:
{
'
out_channels'
:
2
,
'
num_conv'
:
2
},
}
TARGET_ASSIGNER_CONFIG
:
FEATURE_MAP_STRIDE
:
8
NUM_MAX_OBJS
:
500
GAUSSIAN_OVERLAP
:
0.1
MIN_RADIUS
:
2
LOSS_CONFIG
:
LOSS_WEIGHTS
:
{
'
cls_weight'
:
1.0
,
'
loc_weight'
:
2.0
,
'
code_weights'
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
}
POST_PROCESSING
:
SCORE_THRESH
:
0.1
POST_CENTER_LIMIT_RANGE
:
[
-75.2
,
-75.2
,
-2
,
75.2
,
75.2
,
4
]
MAX_OBJ_PER_SAMPLE
:
500
NMS_CONFIG
:
NMS_TYPE
:
nms_gpu
NMS_THRESH
:
0.7
NMS_PRE_MAXSIZE
:
4096
NMS_POST_MAXSIZE
:
500
PFE
:
NAME
:
VoxelSetAbstraction
POINT_SOURCE
:
raw_points
NUM_KEYPOINTS
:
4096
NUM_OUTPUT_FEATURES
:
90
SAMPLE_METHOD
:
SPC
SPC_SAMPLING
:
NUM_SECTORS
:
6
SAMPLE_RADIUS_WITH_ROI
:
1.6
FEATURES_SOURCE
:
[
'
bev'
,
'
x_conv3'
,
'
x_conv4'
,
'
raw_points'
]
SA_LAYER
:
raw_points
:
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
local_interpolation
NUM_REDUCED_CHANNELS
:
2
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
32
]
FILTER_NEIGHBOR_WITH_ROI
:
True
RADIUS_OF_NEIGHBOR_WITH_ROI
:
2.4
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
2
,
2
,
2
]
MAX_NEIGHBOR_DISTANCE
:
0.2
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
32
,
32
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
0.4
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
32
,
32
]
x_conv3
:
DOWNSAMPLE_FACTOR
:
4
INPUT_CHANNELS
:
64
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
local_interpolation
NUM_REDUCED_CHANNELS
:
32
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
128
]
FILTER_NEIGHBOR_WITH_ROI
:
True
RADIUS_OF_NEIGHBOR_WITH_ROI
:
4.0
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
1.2
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
2.4
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
x_conv4
:
DOWNSAMPLE_FACTOR
:
8
INPUT_CHANNELS
:
64
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
local_interpolation
NUM_REDUCED_CHANNELS
:
32
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
128
]
FILTER_NEIGHBOR_WITH_ROI
:
True
RADIUS_OF_NEIGHBOR_WITH_ROI
:
6.4
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
2.4
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
4.8
NEIGHBOR_NSAMPLE
:
-1
POST_MLPS
:
[
64
,
64
]
POINT_HEAD
:
NAME
:
PointHeadSimple
CLS_FC
:
[
256
,
256
]
CLASS_AGNOSTIC
:
True
USE_POINT_FEATURES_BEFORE_FUSION
:
True
TARGET_CONFIG
:
GT_EXTRA_WIDTH
:
[
0.2
,
0.2
,
0.2
]
LOSS_CONFIG
:
LOSS_REG
:
smooth-l1
LOSS_WEIGHTS
:
{
'
point_cls_weight'
:
1.0
,
}
ROI_HEAD
:
NAME
:
PVRCNNHead
CLASS_AGNOSTIC
:
True
SHARED_FC
:
[
256
,
256
]
CLS_FC
:
[
256
,
256
]
REG_FC
:
[
256
,
256
]
DP_RATIO
:
0.3
NMS_CONFIG
:
TRAIN
:
NMS_TYPE
:
nms_gpu
MULTI_CLASSES_NMS
:
False
NMS_PRE_MAXSIZE
:
9000
NMS_POST_MAXSIZE
:
512
NMS_THRESH
:
0.8
TEST
:
NMS_TYPE
:
nms_gpu
MULTI_CLASSES_NMS
:
False
NMS_PRE_MAXSIZE
:
1024
NMS_POST_MAXSIZE
:
100
NMS_THRESH
:
0.7
SCORE_THRESH
:
0.1
# NMS_PRE_MAXSIZE: 4096
# NMS_POST_MAXSIZE: 500
# NMS_THRESH: 0.85
ROI_GRID_POOL
:
GRID_SIZE
:
6
NAME
:
VectorPoolAggregationModuleMSG
NUM_GROUPS
:
2
LOCAL_AGGREGATION_TYPE
:
voxel_random_choice
NUM_REDUCED_CHANNELS
:
30
NUM_CHANNELS_OF_LOCAL_AGGREGATION
:
32
MSG_POST_MLPS
:
[
128
]
GROUP_CFG_0
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
0.8
NEIGHBOR_NSAMPLE
:
32
POST_MLPS
:
[
64
,
64
]
GROUP_CFG_1
:
NUM_LOCAL_VOXEL
:
[
3
,
3
,
3
]
MAX_NEIGHBOR_DISTANCE
:
1.6
NEIGHBOR_NSAMPLE
:
32
POST_MLPS
:
[
64
,
64
]
TARGET_CONFIG
:
BOX_CODER
:
ResidualCoder
ROI_PER_IMAGE
:
128
FG_RATIO
:
0.5
SAMPLE_ROI_BY_EACH_CLASS
:
True
CLS_SCORE_TYPE
:
roi_iou
CLS_FG_THRESH
:
0.75
CLS_BG_THRESH
:
0.25
CLS_BG_THRESH_LO
:
0.1
HARD_BG_RATIO
:
0.8
REG_FG_THRESH
:
0.55
LOSS_CONFIG
:
CLS_LOSS
:
BinaryCrossEntropy
REG_LOSS
:
smooth-l1
CORNER_LOSS_REGULARIZATION
:
True
LOSS_WEIGHTS
:
{
'
rcnn_cls_weight'
:
1.0
,
'
rcnn_reg_weight'
:
1.0
,
'
rcnn_corner_weight'
:
1.0
,
'
code_weights'
:
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
]
}
POST_PROCESSING
:
RECALL_THRESH_LIST
:
[
0.3
,
0.5
,
0.7
]
SCORE_THRESH
:
0.1
OUTPUT_RAW_SCORE
:
False
EVAL_METRIC
:
waymo
NMS_CONFIG
:
MULTI_CLASSES_NMS
:
False
NMS_TYPE
:
nms_gpu
NMS_THRESH
:
0.7
NMS_PRE_MAXSIZE
:
4096
NMS_POST_MAXSIZE
:
500
OPTIMIZATION
:
BATCH_SIZE_PER_GPU
:
2
NUM_EPOCHS
:
30
OPTIMIZER
:
adam_onecycle
LR
:
0.01
WEIGHT_DECAY
:
0.001
MOMENTUM
:
0.9
MOMS
:
[
0.95
,
0.85
]
PCT_START
:
0.4
DIV_FACTOR
:
10
DECAY_STEP_LIST
:
[
35
,
45
]
LR_DECAY
:
0.1
LR_CLIP
:
0.0000001
LR_WARMUP
:
False
WARMUP_EPOCH
:
1
GRAD_NORM_CLIP
:
10
\ No newline at end of file
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment