Unverified Commit a991105c authored by Shaoshuai Shi's avatar Shaoshuai Shi Committed by GitHub
Browse files

Release the codes of PV-RCNN++, update OpenPCDet to v0.5.2

Release the codes of PV-RCNN++, update OpenPCDet to v0.5.2
parents 1483517a b6fbf07f
/*
Vector-pool aggregation based local feature aggregation for point cloud.
PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
https://arxiv.org/abs/2102.00463
Written by Shaoshuai Shi
All Rights Reserved 2020.
*/
#include <torch/serialize/tensor.h>
#include <vector>
#include <THC/THC.h>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "vector_pool_gpu.h"
extern THCState *state;
#define CHECK_CUDA(x) do { \
if (!x.type().is_cuda()) { \
fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \
exit(-1); \
} \
} while (0)
#define CHECK_CONTIGUOUS(x) do { \
if (!x.is_contiguous()) { \
fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \
exit(-1); \
} \
} while (0)
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
int query_stacked_local_neighbor_idxs_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cumsum_tensor,
int avg_length_of_neighbor_idxs, float max_neighbour_distance, int nsample, int neighbor_type){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// xyz_batch_cnt: (batch_size), [N1, N2, ...]
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
// new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn
// new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn
// num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz
// nsample: find all (-1), find limited number(>0)
// neighbor_type: 1: ball, others: cube
CHECK_INPUT(support_xyz_tensor);
CHECK_INPUT(xyz_batch_cnt_tensor);
CHECK_INPUT(new_xyz_tensor);
CHECK_INPUT(new_xyz_batch_cnt_tensor);
CHECK_INPUT(stack_neighbor_idxs_tensor);
CHECK_INPUT(start_len_tensor);
CHECK_INPUT(cumsum_tensor);
const float *support_xyz = support_xyz_tensor.data<float>();
const int *xyz_batch_cnt = xyz_batch_cnt_tensor.data<int>();
const float *new_xyz = new_xyz_tensor.data<float>();
const int *new_xyz_batch_cnt = new_xyz_batch_cnt_tensor.data<int>();
int *stack_neighbor_idxs = stack_neighbor_idxs_tensor.data<int>();
int *start_len = start_len_tensor.data<int>();
int *cumsum = cumsum_tensor.data<int>();
int batch_size = xyz_batch_cnt_tensor.size(0);
int M = new_xyz_tensor.size(0);
query_stacked_local_neighbor_idxs_kernel_launcher_stack(
support_xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt,
stack_neighbor_idxs, start_len, cumsum, avg_length_of_neighbor_idxs,
max_neighbour_distance, batch_size, M, nsample, neighbor_type
);
return 0;
}
int query_three_nn_by_stacked_local_idxs_wrapper_stack(at::Tensor support_xyz_tensor,
at::Tensor new_xyz_tensor, at::Tensor new_xyz_grid_centers_tensor,
at::Tensor new_xyz_grid_idxs_tensor, at::Tensor new_xyz_grid_dist2_tensor,
at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor,
int M, int num_total_grids){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
// new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn
// new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn
// stack_neighbor_idxs: (max_length_of_neighbor_idxs)
// start_len: (M1 + M2, 2) [start_offset, neighbor_length]
CHECK_INPUT(support_xyz_tensor);
CHECK_INPUT(new_xyz_tensor);
CHECK_INPUT(new_xyz_grid_centers_tensor);
CHECK_INPUT(new_xyz_grid_idxs_tensor);
CHECK_INPUT(new_xyz_grid_dist2_tensor);
CHECK_INPUT(stack_neighbor_idxs_tensor);
CHECK_INPUT(start_len_tensor);
const float *support_xyz = support_xyz_tensor.data<float>();
const float *new_xyz = new_xyz_tensor.data<float>();
const float *new_xyz_grid_centers = new_xyz_grid_centers_tensor.data<float>();
int *new_xyz_grid_idxs = new_xyz_grid_idxs_tensor.data<int>();
float *new_xyz_grid_dist2 = new_xyz_grid_dist2_tensor.data<float>();
int *stack_neighbor_idxs = stack_neighbor_idxs_tensor.data<int>();
int *start_len = start_len_tensor.data<int>();
query_three_nn_by_stacked_local_idxs_kernel_launcher_stack(
support_xyz, new_xyz, new_xyz_grid_centers,
new_xyz_grid_idxs, new_xyz_grid_dist2, stack_neighbor_idxs, start_len,
M, num_total_grids
);
return 0;
}
int vector_pool_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
at::Tensor support_features_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
at::Tensor new_features_tensor, at::Tensor new_local_xyz_tensor,
at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor,
int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int use_xyz,
int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){
// support_xyz_tensor: (N1 + N2 ..., 3) xyz coordinates of the features
// support_features_tensor: (N1 + N2 ..., C)
// xyz_batch_cnt: (batch_size), [N1, N2, ...]
// new_xyz_tensor: (M1 + M2 ..., 3) centers of new positions
// new_features_tensor: (M1 + M2 ..., C)
// new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// point_cnt_of_grid: (M1 + M2 ..., num_total_grids)
// grouped_idxs_tensor: (num_max_sum_points, 3)
// num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz
// use_xyz: whether to calculate new_local_xyz
// neighbor_type: 1: ball, others: cube
// pooling_type: 0: avg_pool, 1: random choice
CHECK_INPUT(support_xyz_tensor);
CHECK_INPUT(support_features_tensor);
CHECK_INPUT(xyz_batch_cnt_tensor);
CHECK_INPUT(new_xyz_tensor);
CHECK_INPUT(new_xyz_batch_cnt_tensor);
CHECK_INPUT(new_features_tensor);
CHECK_INPUT(new_local_xyz_tensor);
CHECK_INPUT(point_cnt_of_grid_tensor);
CHECK_INPUT(grouped_idxs_tensor);
const float *support_xyz = support_xyz_tensor.data<float>();
const float *support_features = support_features_tensor.data<float>();
const int *xyz_batch_cnt = xyz_batch_cnt_tensor.data<int>();
const float *new_xyz = new_xyz_tensor.data<float>();
const int *new_xyz_batch_cnt = new_xyz_batch_cnt_tensor.data<int>();
float *new_features = new_features_tensor.data<float>();
float *new_local_xyz = new_local_xyz_tensor.data<float>();
int *point_cnt_of_grid = point_cnt_of_grid_tensor.data<int>();
int *grouped_idxs = grouped_idxs_tensor.data<int>();
int N = support_xyz_tensor.size(0);
int batch_size = xyz_batch_cnt_tensor.size(0);
int M = new_xyz_tensor.size(0);
int num_c_out = new_features_tensor.size(1);
int num_c_in = support_features_tensor.size(1);
int num_total_grids = point_cnt_of_grid_tensor.size(1);
int cum_sum = vector_pool_kernel_launcher_stack(
support_xyz, support_features, xyz_batch_cnt,
new_xyz, new_features, new_local_xyz, new_xyz_batch_cnt,
point_cnt_of_grid, grouped_idxs,
num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance,
batch_size, N, M, num_c_in, num_c_out, num_total_grids, use_xyz, num_max_sum_points, nsample, neighbor_type, pooling_type
);
return cum_sum;
}
int vector_pool_grad_wrapper_stack(at::Tensor grad_new_features_tensor,
at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor,
at::Tensor grad_support_features_tensor) {
// grad_new_features_tensor: (M1 + M2 ..., C_out)
// point_cnt_of_grid_tensor: (M1 + M2 ..., num_total_grids)
// grouped_idxs_tensor: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
// grad_support_features_tensor: (N1 + N2 ..., C_in)
CHECK_INPUT(grad_new_features_tensor);
CHECK_INPUT(point_cnt_of_grid_tensor);
CHECK_INPUT(grouped_idxs_tensor);
CHECK_INPUT(grad_support_features_tensor);
int M = grad_new_features_tensor.size(0);
int num_c_out = grad_new_features_tensor.size(1);
int N = grad_support_features_tensor.size(0);
int num_c_in = grad_support_features_tensor.size(1);
int num_total_grids = point_cnt_of_grid_tensor.size(1);
int num_max_sum_points = grouped_idxs_tensor.size(0);
const float *grad_new_features = grad_new_features_tensor.data<float>();
const int *point_cnt_of_grid = point_cnt_of_grid_tensor.data<int>();
const int *grouped_idxs = grouped_idxs_tensor.data<int>();
float *grad_support_features = grad_support_features_tensor.data<float>();
vector_pool_grad_kernel_launcher_stack(
grad_new_features, point_cnt_of_grid, grouped_idxs, grad_support_features,
N, M, num_c_out, num_c_in, num_total_grids, num_max_sum_points
);
return 1;
}
/*
Vector-pool aggregation based local feature aggregation for point cloud.
PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
https://arxiv.org/abs/2102.00463
Written by Shaoshuai Shi
All Rights Reserved 2020.
*/
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "vector_pool_gpu.h"
#include "cuda_utils.h"
__global__ void query_three_nn_by_stacked_local_idxs_kernel(
const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers,
int *new_xyz_grid_idxs, float *new_xyz_grid_dist2,
const int *stack_neighbor_idxs, const int *start_len,
int M, int num_total_grids){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
// new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn
// new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn
// stack_neighbor_idxs: (max_length_of_neighbor_idxs)
// start_len: (M1 + M2, 2) [start_offset, neighbor_length]
int grid_idx = blockIdx.y;
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (pt_idx >= M || grid_idx >= num_total_grids) return;
new_xyz += pt_idx * 3;
new_xyz_grid_centers += pt_idx * num_total_grids * 3 + grid_idx * 3;
new_xyz_grid_idxs += pt_idx * num_total_grids * 3 + grid_idx * 3;
new_xyz_grid_dist2 += pt_idx * num_total_grids * 3 + grid_idx * 3;
start_len += pt_idx * 2;
stack_neighbor_idxs += start_len[0];
int neighbor_length = start_len[1];
float center_x = new_xyz_grid_centers[0];
float center_y = new_xyz_grid_centers[1];
float center_z = new_xyz_grid_centers[2];
double best1 = 1e40, best2 = 1e40, best3 = 1e40;
int besti1 = -1, besti2 = -1, besti3 = -1;
for (int k = 0; k < neighbor_length; k++){
int cur_neighbor_idx = stack_neighbor_idxs[k];
float x = support_xyz[cur_neighbor_idx * 3 + 0];
float y = support_xyz[cur_neighbor_idx * 3 + 1];
float z = support_xyz[cur_neighbor_idx * 3 + 2];
float d = (center_x - x) * (center_x - x) + (center_y - y) * (center_y - y) + (center_z - z) * (center_z - z);
if (d < best1) {
best3 = best2; besti3 = besti2;
best2 = best1; besti2 = besti1;
best1 = d; besti1 = cur_neighbor_idx;
}
else if (d < best2) {
best3 = best2; besti3 = besti2;
best2 = d; besti2 = cur_neighbor_idx;
}
else if (d < best3) {
best3 = d; besti3 = cur_neighbor_idx;
}
}
if (besti2 == -1){
besti2 = besti1; best2 = best1;
}
if (besti3 == -1){
besti3 = besti1; best3 = best1;
}
new_xyz_grid_dist2[0] = best1;
new_xyz_grid_dist2[1] = best2;
new_xyz_grid_dist2[2] = best3;
new_xyz_grid_idxs[0] = besti1;
new_xyz_grid_idxs[1] = besti2;
new_xyz_grid_idxs[2] = besti3;
}
int query_three_nn_by_stacked_local_idxs_kernel_launcher_stack(
const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers,
int *new_xyz_grid_idxs, float *new_xyz_grid_dist2,
const int *stack_neighbor_idxs, const int *start_len,
int M, int num_total_grids){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
// new_xyz_grid_idxs: (M1 + M2 ..., num_total_grids, 3) three-nn
// new_xyz_grid_dist2: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn
// stack_neighbor_idxs: (max_length_of_neighbor_idxs)
// start_len: (M1 + M2, 2) [start_offset, neighbor_length]
cudaError_t err;
dim3 blocks(DIVUP(M, THREADS_PER_BLOCK), num_total_grids); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
query_three_nn_by_stacked_local_idxs_kernel<<<blocks, threads>>>(
support_xyz, new_xyz, new_xyz_grid_centers,
new_xyz_grid_idxs, new_xyz_grid_dist2, stack_neighbor_idxs, start_len,
M, num_total_grids
);
// cudaDeviceSynchronize(); // for using printf in kernel function
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
return 0;
}
__global__ void query_stacked_local_neighbor_idxs_kernel(
const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt,
int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs,
float max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// xyz_batch_cnt: (batch_size), [N1, N2, ...]
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// stack_neighbor_idxs: (max_length_of_neighbor_idxs)
// start_len: (M1 + M2, 2) [start_offset, neighbor_length]
// cumsum: (1), max offset of current data in stack_neighbor_idxs
// max_neighbour_distance: float
// nsample: find all (-1), find limited number(>0)
// neighbor_type: 1: ball, others: cube
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (pt_idx >= M) return;
int bs_idx = 0, pt_cnt = new_xyz_batch_cnt[0];
for (int k = 1; k < batch_size; k++){
if (pt_idx < pt_cnt) break;
pt_cnt += new_xyz_batch_cnt[k];
bs_idx = k;
}
int xyz_batch_start_idx = 0;
for (int k = 0; k < bs_idx; k++) xyz_batch_start_idx += xyz_batch_cnt[k];
support_xyz += xyz_batch_start_idx * 3;
new_xyz += pt_idx * 3;
start_len += pt_idx * 2;
float new_x = new_xyz[0];
float new_y = new_xyz[1];
float new_z = new_xyz[2];
int n = xyz_batch_cnt[bs_idx];
float local_x, local_y, local_z;
float radius2 = max_neighbour_distance * max_neighbour_distance;
int temp_idxs[1000];
int sample_cnt = 0;
for (int k = 0; k < n; ++k) {
local_x = support_xyz[k * 3 + 0] - new_x;
local_y = support_xyz[k * 3 + 1] - new_y;
local_z = support_xyz[k * 3 + 2] - new_z;
if (neighbor_type == 1){
// ball
if (local_x * local_x + local_y * local_y + local_z * local_z > radius2){
continue;
}
}
else{
// voxel
if ((fabs(local_x) > max_neighbour_distance) |
(fabs(local_y) > max_neighbour_distance) |
(fabs(local_z) > max_neighbour_distance)){
continue;
}
}
if (sample_cnt < 1000){
temp_idxs[sample_cnt] = k;
}
else{
break;
}
sample_cnt++;
if (nsample > 0 && sample_cnt >= nsample) break;
}
start_len[0] = atomicAdd(cumsum, sample_cnt);
start_len[1] = sample_cnt;
int max_thresh = avg_length_of_neighbor_idxs * M;
if (start_len[0] >= max_thresh) return;
stack_neighbor_idxs += start_len[0];
if (start_len[0] + sample_cnt >= max_thresh) sample_cnt = max_thresh - start_len[0];
for (int k = 0; k < sample_cnt; k++){
stack_neighbor_idxs[k] = temp_idxs[k] + xyz_batch_start_idx;
}
}
int query_stacked_local_neighbor_idxs_kernel_launcher_stack(
const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt,
int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs,
float max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// xyz_batch_cnt: (batch_size), [N1, N2, ...]
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// stack_neighbor_idxs: (max_length_of_neighbor_idxs)
// start_len: (M1 + M2, 2) [start_offset, neighbor_length]
// cumsum: (1), max offset of current data in stack_neighbor_idxs
// max_neighbour_distance: float
// nsample: find all (-1), find limited number(>0)
// neighbor_type: 1: ball, others: cube
cudaError_t err;
dim3 blocks(DIVUP(M, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
query_stacked_local_neighbor_idxs_kernel<<<blocks, threads>>>(
support_xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt,
stack_neighbor_idxs, start_len, cumsum, avg_length_of_neighbor_idxs,
max_neighbour_distance, batch_size, M, nsample, neighbor_type
);
// cudaDeviceSynchronize(); // for using printf in kernel function
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
return 0;
}
__global__ void vector_pool_kernel_stack(
const float *support_xyz, const float *support_features, const int *xyz_batch_cnt,
const float *new_xyz, float *new_features, float *new_local_xyz, const int *new_xyz_batch_cnt,
int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance,
int batch_size, int M, int num_c_in, int num_c_out,
int num_c_each_grid, int num_total_grids, int *point_cnt_of_grid, int *grouped_idxs,
int use_xyz, float grid_size_x, float grid_size_y,
float grid_size_z, int *cum_sum, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// support_features: (N1 + N2 ..., C)
// xyz_batch_cnt: (batch_size), [N1, N2, ...]
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_features: (M1 + M2 ..., C), C = num_total_grids * num_c_each_grid
// new_local_xyz: (M1 + M2 ..., 3 * num_total_grids)
// new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz
// point_cnt_of_grid: (M1 + M2 ..., num_total_grids)
// grouped_idxs: (num_max_sum_points, 3)[idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
// use_xyz: whether to calculate new_local_xyz
// neighbor_type: 1: ball, others: cube
// pooling_type: 0: avg_pool, 1: random choice
int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
if (pt_idx >= M) return;
int bs_idx = 0, pt_cnt = new_xyz_batch_cnt[0];
for (int k = 1; k < batch_size; k++){
if (pt_idx < pt_cnt) break;
pt_cnt += new_xyz_batch_cnt[k];
bs_idx = k;
}
int xyz_batch_start_idx = 0;
for (int k = 0; k < bs_idx; k++) xyz_batch_start_idx += xyz_batch_cnt[k];
support_xyz += xyz_batch_start_idx * 3;
support_features += xyz_batch_start_idx * num_c_in;
new_xyz += pt_idx * 3;
new_features += pt_idx * num_c_out;
point_cnt_of_grid += pt_idx * num_total_grids;
new_local_xyz += pt_idx * 3 * num_total_grids;
float new_x = new_xyz[0];
float new_y = new_xyz[1];
float new_z = new_xyz[2];
int n = xyz_batch_cnt[bs_idx], grid_idx_x, grid_idx_y, grid_idx_z, grid_idx;
float local_x, local_y, local_z;
float radius2 = max_neighbour_distance * max_neighbour_distance;
int sample_cnt = 0;
for (int k = 0; k < n; ++k) {
local_x = support_xyz[k * 3 + 0] - new_x;
local_y = support_xyz[k * 3 + 1] - new_y;
local_z = support_xyz[k * 3 + 2] - new_z;
if (neighbor_type == 1){
// ball
if (local_x * local_x + local_y * local_y + local_z * local_z > radius2){
continue;
}
}
else{
// voxel
if ((fabs(local_x) > max_neighbour_distance) |
(fabs(local_y) > max_neighbour_distance) |
(fabs(local_z) > max_neighbour_distance)){
continue;
}
}
grid_idx_x = floorf((local_x + max_neighbour_distance) / grid_size_x);
grid_idx_y = floorf((local_y + max_neighbour_distance) / grid_size_y);
grid_idx_z = floorf((local_z + max_neighbour_distance) / grid_size_z);
grid_idx = grid_idx_x * num_grid_y * num_grid_z + grid_idx_y * num_grid_z + grid_idx_z;
grid_idx = min(max(grid_idx, 0), num_total_grids - 1);
if (pooling_type == 0){
// avg pooling
point_cnt_of_grid[grid_idx] ++;
for (int i = 0; i < num_c_in; i++){
new_features[grid_idx * num_c_each_grid + i % num_c_each_grid] += support_features[k * num_c_in + i];
}
if (use_xyz){
new_local_xyz[grid_idx * 3 + 0] += local_x;
new_local_xyz[grid_idx * 3 + 1] += local_y;
new_local_xyz[grid_idx * 3 + 2] += local_z;
}
int cnt = atomicAdd(cum_sum, 1);
if (cnt >= num_max_sum_points) continue; // continue to statistics the max number of points
grouped_idxs[cnt * 3 + 0] = xyz_batch_start_idx + k;
grouped_idxs[cnt * 3 + 1] = pt_idx;
grouped_idxs[cnt * 3 + 2] = grid_idx;
sample_cnt++;
if(nsample > 0 && sample_cnt >= nsample) break;
}
else if (pooling_type == 1){
// random choose one within sub-voxel
// printf("new_xyz=(%.2f, %.2f, %.2f, ), find neighbor k=%d: support_xyz=(%.2f, %.2f, %.2f), local_xyz=(%.2f, %.2f, %.2f), neighbor=%.2f, grid_idx=%d, point_cnt_of_grid_idx=%d\n",
// new_x, new_y, new_z, k, support_xyz[k * 3 + 0], support_xyz[k * 3 + 1], support_xyz[k * 3 + 2], local_x, local_y, local_z, max_neighbour_distance, grid_idx, point_cnt_of_grid[grid_idx]);
if (point_cnt_of_grid[grid_idx] == 0){
point_cnt_of_grid[grid_idx] ++;
for (int i = 0; i < num_c_in; i++){
new_features[grid_idx * num_c_each_grid + i % num_c_each_grid] = support_features[k * num_c_in + i];
}
if (use_xyz){
new_local_xyz[grid_idx * 3 + 0] = local_x;
new_local_xyz[grid_idx * 3 + 1] = local_y;
new_local_xyz[grid_idx * 3 + 2] = local_z;
}
int cnt = atomicAdd(cum_sum, 1);
if (cnt >= num_max_sum_points) continue; // continue to statistics the max number of points
grouped_idxs[cnt * 3 + 0] = xyz_batch_start_idx + k;
grouped_idxs[cnt * 3 + 1] = pt_idx;
grouped_idxs[cnt * 3 + 2] = grid_idx;
sample_cnt++;
if(nsample > 0 && sample_cnt >= nsample || sample_cnt >= num_total_grids) break;
}
}
}
}
int vector_pool_kernel_launcher_stack(
const float *support_xyz, const float *support_features, const int *xyz_batch_cnt,
const float *new_xyz, float *new_features, float *new_local_xyz, const int *new_xyz_batch_cnt,
int *point_cnt_of_grid, int *grouped_idxs,
int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance,
int batch_size, int N, int M, int num_c_in, int num_c_out, int num_total_grids,
int use_xyz, int num_max_sum_points, int nsample, int neighbor_type, int pooling_type){
// support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
// support_features: (N1 + N2 ..., C)
// xyz_batch_cnt: (batch_size), [N1, N2, ...]
// new_xyz: (M1 + M2 ..., 3) centers of the ball query
// new_features: (M1 + M2 ..., C)
// new_local_xyz: (M1 + M2 ..., 3)
// new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
// num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz
// use_xyz: whether to calculate new_local_xyz
// grouped_idxs: (num_max_sum_points, 3)[idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
// neighbor_type: 1: ball, others: cube
// pooling_type: 0: avg_pool, 1: random choice
cudaError_t err;
int num_c_each_grid = num_c_out / num_total_grids;
float grid_size_x = max_neighbour_distance * 2 / num_grid_x;
float grid_size_y = max_neighbour_distance * 2 / num_grid_y;
float grid_size_z = max_neighbour_distance * 2 / num_grid_z;
dim3 blocks(DIVUP(M, THREADS_PER_BLOCK)); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
int cum_sum = 0;
int *p_cum_sum;
cudaMalloc((void**)&p_cum_sum, sizeof(int));
cudaMemcpy(p_cum_sum, &cum_sum, sizeof(int), cudaMemcpyHostToDevice);
vector_pool_kernel_stack<<<blocks, threads>>>(
support_xyz, support_features, xyz_batch_cnt,
new_xyz, new_features, new_local_xyz, new_xyz_batch_cnt,
num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance,
batch_size, M, num_c_in, num_c_out,
num_c_each_grid, num_total_grids, point_cnt_of_grid, grouped_idxs,
use_xyz, grid_size_x, grid_size_y, grid_size_z, p_cum_sum, num_max_sum_points,
nsample, neighbor_type, pooling_type
);
cudaMemcpy(&cum_sum, p_cum_sum, sizeof(int), cudaMemcpyDeviceToHost);
// cudaDeviceSynchronize(); // for using printf in kernel function
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
return cum_sum;
}
__global__ void vector_pool_grad_kernel_stack(const float *grad_new_features,
const int *point_cnt_of_grid, const int *grouped_idxs,
float *grad_support_features, int N, int M, int num_c_out, int num_c_in,
int num_c_each_grid, int num_total_grids, int num_max_sum_points){
// grad_new_features: (M1 + M2 ..., C_out)
// point_cnt_of_grid: (M1 + M2 ..., num_total_grids)
// grouped_idxs: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
// grad_support_features: (N1 + N2 ..., C_in)
int channel_idx = blockIdx.y;
int index = blockIdx.x * blockDim.x + threadIdx.x;
if (index >= num_max_sum_points || channel_idx >= num_c_in) return;
int idx_of_support_xyz = grouped_idxs[index * 3 + 0];
int idx_of_new_xyz = grouped_idxs[index * 3 + 1];
int idx_of_grid_idx = grouped_idxs[index * 3 + 2];
int num_total_pts = point_cnt_of_grid[idx_of_new_xyz * num_total_grids + idx_of_grid_idx];
grad_support_features += idx_of_support_xyz * num_c_in + channel_idx;
grad_new_features += idx_of_new_xyz * num_c_out + idx_of_grid_idx * num_c_each_grid;
int channel_idx_of_cin = channel_idx % num_c_each_grid;
float cur_grad = 1 / fmaxf(float(num_total_pts), 1.0);
atomicAdd(grad_support_features, grad_new_features[channel_idx_of_cin] * cur_grad);
}
void vector_pool_grad_kernel_launcher_stack(
const float *grad_new_features, const int *point_cnt_of_grid, const int *grouped_idxs,
float *grad_support_features, int N, int M, int num_c_out, int num_c_in, int num_total_grids,
int num_max_sum_points){
// grad_new_features: (M1 + M2 ..., C_out)
// point_cnt_of_grid: (M1 + M2 ..., num_total_grids)
// grouped_idxs: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
// grad_support_features: (N1 + N2 ..., C_in)
int num_c_each_grid = num_c_out / num_total_grids;
cudaError_t err;
dim3 blocks(DIVUP(num_max_sum_points, THREADS_PER_BLOCK), num_c_in); // blockIdx.x(col), blockIdx.y(row)
dim3 threads(THREADS_PER_BLOCK);
vector_pool_grad_kernel_stack<<<blocks, threads>>>(
grad_new_features, point_cnt_of_grid, grouped_idxs, grad_support_features,
N, M, num_c_out, num_c_in, num_c_each_grid, num_total_grids, num_max_sum_points
);
// cudaDeviceSynchronize(); // for using printf in kernel function
err = cudaGetLastError();
if (cudaSuccess != err) {
fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
exit(-1);
}
}
\ No newline at end of file
/*
Vector-pool aggregation based local feature aggregation for point cloud.
PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
https://arxiv.org/abs/2102.00463
Written by Shaoshuai Shi
All Rights Reserved 2020.
*/
#ifndef _STACK_VECTOR_POOL_GPU_H
#define _STACK_VECTOR_POOL_GPU_H
#include <torch/serialize/tensor.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
int query_stacked_local_neighbor_idxs_kernel_launcher_stack(
const float *support_xyz, const int *xyz_batch_cnt, const float *new_xyz, const int *new_xyz_batch_cnt,
int *stack_neighbor_idxs, int *start_len, int *cumsum, int avg_length_of_neighbor_idxs,
float max_neighbour_distance, int batch_size, int M, int nsample, int neighbor_type);
int query_stacked_local_neighbor_idxs_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor, at::Tensor cumsum_tensor,
int avg_length_of_neighbor_idxs, float max_neighbour_distance, int nsample, int neighbor_type);
int query_three_nn_by_stacked_local_idxs_kernel_launcher_stack(
const float *support_xyz, const float *new_xyz, const float *new_xyz_grid_centers,
int *new_xyz_grid_idxs, float *new_xyz_grid_dist2,
const int *stack_neighbor_idxs, const int *start_len,
int M, int num_total_grids);
int query_three_nn_by_stacked_local_idxs_wrapper_stack(at::Tensor support_xyz_tensor,
at::Tensor new_xyz_tensor, at::Tensor new_xyz_grid_centers_tensor,
at::Tensor new_xyz_grid_idxs_tensor, at::Tensor new_xyz_grid_dist2_tensor,
at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor,
int M, int num_total_grids);
int vector_pool_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
at::Tensor support_features_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
at::Tensor new_features_tensor, at::Tensor new_local_xyz,
at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor,
int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int use_xyz,
int num_max_sum_points, int nsample, int neighbor_type, int pooling_type);
int vector_pool_kernel_launcher_stack(
const float *support_xyz, const float *support_features, const int *xyz_batch_cnt,
const float *new_xyz, float *new_features, float * new_local_xyz, const int *new_xyz_batch_cnt,
int *point_cnt_of_grid, int *grouped_idxs,
int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance,
int batch_size, int N, int M, int num_c_in, int num_c_out, int num_total_grids, int use_xyz,
int num_max_sum_points, int nsample, int neighbor_type, int pooling_type);
int vector_pool_grad_wrapper_stack(at::Tensor grad_new_features_tensor,
at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor,
at::Tensor grad_support_features_tensor);
void vector_pool_grad_kernel_launcher_stack(
const float *grad_new_features, const int *point_cnt_of_grid, const int *grouped_idxs,
float *grad_support_features, int N, int M, int num_c_out, int num_c_in, int num_total_grids,
int num_max_sum_points);
#endif
...@@ -28,7 +28,7 @@ def write_version_to_file(version, target_file): ...@@ -28,7 +28,7 @@ def write_version_to_file(version, target_file):
if __name__ == '__main__': if __name__ == '__main__':
version = '0.5.1+%s' % get_git_commit_number() version = '0.5.2+%s' % get_git_commit_number()
write_version_to_file(version, 'pcdet/version.py') write_version_to_file(version, 'pcdet/version.py')
setup( setup(
...@@ -97,6 +97,8 @@ if __name__ == '__main__': ...@@ -97,6 +97,8 @@ if __name__ == '__main__':
'src/interpolate_gpu.cu', 'src/interpolate_gpu.cu',
'src/voxel_query.cpp', 'src/voxel_query.cpp',
'src/voxel_query_gpu.cu', 'src/voxel_query_gpu.cu',
'src/vector_pool.cpp',
'src/vector_pool_gpu.cu'
], ],
), ),
make_cuda_ext( make_cuda_ext(
......
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
MODEL:
NAME: PVRCNNPlusPlus
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: CenterHead
CLASS_AGNOSTIC: False
CLASS_NAMES_EACH_HEAD: [
[ 'Vehicle', 'Pedestrian', 'Cyclist' ]
]
SHARED_CONV_CHANNEL: 64
USE_BIAS_BEFORE_NORM: True
NUM_HM_CONV: 2
SEPARATE_HEAD_CFG:
HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ]
HEAD_DICT: {
'center': { 'out_channels': 2, 'num_conv': 2 },
'center_z': { 'out_channels': 1, 'num_conv': 2 },
'dim': { 'out_channels': 3, 'num_conv': 2 },
'rot': { 'out_channels': 2, 'num_conv': 2 },
}
TARGET_ASSIGNER_CONFIG:
FEATURE_MAP_STRIDE: 8
NUM_MAX_OBJS: 500
GAUSSIAN_OVERLAP: 0.1
MIN_RADIUS: 2
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
}
POST_PROCESSING:
SCORE_THRESH: 0.1
POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ]
MAX_OBJ_PER_SAMPLE: 500
NMS_CONFIG:
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
PFE:
NAME: VoxelSetAbstraction
POINT_SOURCE: raw_points
NUM_KEYPOINTS: 4096
NUM_OUTPUT_FEATURES: 90
SAMPLE_METHOD: SPC
SPC_SAMPLING:
NUM_SECTORS: 6
SAMPLE_RADIUS_WITH_ROI: 1.6
FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points']
SA_LAYER:
raw_points:
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 2
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 32 ]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 2, 2, 2 ]
MAX_NEIGHBOR_DISTANCE: 0.2
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 32, 32 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 0.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 32, 32 ]
x_conv3:
DOWNSAMPLE_FACTOR: 4
INPUT_CHANNELS: 64
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 32
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [128]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [3, 3, 3]
MAX_NEIGHBOR_DISTANCE: 1.2
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [64, 64]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 2.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
x_conv4:
DOWNSAMPLE_FACTOR: 8
INPUT_CHANNELS: 64
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 32
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 128 ]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 2.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 4.8
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
POINT_HEAD:
NAME: PointHeadSimple
CLS_FC: [256, 256]
CLASS_AGNOSTIC: True
USE_POINT_FEATURES_BEFORE_FUSION: True
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
LOSS_CONFIG:
LOSS_REG: smooth-l1
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
}
ROI_HEAD:
NAME: PVRCNNHead
CLASS_AGNOSTIC: True
SHARED_FC: [256, 256]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.3
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 1024
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.7
SCORE_THRESH: 0.1
# NMS_PRE_MAXSIZE: 4096
# NMS_POST_MAXSIZE: 500
# NMS_THRESH: 0.85
ROI_GRID_POOL:
GRID_SIZE: 6
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: voxel_random_choice
NUM_REDUCED_CHANNELS: 30
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 128 ]
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 0.8
NEIGHBOR_NSAMPLE: 32
POST_MLPS: [ 64, 64 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 1.6
NEIGHBOR_NSAMPLE: 32
POST_MLPS: [ 64, 64 ]
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: waymo
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 2
NUM_EPOCHS: 30
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.001
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
\ No newline at end of file
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
MODEL:
NAME: PVRCNNPlusPlus
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelResBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: CenterHead
CLASS_AGNOSTIC: False
CLASS_NAMES_EACH_HEAD: [
[ 'Vehicle', 'Pedestrian', 'Cyclist' ]
]
SHARED_CONV_CHANNEL: 64
USE_BIAS_BEFORE_NORM: True
NUM_HM_CONV: 2
SEPARATE_HEAD_CFG:
HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ]
HEAD_DICT: {
'center': { 'out_channels': 2, 'num_conv': 2 },
'center_z': { 'out_channels': 1, 'num_conv': 2 },
'dim': { 'out_channels': 3, 'num_conv': 2 },
'rot': { 'out_channels': 2, 'num_conv': 2 },
}
TARGET_ASSIGNER_CONFIG:
FEATURE_MAP_STRIDE: 8
NUM_MAX_OBJS: 500
GAUSSIAN_OVERLAP: 0.1
MIN_RADIUS: 2
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
}
POST_PROCESSING:
SCORE_THRESH: 0.1
POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ]
MAX_OBJ_PER_SAMPLE: 500
NMS_CONFIG:
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
PFE:
NAME: VoxelSetAbstraction
POINT_SOURCE: raw_points
NUM_KEYPOINTS: 4096
NUM_OUTPUT_FEATURES: 90
SAMPLE_METHOD: SPC
SPC_SAMPLING:
NUM_SECTORS: 6
SAMPLE_RADIUS_WITH_ROI: 1.6
FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points']
SA_LAYER:
raw_points:
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 2
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 32 ]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 2, 2, 2 ]
MAX_NEIGHBOR_DISTANCE: 0.2
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 32, 32 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 0.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 32, 32 ]
x_conv3:
DOWNSAMPLE_FACTOR: 4
INPUT_CHANNELS: 64
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 32
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [128]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [3, 3, 3]
MAX_NEIGHBOR_DISTANCE: 1.2
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [64, 64]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 2.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
x_conv4:
DOWNSAMPLE_FACTOR: 8
INPUT_CHANNELS: 64
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: local_interpolation
NUM_REDUCED_CHANNELS: 32
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 128 ]
FILTER_NEIGHBOR_WITH_ROI: True
RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 2.4
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 4.8
NEIGHBOR_NSAMPLE: -1
POST_MLPS: [ 64, 64 ]
POINT_HEAD:
NAME: PointHeadSimple
CLS_FC: [256, 256]
CLASS_AGNOSTIC: True
USE_POINT_FEATURES_BEFORE_FUSION: True
TARGET_CONFIG:
GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
LOSS_CONFIG:
LOSS_REG: smooth-l1
LOSS_WEIGHTS: {
'point_cls_weight': 1.0,
}
ROI_HEAD:
NAME: PVRCNNHead
CLASS_AGNOSTIC: True
SHARED_FC: [256, 256]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.3
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 1024
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.7
SCORE_THRESH: 0.1
# NMS_PRE_MAXSIZE: 4096
# NMS_POST_MAXSIZE: 500
# NMS_THRESH: 0.85
ROI_GRID_POOL:
GRID_SIZE: 6
NAME: VectorPoolAggregationModuleMSG
NUM_GROUPS: 2
LOCAL_AGGREGATION_TYPE: voxel_random_choice
NUM_REDUCED_CHANNELS: 30
NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
MSG_POST_MLPS: [ 128 ]
GROUP_CFG_0:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 0.8
NEIGHBOR_NSAMPLE: 32
POST_MLPS: [ 64, 64 ]
GROUP_CFG_1:
NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
MAX_NEIGHBOR_DISTANCE: 1.6
NEIGHBOR_NSAMPLE: 32
POST_MLPS: [ 64, 64 ]
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: waymo
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 2
NUM_EPOCHS: 30
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.001
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment