Release the codes of PV-RCNN++, update OpenPCDet to v0.5.2

Release the codes of PV-RCNN++, update OpenPCDet to v0.5.2

Release the codes of PV-RCNN++, update OpenPCDet to v0.5.2
a991105c · Shaoshuai Shi · GitHub · 1483517a · b6fbf07f · a991105c
Unverified Commit a991105c authored Jan 05, 2022 by Shaoshuai Shi Committed by GitHub Jan 05, 2022
6 changed files
--- a/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool.cpp
+++ b/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool.cpp
+/*
+Vector-pool aggregation based local feature aggregation for point cloud.
+PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
+https://arxiv.org/abs/2102.00463
+
+Written by Shaoshuai Shi
+All Rights Reserved 2020.
+*/
+
+
+#include <torch/serialize/tensor.h>
+#include <vector>
+#include <THC/THC.h>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+#include "vector_pool_gpu.h"
+
+extern THCState *state;
+
+// Input-validation helpers used by the wrappers in this file.
+// On failure each check prints the stringified argument together with the
+// file and line of the check to stderr and terminates the process via exit(-1).
+// Every macro is wrapped in do { ... } while (0) so that it expands to exactly
+// one statement and therefore stays correct inside an unbraced if/else.
+// The argument is parenthesised so that any expression can be passed safely.
+#define CHECK_CUDA(x) do { \
+  if (!(x).type().is_cuda()) { \
+    fprintf(stderr, "%s must be CUDA tensor at %s:%d\n", #x, __FILE__, __LINE__); \
+    exit(-1); \
+  } \
+} while (0)
+#define CHECK_CONTIGUOUS(x) do { \
+  if (!(x).is_contiguous()) { \
+    fprintf(stderr, "%s must be contiguous tensor at %s:%d\n", #x, __FILE__, __LINE__); \
+    exit(-1); \
+  } \
+} while (0)
+// Runs the CUDA check first and the contiguity check second, as ONE statement.
+#define CHECK_INPUT(x) do { \
+  CHECK_CUDA(x); \
+  CHECK_CONTIGUOUS(x); \
+} while (0)
+
+
+int query_stacked_local_neighbor_idxs_wrapper_stack(
+    at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
+    at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
+    at::Tensor stack_neighbor_idxs_tensor, at::Tensor start_len_tensor,
+    at::Tensor cumsum_tensor, int avg_length_of_neighbor_idxs,
+    float max_neighbour_distance, int nsample, int neighbor_type)
+{
+    // Host-side entry point: validates every tensor (CUDA + contiguous), takes
+    // the raw data pointers and forwards them, together with the scalar
+    // arguments, to query_stacked_local_neighbor_idxs_kernel_launcher_stack.
+    //
+    // support_xyz_tensor: (N1 + N2 ..., 3) xyz coordinates of the features
+    // xyz_batch_cnt_tensor: (batch_size), [N1, N2, ...]
+    // new_xyz_tensor: (M1 + M2 ..., 3) centers of the ball query
+    // new_xyz_batch_cnt_tensor: (batch_size), [M1, M2, ...]
+    // stack_neighbor_idxs_tensor, start_len_tensor, cumsum_tensor: int tensors
+    //     passed to the launcher as mutable pointers, so presumably outputs;
+    //     their layout is not visible in this function - TODO confirm
+    // avg_length_of_neighbor_idxs, max_neighbour_distance: forwarded unchanged
+    // nsample: find all (-1), find limited number(>0)
+    // neighbor_type: 1: ball, others: cube
+    //
+    // Always returns 0; the launcher's result is not inspected.
+
+    CHECK_INPUT(support_xyz_tensor);
+    CHECK_INPUT(xyz_batch_cnt_tensor);
+    CHECK_INPUT(new_xyz_tensor);
+    CHECK_INPUT(new_xyz_batch_cnt_tensor);
+    CHECK_INPUT(stack_neighbor_idxs_tensor);
+    CHECK_INPUT(start_len_tensor);
+    CHECK_INPUT(cumsum_tensor);
+
+    // Read-only inputs.
+    const float *const support_xyz_ptr = support_xyz_tensor.data<float>();
+    const int *const xyz_batch_cnt_ptr = xyz_batch_cnt_tensor.data<int>();
+    const float *const new_xyz_ptr = new_xyz_tensor.data<float>();
+    const int *const new_xyz_batch_cnt_ptr = new_xyz_batch_cnt_tensor.data<int>();
+    // Mutable buffers.
+    int *const stack_neighbor_idxs_ptr = stack_neighbor_idxs_tensor.data<int>();
+    int *const start_len_ptr = start_len_tensor.data<int>();
+    int *const cumsum_ptr = cumsum_tensor.data<int>();
+
+    // Sizes come from the leading dimension of the count / query tensors.
+    const int batch_size = xyz_batch_cnt_tensor.size(0);
+    const int num_new_points = new_xyz_tensor.size(0);
+
+    query_stacked_local_neighbor_idxs_kernel_launcher_stack(
+        support_xyz_ptr, xyz_batch_cnt_ptr, new_xyz_ptr, new_xyz_batch_cnt_ptr,
+        stack_neighbor_idxs_ptr, start_len_ptr, cumsum_ptr,
+        avg_length_of_neighbor_idxs, max_neighbour_distance,
+        batch_size, num_new_points, nsample, neighbor_type
+    );
+    return 0;
+}
+
+
+int query_three_nn_by_stacked_local_idxs_wrapper_stack(
+    at::Tensor support_xyz_tensor, at::Tensor new_xyz_tensor,
+    at::Tensor new_xyz_grid_centers_tensor, at::Tensor new_xyz_grid_idxs_tensor,
+    at::Tensor new_xyz_grid_dist2_tensor, at::Tensor stack_neighbor_idxs_tensor,
+    at::Tensor start_len_tensor, int M, int num_total_grids)
+{
+    // Host-side entry point: validates every tensor (CUDA + contiguous), takes
+    // the raw data pointers and forwards them with M and num_total_grids to
+    // query_three_nn_by_stacked_local_idxs_kernel_launcher_stack.
+    //
+    // support_xyz_tensor: (N1 + N2 ..., 3) xyz coordinates of the features
+    // new_xyz_tensor: (M1 + M2 ..., 3) centers of the ball query
+    // new_xyz_grid_centers_tensor: (M1 + M2 ..., num_total_grids, 3) grids centers of each grid
+    // new_xyz_grid_idxs_tensor: (M1 + M2 ..., num_total_grids, 3) three-nn
+    // new_xyz_grid_dist2_tensor: (M1 + M2 ..., num_total_grids, 3) square of dist of three-nn
+    // stack_neighbor_idxs_tensor: (max_length_of_neighbor_idxs)
+    // start_len_tensor: (M1 + M2, 2)  [start_offset, neighbor_length]
+    // M, num_total_grids: supplied by the caller and forwarded unchanged
+    //
+    // Always returns 0; the launcher's result is not inspected.
+
+    CHECK_INPUT(support_xyz_tensor);
+    CHECK_INPUT(new_xyz_tensor);
+    CHECK_INPUT(new_xyz_grid_centers_tensor);
+    CHECK_INPUT(new_xyz_grid_idxs_tensor);
+    CHECK_INPUT(new_xyz_grid_dist2_tensor);
+    CHECK_INPUT(stack_neighbor_idxs_tensor);
+    CHECK_INPUT(start_len_tensor);
+
+    // Coordinates and grid centers are only read.
+    const float *const support_xyz_ptr = support_xyz_tensor.data<float>();
+    const float *const new_xyz_ptr = new_xyz_tensor.data<float>();
+    const float *const grid_centers_ptr = new_xyz_grid_centers_tensor.data<float>();
+    // Result buffers (passed to the launcher as non-const pointers).
+    int *const grid_idxs_ptr = new_xyz_grid_idxs_tensor.data<int>();
+    float *const grid_dist2_ptr = new_xyz_grid_dist2_tensor.data<float>();
+    // The launcher takes these two as const int *.
+    int *const stack_neighbor_idxs_ptr = stack_neighbor_idxs_tensor.data<int>();
+    int *const start_len_ptr = start_len_tensor.data<int>();
+
+    query_three_nn_by_stacked_local_idxs_kernel_launcher_stack(
+        support_xyz_ptr, new_xyz_ptr, grid_centers_ptr,
+        grid_idxs_ptr, grid_dist2_ptr,
+        stack_neighbor_idxs_ptr, start_len_ptr,
+        M, num_total_grids
+    );
+    return 0;
+}
+
+
+int vector_pool_wrapper_stack(
+    at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
+    at::Tensor support_features_tensor, at::Tensor new_xyz_tensor,
+    at::Tensor new_xyz_batch_cnt_tensor, at::Tensor new_features_tensor,
+    at::Tensor new_local_xyz_tensor, at::Tensor point_cnt_of_grid_tensor,
+    at::Tensor grouped_idxs_tensor,
+    int num_grid_x, int num_grid_y, int num_grid_z,
+    float max_neighbour_distance, int use_xyz, int num_max_sum_points,
+    int nsample, int neighbor_type, int pooling_type)
+{
+    // Host-side entry point of the vector-pool forward pass: validates every
+    // tensor (CUDA + contiguous), derives the size arguments from the tensor
+    // shapes and calls vector_pool_kernel_launcher_stack.
+    //
+    // support_xyz_tensor: (N1 + N2 ..., 3) xyz coordinates of the features
+    // support_features_tensor: (N1 + N2 ..., C); dim 1 is passed on as num_c_in
+    // xyz_batch_cnt_tensor: (batch_size), [N1, N2, ...]
+    // new_xyz_tensor: (M1 + M2 ..., 3) centers of new positions
+    // new_features_tensor: (M1 + M2 ..., C); dim 1 is passed on as num_c_out
+    // new_local_xyz_tensor: float buffer passed as a mutable pointer; shape is
+    //     not visible here - see use_xyz, TODO confirm
+    // new_xyz_batch_cnt_tensor: (batch_size), [M1, M2, ...]
+    // point_cnt_of_grid_tensor: (M1 + M2 ..., num_total_grids)
+    // grouped_idxs_tensor: (num_max_sum_points, 3)
+    // num_grid_x, num_grid_y, num_grid_z: number of grids in each local area centered at new_xyz
+    // use_xyz: whether to calculate new_local_xyz
+    // neighbor_type: 1: ball, others: cube
+    // pooling_type: 0: avg_pool, 1: random choice
+    //
+    // Returns the int produced by the launcher unchanged.
+
+    CHECK_INPUT(support_xyz_tensor);
+    CHECK_INPUT(support_features_tensor);
+    CHECK_INPUT(xyz_batch_cnt_tensor);
+    CHECK_INPUT(new_xyz_tensor);
+    CHECK_INPUT(new_xyz_batch_cnt_tensor);
+    CHECK_INPUT(new_features_tensor);
+    CHECK_INPUT(new_local_xyz_tensor);
+    CHECK_INPUT(point_cnt_of_grid_tensor);
+    CHECK_INPUT(grouped_idxs_tensor);
+
+    // Read-only inputs.
+    const float *const support_xyz_ptr = support_xyz_tensor.data<float>();
+    const float *const support_features_ptr = support_features_tensor.data<float>();
+    const int *const xyz_batch_cnt_ptr = xyz_batch_cnt_tensor.data<int>();
+    const float *const new_xyz_ptr = new_xyz_tensor.data<float>();
+    const int *const new_xyz_batch_cnt_ptr = new_xyz_batch_cnt_tensor.data<int>();
+    // Mutable buffers.
+    float *const new_features_ptr = new_features_tensor.data<float>();
+    float *const new_local_xyz_ptr = new_local_xyz_tensor.data<float>();
+    int *const point_cnt_of_grid_ptr = point_cnt_of_grid_tensor.data<int>();
+    int *const grouped_idxs_ptr = grouped_idxs_tensor.data<int>();
+
+    // Size arguments, all taken from the tensor shapes.
+    const int N = support_xyz_tensor.size(0);
+    const int batch_size = xyz_batch_cnt_tensor.size(0);
+    const int M = new_xyz_tensor.size(0);
+    const int num_c_out = new_features_tensor.size(1);
+    const int num_c_in = support_features_tensor.size(1);
+    const int num_total_grids = point_cnt_of_grid_tensor.size(1);
+
+    return vector_pool_kernel_launcher_stack(
+        support_xyz_ptr, support_features_ptr, xyz_batch_cnt_ptr,
+        new_xyz_ptr, new_features_ptr, new_local_xyz_ptr, new_xyz_batch_cnt_ptr,
+        point_cnt_of_grid_ptr, grouped_idxs_ptr,
+        num_grid_x, num_grid_y, num_grid_z, max_neighbour_distance,
+        batch_size, N, M, num_c_in, num_c_out, num_total_grids,
+        use_xyz, num_max_sum_points, nsample, neighbor_type, pooling_type
+    );
+}
+
+
+int vector_pool_grad_wrapper_stack(
+    at::Tensor grad_new_features_tensor,
+    at::Tensor point_cnt_of_grid_tensor,
+    at::Tensor grouped_idxs_tensor,
+    at::Tensor grad_support_features_tensor)
+{
+    // Host-side entry point of the vector-pool backward pass: validates every
+    // tensor (CUDA + contiguous), derives the size arguments from the tensor
+    // shapes and calls vector_pool_grad_kernel_launcher_stack.
+    //
+    // grad_new_features_tensor: (M1 + M2 ..., C_out)
+    // point_cnt_of_grid_tensor: (M1 + M2 ..., num_total_grids)
+    // grouped_idxs_tensor: (num_max_sum_points, 3) [idx of support_xyz, idx of new_xyz, idx of grid_idx in new_xyz]
+    // grad_support_features_tensor: (N1 + N2 ..., C_in); the only tensor handed
+    //     to the launcher through a mutable pointer
+    //
+    // Always returns 1 (note: the query wrappers in this file return 0).
+
+    CHECK_INPUT(grad_new_features_tensor);
+    CHECK_INPUT(point_cnt_of_grid_tensor);
+    CHECK_INPUT(grouped_idxs_tensor);
+    CHECK_INPUT(grad_support_features_tensor);
+
+    // Size arguments, all taken from the tensor shapes.
+    const int M = grad_new_features_tensor.size(0);
+    const int num_c_out = grad_new_features_tensor.size(1);
+    const int N = grad_support_features_tensor.size(0);
+    const int num_c_in = grad_support_features_tensor.size(1);
+    const int num_total_grids = point_cnt_of_grid_tensor.size(1);
+    const int num_max_sum_points = grouped_idxs_tensor.size(0);
+
+    const float *const grad_out_ptr = grad_new_features_tensor.data<float>();
+    const int *const point_cnt_of_grid_ptr = point_cnt_of_grid_tensor.data<int>();
+    const int *const grouped_idxs_ptr = grouped_idxs_tensor.data<int>();
+    float *const grad_in_ptr = grad_support_features_tensor.data<float>();
+
+    vector_pool_grad_kernel_launcher_stack(
+        grad_out_ptr, point_cnt_of_grid_ptr, grouped_idxs_ptr, grad_in_ptr,
+        N, M, num_c_out, num_c_in, num_total_grids, num_max_sum_points
+    );
+    return 1;
+}
--- a/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.cu
+++ b/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.cu
--- a/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.h
+++ b/pcdet/ops/pointnet2/pointnet2_stack/src/vector_pool_gpu.h
+/*
+Vector-pool aggregation based local feature aggregation for point cloud.
+PV-RCNN++: Point-Voxel Feature Set Abstraction With Local Vector Representation for 3D Object Detection
+https://arxiv.org/abs/2102.00463
+
+Written by Shaoshuai Shi
+All Rights Reserved 2020.
+*/
+
+
+#ifndef _STACK_VECTOR_POOL_GPU_H
+#define _STACK_VECTOR_POOL_GPU_H
+
+#include <torch/serialize/tensor.h>
+#include <vector>
+#include <cuda.h>
+#include <cuda_runtime_api.h>
+
+
+// Launcher for the stacked local-neighbor index query, operating on raw
+// pointers. Only declared in this header; the definition lives elsewhere
+// (presumably vector_pool_gpu.cu - confirm).
+int query_stacked_local_neighbor_idxs_kernel_launcher_stack(
+    const float *support_xyz,
+    const int *xyz_batch_cnt,
+    const float *new_xyz,
+    const int *new_xyz_batch_cnt,
+    int *stack_neighbor_idxs,
+    int *start_len,
+    int *cumsum,
+    int avg_length_of_neighbor_idxs,
+    float max_neighbour_distance,
+    int batch_size,
+    int M,
+    int nsample,
+    int neighbor_type);
+
+// Tensor-level wrapper around the launcher above (defined in vector_pool.cpp).
+int query_stacked_local_neighbor_idxs_wrapper_stack(
+    at::Tensor support_xyz_tensor,
+    at::Tensor xyz_batch_cnt_tensor,
+    at::Tensor new_xyz_tensor,
+    at::Tensor new_xyz_batch_cnt_tensor,
+    at::Tensor stack_neighbor_idxs_tensor,
+    at::Tensor start_len_tensor,
+    at::Tensor cumsum_tensor,
+    int avg_length_of_neighbor_idxs,
+    float max_neighbour_distance,
+    int nsample,
+    int neighbor_type);
+
+
+// Launcher for the three-nearest-neighbor query over stacked local neighbor
+// indices, operating on raw pointers. Only declared in this header; the
+// definition lives elsewhere (presumably vector_pool_gpu.cu - confirm).
+// new_xyz_grid_idxs / new_xyz_grid_dist2 are the only non-const pointers.
+int query_three_nn_by_stacked_local_idxs_kernel_launcher_stack(
+    const float *support_xyz,
+    const float *new_xyz,
+    const float *new_xyz_grid_centers,
+    int *new_xyz_grid_idxs,
+    float *new_xyz_grid_dist2,
+    const int *stack_neighbor_idxs,
+    const int *start_len,
+    int M,
+    int num_total_grids);
+
+// Tensor-level wrapper around the launcher above (defined in vector_pool.cpp).
+int query_three_nn_by_stacked_local_idxs_wrapper_stack(
+    at::Tensor support_xyz_tensor,
+    at::Tensor new_xyz_tensor,
+    at::Tensor new_xyz_grid_centers_tensor,
+    at::Tensor new_xyz_grid_idxs_tensor,
+    at::Tensor new_xyz_grid_dist2_tensor,
+    at::Tensor stack_neighbor_idxs_tensor,
+    at::Tensor start_len_tensor,
+    int M,
+    int num_total_grids);
+
+
+// Tensor-level entry point of the vector-pool forward pass (defined in
+// vector_pool.cpp). Returns the int produced by
+// vector_pool_kernel_launcher_stack.
+// Parameter names match the definition; in particular the local-xyz buffer is
+// named new_local_xyz_tensor, like every other tensor argument.
+int vector_pool_wrapper_stack(at::Tensor support_xyz_tensor, at::Tensor xyz_batch_cnt_tensor,
+    at::Tensor support_features_tensor, at::Tensor new_xyz_tensor, at::Tensor new_xyz_batch_cnt_tensor,
+    at::Tensor new_features_tensor, at::Tensor new_local_xyz_tensor,
+    at::Tensor point_cnt_of_grid_tensor, at::Tensor grouped_idxs_tensor,
+    int num_grid_x, int num_grid_y, int num_grid_z, float max_neighbour_distance, int use_xyz,
+    int num_max_sum_points, int nsample, int neighbor_type, int pooling_type);
+
+
+// Launcher of the vector-pool forward pass, operating on raw pointers. Only
+// declared in this header; the definition lives elsewhere (presumably
+// vector_pool_gpu.cu - confirm). Its int result is what
+// vector_pool_wrapper_stack returns to its caller.
+int vector_pool_kernel_launcher_stack(
+    const float *support_xyz,
+    const float *support_features,
+    const int *xyz_batch_cnt,
+    const float *new_xyz,
+    float *new_features,
+    float *new_local_xyz,
+    const int *new_xyz_batch_cnt,
+    int *point_cnt_of_grid,
+    int *grouped_idxs,
+    int num_grid_x,
+    int num_grid_y,
+    int num_grid_z,
+    float max_neighbour_distance,
+    int batch_size,
+    int N,
+    int M,
+    int num_c_in,
+    int num_c_out,
+    int num_total_grids,
+    int use_xyz,
+    int num_max_sum_points,
+    int nsample,
+    int neighbor_type,
+    int pooling_type);
+
+
+// Tensor-level entry point of the vector-pool backward pass (defined in
+// vector_pool.cpp).
+int vector_pool_grad_wrapper_stack(
+    at::Tensor grad_new_features_tensor,
+    at::Tensor point_cnt_of_grid_tensor,
+    at::Tensor grouped_idxs_tensor,
+    at::Tensor grad_support_features_tensor);
+
+
+// Launcher of the vector-pool backward pass, operating on raw pointers. Only
+// declared in this header; the definition lives elsewhere (presumably
+// vector_pool_gpu.cu - confirm). grad_support_features is the only non-const
+// pointer; unlike the other launchers this one returns void.
+void vector_pool_grad_kernel_launcher_stack(
+    const float *grad_new_features,
+    const int *point_cnt_of_grid,
+    const int *grouped_idxs,
+    float *grad_support_features,
+    int N,
+    int M,
+    int num_c_out,
+    int num_c_in,
+    int num_total_grids,
+    int num_max_sum_points);
+
+#endif
--- a/setup.py
+++ b/setup.py
@@ -28,7 +28,7 @@ def write_version_to_file(version, target_file):


 if __name__ == '__main__':
-    version = '0.5.1+%s' % get_git_commit_number()
+    version = '0.5.2+%s' % get_git_commit_number()
    write_version_to_file(version, 'pcdet/version.py')

    setup(
@@ -97,6 +97,8 @@ if __name__ == '__main__':
                    'src/interpolate_gpu.cu',
                    'src/voxel_query.cpp', 
                    'src/voxel_query_gpu.cu',
+                    'src/vector_pool.cpp',
+                    'src/vector_pool_gpu.cu'
                ],
            ),
            make_cuda_ext(

--- a/tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml
+++ b/tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml
+CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
+
+DATA_CONFIG:
+    _BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
+
+
+MODEL:
+    NAME: PVRCNNPlusPlus
+
+    VFE:
+        NAME: MeanVFE
+
+    BACKBONE_3D:
+        NAME: VoxelBackBone8x
+
+    MAP_TO_BEV:
+        NAME: HeightCompression
+        NUM_BEV_FEATURES: 256
+
+    BACKBONE_2D:
+        NAME: BaseBEVBackbone
+
+        LAYER_NUMS: [5, 5]
+        LAYER_STRIDES: [1, 2]
+        NUM_FILTERS: [128, 256]
+        UPSAMPLE_STRIDES: [1, 2]
+        NUM_UPSAMPLE_FILTERS: [256, 256]
+
+    DENSE_HEAD:
+        NAME: CenterHead
+        CLASS_AGNOSTIC: False
+
+        CLASS_NAMES_EACH_HEAD: [
+            [ 'Vehicle', 'Pedestrian', 'Cyclist' ]
+        ]
+
+        SHARED_CONV_CHANNEL: 64
+        USE_BIAS_BEFORE_NORM: True
+        NUM_HM_CONV: 2
+        SEPARATE_HEAD_CFG:
+            HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ]
+            HEAD_DICT: {
+                'center': { 'out_channels': 2, 'num_conv': 2 },
+                'center_z': { 'out_channels': 1, 'num_conv': 2 },
+                'dim': { 'out_channels': 3, 'num_conv': 2 },
+                'rot': { 'out_channels': 2, 'num_conv': 2 },
+            }
+
+        TARGET_ASSIGNER_CONFIG:
+            FEATURE_MAP_STRIDE: 8
+            NUM_MAX_OBJS: 500
+            GAUSSIAN_OVERLAP: 0.1
+            MIN_RADIUS: 2
+
+        LOSS_CONFIG:
+            LOSS_WEIGHTS: {
+                'cls_weight': 1.0,
+                'loc_weight': 2.0,
+                'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
+            }
+
+        POST_PROCESSING:
+            SCORE_THRESH: 0.1
+            POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ]
+            MAX_OBJ_PER_SAMPLE: 500
+            NMS_CONFIG:
+                NMS_TYPE: nms_gpu
+                NMS_THRESH: 0.7
+                NMS_PRE_MAXSIZE: 4096
+                NMS_POST_MAXSIZE: 500
+
+    PFE:
+        NAME: VoxelSetAbstraction
+        POINT_SOURCE: raw_points
+        NUM_KEYPOINTS: 4096
+        NUM_OUTPUT_FEATURES: 90
+        SAMPLE_METHOD: SPC
+        SPC_SAMPLING:
+            NUM_SECTORS: 6
+            SAMPLE_RADIUS_WITH_ROI: 1.6
+
+        FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points']
+        SA_LAYER:
+            raw_points:
+                NAME: VectorPoolAggregationModuleMSG
+                NUM_GROUPS: 2
+                LOCAL_AGGREGATION_TYPE: local_interpolation
+                NUM_REDUCED_CHANNELS: 2
+                NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+                MSG_POST_MLPS: [ 32 ]
+                FILTER_NEIGHBOR_WITH_ROI: True
+                RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4
+
+                GROUP_CFG_0:
+                    NUM_LOCAL_VOXEL: [ 2, 2, 2 ]
+                    MAX_NEIGHBOR_DISTANCE: 0.2
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 32, 32 ]
+                GROUP_CFG_1:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 0.4
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 32, 32 ]
+
+            x_conv3:
+                DOWNSAMPLE_FACTOR: 4
+                INPUT_CHANNELS: 64
+
+                NAME: VectorPoolAggregationModuleMSG
+                NUM_GROUPS: 2
+                LOCAL_AGGREGATION_TYPE: local_interpolation
+                NUM_REDUCED_CHANNELS: 32
+                NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+                MSG_POST_MLPS: [128]
+                FILTER_NEIGHBOR_WITH_ROI: True
+                RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0
+
+                GROUP_CFG_0:
+                    NUM_LOCAL_VOXEL: [3, 3, 3]
+                    MAX_NEIGHBOR_DISTANCE: 1.2
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [64, 64]
+                GROUP_CFG_1:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 2.4
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 64, 64 ]
+
+            x_conv4:
+                DOWNSAMPLE_FACTOR: 8
+                INPUT_CHANNELS: 64
+
+                NAME: VectorPoolAggregationModuleMSG
+                NUM_GROUPS: 2
+                LOCAL_AGGREGATION_TYPE: local_interpolation
+                NUM_REDUCED_CHANNELS: 32
+                NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+                MSG_POST_MLPS: [ 128 ]
+                FILTER_NEIGHBOR_WITH_ROI: True
+                RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4
+
+                GROUP_CFG_0:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 2.4
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 64, 64 ]
+                GROUP_CFG_1:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 4.8
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 64, 64 ]
+
+
+    POINT_HEAD:
+        NAME: PointHeadSimple
+        CLS_FC: [256, 256]
+        CLASS_AGNOSTIC: True
+        USE_POINT_FEATURES_BEFORE_FUSION: True
+        TARGET_CONFIG:
+            GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
+        LOSS_CONFIG:
+            LOSS_REG: smooth-l1
+            LOSS_WEIGHTS: {
+                'point_cls_weight': 1.0,
+            }
+
+    ROI_HEAD:
+        NAME: PVRCNNHead
+        CLASS_AGNOSTIC: True
+
+        SHARED_FC: [256, 256]
+        CLS_FC: [256, 256]
+        REG_FC: [256, 256]
+        DP_RATIO: 0.3
+
+        NMS_CONFIG:
+            TRAIN:
+                NMS_TYPE: nms_gpu
+                MULTI_CLASSES_NMS: False
+                NMS_PRE_MAXSIZE: 9000
+                NMS_POST_MAXSIZE: 512
+                NMS_THRESH: 0.8
+            TEST:
+                NMS_TYPE: nms_gpu
+                MULTI_CLASSES_NMS: False
+                NMS_PRE_MAXSIZE: 1024
+                NMS_POST_MAXSIZE: 100
+                NMS_THRESH: 0.7
+                SCORE_THRESH: 0.1
+
+#                NMS_PRE_MAXSIZE: 4096
+#                NMS_POST_MAXSIZE: 500
+#                NMS_THRESH: 0.85
+
+
+        ROI_GRID_POOL:
+            GRID_SIZE: 6
+
+            NAME: VectorPoolAggregationModuleMSG
+            NUM_GROUPS: 2
+            LOCAL_AGGREGATION_TYPE: voxel_random_choice
+            NUM_REDUCED_CHANNELS: 30
+            NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+            MSG_POST_MLPS: [ 128 ]
+
+            GROUP_CFG_0:
+                NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                MAX_NEIGHBOR_DISTANCE: 0.8
+                NEIGHBOR_NSAMPLE: 32
+                POST_MLPS: [ 64, 64 ]
+            GROUP_CFG_1:
+                NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                MAX_NEIGHBOR_DISTANCE: 1.6
+                NEIGHBOR_NSAMPLE: 32
+                POST_MLPS: [ 64, 64 ]
+
+        TARGET_CONFIG:
+            BOX_CODER: ResidualCoder
+            ROI_PER_IMAGE: 128
+            FG_RATIO: 0.5
+
+            SAMPLE_ROI_BY_EACH_CLASS: True
+            CLS_SCORE_TYPE: roi_iou
+
+            CLS_FG_THRESH: 0.75
+            CLS_BG_THRESH: 0.25
+            CLS_BG_THRESH_LO: 0.1
+            HARD_BG_RATIO: 0.8
+
+            REG_FG_THRESH: 0.55
+
+        LOSS_CONFIG:
+            CLS_LOSS: BinaryCrossEntropy
+            REG_LOSS: smooth-l1
+            CORNER_LOSS_REGULARIZATION: True
+            LOSS_WEIGHTS: {
+                'rcnn_cls_weight': 1.0,
+                'rcnn_reg_weight': 1.0,
+                'rcnn_corner_weight': 1.0,
+                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            }
+
+    POST_PROCESSING:
+        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
+        SCORE_THRESH: 0.1
+        OUTPUT_RAW_SCORE: False
+
+        EVAL_METRIC: waymo
+
+        NMS_CONFIG:
+            MULTI_CLASSES_NMS: False
+            NMS_TYPE: nms_gpu
+            NMS_THRESH: 0.7
+            NMS_PRE_MAXSIZE: 4096
+            NMS_POST_MAXSIZE: 500
+
+
+OPTIMIZATION:
+    BATCH_SIZE_PER_GPU: 2
+    NUM_EPOCHS: 30
+
+    OPTIMIZER: adam_onecycle
+    LR: 0.01
+    WEIGHT_DECAY: 0.001
+    MOMENTUM: 0.9
+
+    MOMS: [0.95, 0.85]
+    PCT_START: 0.4
+    DIV_FACTOR: 10
+    DECAY_STEP_LIST: [35, 45]
+    LR_DECAY: 0.1
+    LR_CLIP: 0.0000001
+
+    LR_WARMUP: False
+    WARMUP_EPOCH: 1
+
+    GRAD_NORM_CLIP: 10
\ No newline at end of file
--- a/tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml
+++ b/tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml
+CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
+
+DATA_CONFIG:
+    _BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
+
+
+MODEL:
+    NAME: PVRCNNPlusPlus
+
+    VFE:
+        NAME: MeanVFE
+
+    BACKBONE_3D:
+        NAME: VoxelResBackBone8x
+
+    MAP_TO_BEV:
+        NAME: HeightCompression
+        NUM_BEV_FEATURES: 256
+
+    BACKBONE_2D:
+        NAME: BaseBEVBackbone
+
+        LAYER_NUMS: [5, 5]
+        LAYER_STRIDES: [1, 2]
+        NUM_FILTERS: [128, 256]
+        UPSAMPLE_STRIDES: [1, 2]
+        NUM_UPSAMPLE_FILTERS: [256, 256]
+
+    DENSE_HEAD:
+        NAME: CenterHead
+        CLASS_AGNOSTIC: False
+
+        CLASS_NAMES_EACH_HEAD: [
+            [ 'Vehicle', 'Pedestrian', 'Cyclist' ]
+        ]
+
+        SHARED_CONV_CHANNEL: 64
+        USE_BIAS_BEFORE_NORM: True
+        NUM_HM_CONV: 2
+        SEPARATE_HEAD_CFG:
+            HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ]
+            HEAD_DICT: {
+                'center': { 'out_channels': 2, 'num_conv': 2 },
+                'center_z': { 'out_channels': 1, 'num_conv': 2 },
+                'dim': { 'out_channels': 3, 'num_conv': 2 },
+                'rot': { 'out_channels': 2, 'num_conv': 2 },
+            }
+
+        TARGET_ASSIGNER_CONFIG:
+            FEATURE_MAP_STRIDE: 8
+            NUM_MAX_OBJS: 500
+            GAUSSIAN_OVERLAP: 0.1
+            MIN_RADIUS: 2
+
+        LOSS_CONFIG:
+            LOSS_WEIGHTS: {
+                'cls_weight': 1.0,
+                'loc_weight': 2.0,
+                'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
+            }
+
+        POST_PROCESSING:
+            SCORE_THRESH: 0.1
+            POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ]
+            MAX_OBJ_PER_SAMPLE: 500
+            NMS_CONFIG:
+                NMS_TYPE: nms_gpu
+                NMS_THRESH: 0.7
+                NMS_PRE_MAXSIZE: 4096
+                NMS_POST_MAXSIZE: 500
+
+    PFE:
+        NAME: VoxelSetAbstraction
+        POINT_SOURCE: raw_points
+        NUM_KEYPOINTS: 4096
+        NUM_OUTPUT_FEATURES: 90
+        SAMPLE_METHOD: SPC
+        SPC_SAMPLING:
+            NUM_SECTORS: 6
+            SAMPLE_RADIUS_WITH_ROI: 1.6
+
+        FEATURES_SOURCE: ['bev', 'x_conv3', 'x_conv4', 'raw_points']
+        SA_LAYER:
+            raw_points:
+                NAME: VectorPoolAggregationModuleMSG
+                NUM_GROUPS: 2
+                LOCAL_AGGREGATION_TYPE: local_interpolation
+                NUM_REDUCED_CHANNELS: 2
+                NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+                MSG_POST_MLPS: [ 32 ]
+                FILTER_NEIGHBOR_WITH_ROI: True
+                RADIUS_OF_NEIGHBOR_WITH_ROI: 2.4
+
+                GROUP_CFG_0:
+                    NUM_LOCAL_VOXEL: [ 2, 2, 2 ]
+                    MAX_NEIGHBOR_DISTANCE: 0.2
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 32, 32 ]
+                GROUP_CFG_1:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 0.4
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 32, 32 ]
+
+            x_conv3:
+                DOWNSAMPLE_FACTOR: 4
+                INPUT_CHANNELS: 64
+
+                NAME: VectorPoolAggregationModuleMSG
+                NUM_GROUPS: 2
+                LOCAL_AGGREGATION_TYPE: local_interpolation
+                NUM_REDUCED_CHANNELS: 32
+                NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+                MSG_POST_MLPS: [128]
+                FILTER_NEIGHBOR_WITH_ROI: True
+                RADIUS_OF_NEIGHBOR_WITH_ROI: 4.0
+
+                GROUP_CFG_0:
+                    NUM_LOCAL_VOXEL: [3, 3, 3]
+                    MAX_NEIGHBOR_DISTANCE: 1.2
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [64, 64]
+                GROUP_CFG_1:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 2.4
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 64, 64 ]
+
+            x_conv4:
+                DOWNSAMPLE_FACTOR: 8
+                INPUT_CHANNELS: 64
+
+                NAME: VectorPoolAggregationModuleMSG
+                NUM_GROUPS: 2
+                LOCAL_AGGREGATION_TYPE: local_interpolation
+                NUM_REDUCED_CHANNELS: 32
+                NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+                MSG_POST_MLPS: [ 128 ]
+                FILTER_NEIGHBOR_WITH_ROI: True
+                RADIUS_OF_NEIGHBOR_WITH_ROI: 6.4
+
+                GROUP_CFG_0:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 2.4
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 64, 64 ]
+                GROUP_CFG_1:
+                    NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                    MAX_NEIGHBOR_DISTANCE: 4.8
+                    NEIGHBOR_NSAMPLE: -1
+                    POST_MLPS: [ 64, 64 ]
+
+
+    POINT_HEAD:
+        NAME: PointHeadSimple
+        CLS_FC: [256, 256]
+        CLASS_AGNOSTIC: True
+        USE_POINT_FEATURES_BEFORE_FUSION: True
+        TARGET_CONFIG:
+            GT_EXTRA_WIDTH: [0.2, 0.2, 0.2]
+        LOSS_CONFIG:
+            LOSS_REG: smooth-l1
+            LOSS_WEIGHTS: {
+                'point_cls_weight': 1.0,
+            }
+
+    ROI_HEAD:
+        NAME: PVRCNNHead
+        CLASS_AGNOSTIC: True
+
+        SHARED_FC: [256, 256]
+        CLS_FC: [256, 256]
+        REG_FC: [256, 256]
+        DP_RATIO: 0.3
+
+        NMS_CONFIG:
+            TRAIN:
+                NMS_TYPE: nms_gpu
+                MULTI_CLASSES_NMS: False
+                NMS_PRE_MAXSIZE: 9000
+                NMS_POST_MAXSIZE: 512
+                NMS_THRESH: 0.8
+            TEST:
+                NMS_TYPE: nms_gpu
+                MULTI_CLASSES_NMS: False
+                NMS_PRE_MAXSIZE: 1024
+                NMS_POST_MAXSIZE: 100
+                NMS_THRESH: 0.7
+                SCORE_THRESH: 0.1
+
+#                NMS_PRE_MAXSIZE: 4096
+#                NMS_POST_MAXSIZE: 500
+#                NMS_THRESH: 0.85
+
+
+        ROI_GRID_POOL:
+            GRID_SIZE: 6
+
+            NAME: VectorPoolAggregationModuleMSG
+            NUM_GROUPS: 2
+            LOCAL_AGGREGATION_TYPE: voxel_random_choice
+            NUM_REDUCED_CHANNELS: 30
+            NUM_CHANNELS_OF_LOCAL_AGGREGATION: 32
+            MSG_POST_MLPS: [ 128 ]
+
+            GROUP_CFG_0:
+                NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                MAX_NEIGHBOR_DISTANCE: 0.8
+                NEIGHBOR_NSAMPLE: 32
+                POST_MLPS: [ 64, 64 ]
+            GROUP_CFG_1:
+                NUM_LOCAL_VOXEL: [ 3, 3, 3 ]
+                MAX_NEIGHBOR_DISTANCE: 1.6
+                NEIGHBOR_NSAMPLE: 32
+                POST_MLPS: [ 64, 64 ]
+
+        TARGET_CONFIG:
+            BOX_CODER: ResidualCoder
+            ROI_PER_IMAGE: 128
+            FG_RATIO: 0.5
+
+            SAMPLE_ROI_BY_EACH_CLASS: True
+            CLS_SCORE_TYPE: roi_iou
+
+            CLS_FG_THRESH: 0.75
+            CLS_BG_THRESH: 0.25
+            CLS_BG_THRESH_LO: 0.1
+            HARD_BG_RATIO: 0.8
+
+            REG_FG_THRESH: 0.55
+
+        LOSS_CONFIG:
+            CLS_LOSS: BinaryCrossEntropy
+            REG_LOSS: smooth-l1
+            CORNER_LOSS_REGULARIZATION: True
+            LOSS_WEIGHTS: {
+                'rcnn_cls_weight': 1.0,
+                'rcnn_reg_weight': 1.0,
+                'rcnn_corner_weight': 1.0,
+                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            }
+
+    POST_PROCESSING:
+        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
+        SCORE_THRESH: 0.1
+        OUTPUT_RAW_SCORE: False
+
+        EVAL_METRIC: waymo
+
+        NMS_CONFIG:
+            MULTI_CLASSES_NMS: False
+            NMS_TYPE: nms_gpu
+            NMS_THRESH: 0.7
+            NMS_PRE_MAXSIZE: 4096
+            NMS_POST_MAXSIZE: 500
+
+
+OPTIMIZATION:
+    BATCH_SIZE_PER_GPU: 2
+    NUM_EPOCHS: 30
+
+    OPTIMIZER: adam_onecycle
+    LR: 0.01
+    WEIGHT_DECAY: 0.001
+    MOMENTUM: 0.9
+
+    MOMS: [0.95, 0.85]
+    PCT_START: 0.4
+    DIV_FACTOR: 10
+    DECAY_STEP_LIST: [35, 45]
+    LR_DECAY: 0.1
+    LR_CLIP: 0.0000001
+
+    LR_WARMUP: False
+    WARMUP_EPOCH: 1
+
+    GRAD_NORM_CLIP: 10
\ No newline at end of file