Unverified Commit 1376e610 authored by Jiajun Deng's avatar Jiajun Deng Committed by GitHub
Browse files

Add dynamic voxelization and config for voxel r-cnn on Waymo Open Dataset (#760)

* add dynamic pillar vfe

* make the upperbound unaccessible

* add place holder for voxel generation

* add DynPillarVFE

* add PFNLayerV2

* add try except for torch_scatter package

* add dynamic pillar in readme

* add the cfg file of centerpoint with dynamic pillar

* recover original mask_points_by_range

* masking points with points_coords out of grid_size

* add dynamic voxelization and config for voxel_rcnn
parent c47a94bc
...@@ -152,6 +152,7 @@ By default, all models are trained with **a single frame** of **20% data (~32k f ...@@ -152,6 +152,7 @@ By default, all models are trained with **a single frame** of **20% data (~32k f
| [Part-A2-Anchor](tools/cfgs/waymo_models/PartA2.yaml) | 74.66/74.12 |65.82/65.32 |71.71/62.24 |62.46/54.06 |66.53/65.18 |64.05/62.75 | | [Part-A2-Anchor](tools/cfgs/waymo_models/PartA2.yaml) | 74.66/74.12 |65.82/65.32 |71.71/62.24 |62.46/54.06 |66.53/65.18 |64.05/62.75 |
| [PV-RCNN (AnchorHead)](tools/cfgs/waymo_models/pv_rcnn.yaml) | 75.41/74.74 |67.44/66.80 |71.98/61.24 |63.70/53.95 |65.88/64.25 |63.39/61.82 | | [PV-RCNN (AnchorHead)](tools/cfgs/waymo_models/pv_rcnn.yaml) | 75.41/74.74 |67.44/66.80 |71.98/61.24 |63.70/53.95 |65.88/64.25 |63.39/61.82 |
| [PV-RCNN (CenterHead)](tools/cfgs/waymo_models/pv_rcnn_with_centerhead_rpn.yaml) | 75.95/75.43 |68.02/67.54 |75.94/69.40 |67.66/61.62 |70.18/68.98 |67.73/66.57| | [PV-RCNN (CenterHead)](tools/cfgs/waymo_models/pv_rcnn_with_centerhead_rpn.yaml) | 75.95/75.43 |68.02/67.54 |75.94/69.40 |67.66/61.62 |70.18/68.98 |67.73/66.57|
| [Voxel R-CNN (CenterHead)-Dynamic-Voxel](tools/cfgs/waymo_models/voxel_rcnn_with_centerhead_dyn_voxel.yaml) | 76.13/75.66 |68.18/67.74 |78.20/71.98 |69.29/63.59 | 70.75/69.68 |68.25/67.21|
| [PV-RCNN++](tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml) | 77.82/77.32| 69.07/68.62| 77.99/71.36| 69.92/63.74| 71.80/70.71| 69.31/68.26| | [PV-RCNN++](tools/cfgs/waymo_models/pv_rcnn_plusplus.yaml) | 77.82/77.32| 69.07/68.62| 77.99/71.36| 69.92/63.74| 71.80/70.71| 69.31/68.26|
| [PV-RCNN++ (ResNet)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml) |77.61/77.14| 69.18/68.75| 79.42/73.31| 70.88/65.21| 72.50/71.39| 69.84/68.77| | [PV-RCNN++ (ResNet)](tools/cfgs/waymo_models/pv_rcnn_plusplus_resnet.yaml) |77.61/77.14| 69.18/68.75| 79.42/73.31| 70.88/65.21| 72.50/71.39| 69.84/68.77|
......
from .mean_vfe import MeanVFE from .mean_vfe import MeanVFE
from .pillar_vfe import PillarVFE from .pillar_vfe import PillarVFE
from .dynamic_mean_vfe import DynamicMeanVFE
from .dynamic_pillar_vfe import DynamicPillarVFE from .dynamic_pillar_vfe import DynamicPillarVFE
from .image_vfe import ImageVFE from .image_vfe import ImageVFE
from .vfe_template import VFETemplate from .vfe_template import VFETemplate
...@@ -9,5 +10,6 @@ __all__ = { ...@@ -9,5 +10,6 @@ __all__ = {
'MeanVFE': MeanVFE, 'MeanVFE': MeanVFE,
'PillarVFE': PillarVFE, 'PillarVFE': PillarVFE,
'ImageVFE': ImageVFE, 'ImageVFE': ImageVFE,
'DynPillarVFE': DynamicPillarVFE 'DynMeanVFE': DynamicMeanVFE,
'DynPillarVFE': DynamicPillarVFE,
} }
import torch
from .vfe_template import VFETemplate
try:
import torch_scatter
except Exception as e:
# In case someone doesn't want to use the dynamic pillar VFE and hasn't installed torch_scatter
pass
from .vfe_template import VFETemplate
class DynamicMeanVFE(VFETemplate):
    """Mean-pooling voxel feature encoder with dynamic voxelization.

    Instead of consuming a pre-voxelized (num_voxels, max_points, C) tensor,
    this module voxelizes the raw point cloud on the fly: each point is hashed
    into a single flat voxel key, and per-voxel mean features are computed via
    ``torch_scatter.scatter_mean``. Requires the optional ``torch_scatter``
    package at forward time.
    """

    def __init__(self, model_cfg, num_point_features, voxel_size, grid_size, point_cloud_range, **kwargs):
        """
        Args:
            model_cfg: VFE section of the model config.
            num_point_features: number of per-point feature channels; this is
                also the output dimension, since the encoder only averages.
            voxel_size: (vx, vy, vz) voxel edge lengths.
            grid_size: (nx, ny, nz) voxel counts along each axis.
            point_cloud_range: (x_min, y_min, z_min, x_max, y_max, z_max).
        """
        super().__init__(model_cfg=model_cfg)
        self.num_point_features = num_point_features
        # NOTE(review): tensors are created directly on CUDA, so this module
        # assumes GPU execution — confirm against project usage.
        self.grid_size = torch.tensor(grid_size).cuda()
        self.voxel_size = torch.tensor(voxel_size).cuda()
        self.point_cloud_range = torch.tensor(point_cloud_range).cuda()

        self.voxel_x = voxel_size[0]
        self.voxel_y = voxel_size[1]
        self.voxel_z = voxel_size[2]
        # Offsets from the grid origin to the first voxel center
        # (computed here but not used in forward).
        self.x_offset = self.voxel_x / 2 + point_cloud_range[0]
        self.y_offset = self.voxel_y / 2 + point_cloud_range[1]
        self.z_offset = self.voxel_z / 2 + point_cloud_range[2]

        # Strides for flattening (batch, x, y, z) voxel coordinates into one
        # scalar key: key = b*scale_xyz + x*scale_yz + y*scale_z + z.
        self.scale_xyz = grid_size[0] * grid_size[1] * grid_size[2]
        self.scale_yz = grid_size[1] * grid_size[2]
        self.scale_z = grid_size[2]

    def get_output_feature_dim(self):
        # Averaging preserves the channel count, so output dim == input dim.
        return self.num_point_features

    @torch.no_grad()
    def forward(self, batch_dict, **kwargs):
        """Voxelize points and compute per-voxel mean features.

        Args:
            batch_dict:
                batch_size: int
                points: (N, 1 + C) tensor whose columns are
                    (batch_idx, x, y, z, ...remaining feature channels...)
            **kwargs: unused.
        Returns:
            batch_dict with added keys:
                voxel_features: (num_voxels, C) mean of point features per voxel
                voxel_coords: (num_voxels, 4) int coords as (batch_idx, z, y, x)
        """
        batch_size = batch_dict['batch_size']
        points = batch_dict['points'] # (batch_idx, x, y, z, i, e)

        # Integer voxel index of each point along (x, y, z).
        point_coords = torch.floor((points[:, 1:4] - self.point_cloud_range[0:3]) / self.voxel_size).int()
        # Discard points whose voxel index falls outside the grid.
        mask = ((point_coords >= 0) & (point_coords < self.grid_size)).all(dim=1)
        points = points[mask]
        point_coords = point_coords[mask]
        # Flatten (batch, x, y, z) into one scalar key per point so that
        # points sharing a voxel get identical keys.
        merge_coords = points[:, 0].int() * self.scale_xyz + \
                       point_coords[:, 0] * self.scale_yz + \
                       point_coords[:, 1] * self.scale_z + \
                       point_coords[:, 2]
        points_data = points[:, 1:].contiguous()

        # unq_inv maps each point to its voxel's position in unq_coords.
        unq_coords, unq_inv, unq_cnt = torch.unique(merge_coords, return_inverse=True, return_counts=True)

        # Average all point features that share a voxel key.
        points_mean = torch_scatter.scatter_mean(points_data, unq_inv, dim=0)

        unq_coords = unq_coords.int()
        # Decode the flat keys back into (batch, x, y, z) columns...
        voxel_coords = torch.stack((unq_coords // self.scale_xyz,
                                    (unq_coords % self.scale_xyz) // self.scale_yz,
                                    (unq_coords % self.scale_yz) // self.scale_z,
                                    unq_coords % self.scale_z), dim=1)
        # ...then reorder columns to the (batch_idx, z, y, x) convention.
        voxel_coords = voxel_coords[:, [0, 3, 2, 1]]

        batch_dict['voxel_features'] = points_mean.contiguous()
        batch_dict['voxel_coords'] = voxel_coords.contiguous()
        return batch_dict
CLASS_NAMES: ['Vehicle', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/waymo_dataset.yaml
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
STRICT_MASK: True
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': True
}
- NAME: transform_points_to_voxels_placeholder
VOXEL_SIZE: [ 0.10, 0.10, 0.15 ]
MODEL:
NAME: VoxelRCNN
VFE:
NAME: DynMeanVFE
BACKBONE_3D:
NAME: VoxelBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: CenterHead
CLASS_AGNOSTIC: False
CLASS_NAMES_EACH_HEAD: [
[ 'Vehicle', 'Pedestrian', 'Cyclist' ]
]
SHARED_CONV_CHANNEL: 64
USE_BIAS_BEFORE_NORM: True
NUM_HM_CONV: 2
SEPARATE_HEAD_CFG:
HEAD_ORDER: [ 'center', 'center_z', 'dim', 'rot' ]
HEAD_DICT: {
'center': { 'out_channels': 2, 'num_conv': 2 },
'center_z': { 'out_channels': 1, 'num_conv': 2 },
'dim': { 'out_channels': 3, 'num_conv': 2 },
'rot': { 'out_channels': 2, 'num_conv': 2 },
}
TARGET_ASSIGNER_CONFIG:
FEATURE_MAP_STRIDE: 8
NUM_MAX_OBJS: 500
GAUSSIAN_OVERLAP: 0.1
MIN_RADIUS: 2
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'code_weights': [ 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 ]
}
POST_PROCESSING:
SCORE_THRESH: 0.1
POST_CENTER_LIMIT_RANGE: [ -75.2, -75.2, -2, 75.2, 75.2, 4 ]
MAX_OBJ_PER_SAMPLE: 500
NMS_CONFIG:
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
ROI_HEAD:
NAME: VoxelRCNNHead
CLASS_AGNOSTIC: True
SHARED_FC: [256, 256]
CLS_FC: [256, 256]
REG_FC: [256, 256]
DP_RATIO: 0.3
NMS_CONFIG:
TRAIN:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 9000
NMS_POST_MAXSIZE: 512
NMS_THRESH: 0.8
TEST:
NMS_TYPE: nms_gpu
MULTI_CLASSES_NMS: False
NMS_PRE_MAXSIZE: 1024
NMS_POST_MAXSIZE: 100
NMS_THRESH: 0.7
# NMS_PRE_MAXSIZE: 4096
# NMS_POST_MAXSIZE: 300
# NMS_THRESH: 0.85
ROI_GRID_POOL:
FEATURES_SOURCE: ['x_conv2', 'x_conv3', 'x_conv4']
PRE_MLP: True
GRID_SIZE: 6
POOL_LAYERS:
x_conv2:
MLPS: [ [ 64, 64 ] ]
QUERY_RANGES: [ [ 3, 3, 2 ] ]
POOL_RADIUS: [ 0.4 ]
NSAMPLE: [ 16 ]
POOL_METHOD: max_pool
x_conv3:
MLPS: [ [ 64, 64 ] ]
QUERY_RANGES: [ [ 3, 3, 2 ] ]
POOL_RADIUS: [ 0.8 ]
NSAMPLE: [ 16 ]
POOL_METHOD: max_pool
x_conv4:
MLPS: [ [ 64, 64 ] ]
QUERY_RANGES: [ [ 3, 3, 2 ] ]
POOL_RADIUS: [ 1.6 ]
NSAMPLE: [ 16 ]
POOL_METHOD: max_pool
TARGET_CONFIG:
BOX_CODER: ResidualCoder
ROI_PER_IMAGE: 128
FG_RATIO: 0.5
SAMPLE_ROI_BY_EACH_CLASS: True
CLS_SCORE_TYPE: roi_iou
CLS_FG_THRESH: 0.75
CLS_BG_THRESH: 0.25
CLS_BG_THRESH_LO: 0.1
HARD_BG_RATIO: 0.8
REG_FG_THRESH: 0.55
LOSS_CONFIG:
CLS_LOSS: BinaryCrossEntropy
REG_LOSS: smooth-l1
CORNER_LOSS_REGULARIZATION: True
LOSS_WEIGHTS: {
'rcnn_cls_weight': 1.0,
'rcnn_reg_weight': 1.0,
'rcnn_corner_weight': 1.0,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: waymo
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.7
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
BATCH_SIZE_PER_GPU: 4
NUM_EPOCHS: 30
OPTIMIZER: adam_onecycle
LR: 0.01
WEIGHT_DECAY: 0.001
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment