Commit 72088ee3 authored by Shaoshuai Shi's avatar Shaoshuai Shi
Browse files

support separate regression heads for different properties

parent 3a551ac1
......@@ -6,20 +6,39 @@ import torch
class SingleHead(BaseBEVBackbone):
def __init__(self, model_cfg, input_channels, num_class, num_anchors_per_location, code_size, encode_conv_cfg=None,
head_label_indices=None):
super().__init__(encode_conv_cfg, input_channels)
def __init__(self, model_cfg, input_channels, num_class, num_anchors_per_location, code_size, rpn_head_cfg=None,
head_label_indices=None, separate_reg_config=None):
super().__init__(rpn_head_cfg, input_channels)
self.num_anchors_per_location = num_anchors_per_location
self.num_class = num_class
self.code_size = code_size
self.model_cfg = model_cfg
self.separate_reg_config = separate_reg_config
self.register_buffer('head_label_indices', head_label_indices)
self.conv_cls = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.num_class,
kernel_size=1
)
if self.separate_reg_config is not None:
code_size_cnt = 0
self.conv_box = nn.ModuleDict()
self.conv_box_names = []
for reg_config in self.separate_reg_config:
reg_name, reg_channel = reg_config.split(':')
cur_conv = nn.Conv2d(
input_channels, self.num_anchors_per_location * reg_channel,
kernel_size=3, stride=1, padding=1, bias=True
)
nn.init.kaiming_normal_(cur_conv.weight, mode='fan_out', nonlinearity='relu')
nn.init.constant_(cur_conv.bias, 0)
code_size_cnt += reg_channel
self.conv_box[f'conv_{reg_name}'] = cur_conv
self.conv_box_names.append(f'conv_{reg_name}')
assert code_size_cnt == code_size, f'Code size does not match: {code_size_cnt}:{code_size}'
else:
self.conv_box = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.code_size,
kernel_size=1
......@@ -45,7 +64,14 @@ class SingleHead(BaseBEVBackbone):
spatial_features_2d = super().forward({'spatial_features': spatial_features_2d})['spatial_features_2d']
cls_preds = self.conv_cls(spatial_features_2d)
if self.separate_reg_config is not None:
box_preds = self.conv_box(spatial_features_2d)
else:
box_preds_list = []
for reg_name in self.conv_box_names:
box_preds_list.append(self.conv_box[f'conv_{reg_name}'](spatial_features_2d))
box_preds = torch.cat(box_preds_list, dim=1)
if not self.use_multihead:
box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
......@@ -121,7 +147,8 @@ class AnchorHeadMulti(AnchorHeadTemplate):
self.model_cfg, input_channels,
len(rpn_head_cfg['HEAD_CLS_NAME']) if self.separate_multihead else self.num_class,
num_anchors_per_location, self.box_coder.code_size, rpn_head_cfg,
head_label_indices=head_label_indices
head_label_indices=head_label_indices,
separate_reg_config=self.model_cfg.get('SEPARATE_REG_CONFIG', None)
)
rpn_heads.append(rpn_head)
self.rpn_heads = nn.ModuleList(rpn_heads)
......
......@@ -2,17 +2,18 @@ import torch
class ResidualCoder(object):
def __init__(self, code_size=7, **kwargs):
def __init__(self, code_size=7, encode_angle_by_sincos=False, **kwargs):
super().__init__()
self.code_size = code_size
assert code_size in [7, 9]
self.encode_angle_by_sincos = encode_angle_by_sincos
if self.encode_angle_by_sincos:
self.code_size += 1
@staticmethod
def encode_torch(boxes, anchors):
def encode_torch(self, boxes, anchors):
"""
Args:
boxes: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
anchors: (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
Returns:
......@@ -30,23 +31,30 @@ class ResidualCoder(object):
dxt = torch.log(dxg / dxa)
dyt = torch.log(dyg / dya)
dzt = torch.log(dzg / dza)
rt = rg - ra
if self.encode_angle_by_sincos:
rt_cos = torch.cos(rg) - torch.cos(ra)
rt_sin = torch.sin(rg) - torch.sin(ra)
rts = [rt_cos, rt_sin]
else:
rts = [rg - ra]
cts = [g - a for g, a in zip(cgs, cas)]
return torch.cat([xt, yt, zt, dxt, dyt, dzt, rt, *cts], dim=-1)
return torch.cat([xt, yt, zt, dxt, dyt, dzt, *rts, *cts], dim=-1)
@staticmethod
def decode_torch(box_encodings, anchors):
def decode_torch(self, box_encodings, anchors):
"""
Args:
box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
box_encodings: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading or *[cos, sin], ...]
anchors: (B, N, 7 + C) or (N, 7 + C) [x, y, z, dx, dy, dz, heading, ...]
Returns:
"""
xa, ya, za, dxa, dya, dza, ra, *cas = torch.split(anchors, 1, dim=-1)
if self.encode_angle_by_sincos:
xt, yt, zt, dxt, dyt, dzt, rt, *cts = torch.split(box_encodings, 1, dim=-1)
else:
xt, yt, zt, dxt, dyt, dzt, cost, sint, *cts = torch.split(box_encodings, 1, dim=-1)
diagonal = torch.sqrt(dxa ** 2 + dya ** 2)
xg = xt * diagonal + xa
......@@ -56,6 +64,12 @@ class ResidualCoder(object):
dxg = torch.exp(dxt) * dxa
dyg = torch.exp(dyt) * dya
dzg = torch.exp(dzt) * dza
if self.encode_angle_by_sincos:
rg_cos = cost + torch.cos(ra)
rg_sin = sint + torch.sin(ra)
rg = torch.atan2(rg_sin, rg_cos)
else:
rg = rt + ra
cgs = [t + a for t, a in zip(cts, cas)]
......
CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml
MODEL:
NAME: SECONDNet
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelResBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: AnchorHeadMulti
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
USE_MULTIHEAD: True
SEPARATE_MULTIHEAD: True
ANCHOR_GENERATOR_CONFIG: [
{
'class_name': car,
'anchor_sizes': [[4.63, 1.97, 1.74]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.95],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.6,
'unmatched_threshold': 0.45
},
{
'class_name': truck,
'anchor_sizes': [[6.93, 2.51, 2.84]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.6],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': construction_vehicle,
'anchor_sizes': [[6.37, 2.85, 3.19]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.225],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
},
{
'class_name': bus,
'anchor_sizes': [[10.5, 2.94, 3.47]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.085],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': trailer,
'anchor_sizes': [[12.29, 2.90, 3.87]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [0.115],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
},
{
'class_name': barrier,
'anchor_sizes': [[0.50, 2.53, 0.98]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.33],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': motorcycle,
'anchor_sizes': [[2.11, 0.77, 1.47]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.085],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.5,
'unmatched_threshold': 0.3
},
{
'class_name': bicycle,
'anchor_sizes': [[1.70, 0.60, 1.28]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.18],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
},
{
'class_name': pedestrian,
'anchor_sizes': [[0.73, 0.67, 1.77]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.935],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.6,
'unmatched_threshold': 0.4
},
{
'class_name': traffic_cone,
'anchor_sizes': [[0.41, 0.41, 1.07]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.285],
'align_center': False,
'feature_map_stride': 8,
'matched_threshold': 0.6,
'unmatched_threshold': 0.4
},
]
SHARED_CONV_NUM_FILTER: 64
RPN_HEAD_CFGS: [
{
'HEAD_CLS_NAME': ['car'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['truck', 'construction_vehicle'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['bus', 'trailer'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['barrier'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['motorcycle', 'bicycle'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['pedestrian', 'traffic_cone'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
]
SEPARATE_REG_CONFIG: ['reg:2', 'height:1', 'size:3', 'angle:2', 'velo:2']
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
BOX_CODER_CONFIG: {
'code_size': 9,
'encode_angle_by_sincos': True
}
LOSS_CONFIG:
REG_LOSS_TYPE: WeightedL1Loss
LOSS_WEIGHTS: {
'pos_cls_weight': 1.0,
'neg_cls_weight': 2.0,
'cls_weight': 1.0,
'loc_weight': 0.25,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.2
NMS_PRE_MAXSIZE: 1000
NMS_POST_MAXSIZE: 100
OPTIMIZATION:
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
CLASS_NAMES: ['car','truck', 'construction_vehicle', 'bus', 'trailer',
'barrier', 'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml
POINT_CLOUD_RANGE: [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
DATA_PROCESSOR:
- NAME: mask_points_and_boxes_outside_range
REMOVE_OUTSIDE_BOXES: True
- NAME: shuffle_points
SHUFFLE_ENABLED: {
'train': True,
'test': True
}
- NAME: transform_points_to_voxels
VOXEL_SIZE: [0.2, 0.2, 8.0]
MAX_POINTS_PER_VOXEL: 20
MAX_NUMBER_OF_VOXELS: {
'train': 30000,
'test': 30000
}
MODEL:
NAME: PointPillar
VFE:
NAME: PillarVFE
WITH_DISTANCE: False
USE_ABSLOTE_XYZ: True
USE_NORM: True
NUM_FILTERS: [64]
MAP_TO_BEV:
NAME: PointPillarScatter
NUM_BEV_FEATURES: 64
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [3, 5, 5]
LAYER_STRIDES: [2, 2, 2]
NUM_FILTERS: [64, 128, 256]
UPSAMPLE_STRIDES: [0.5, 1, 2]
NUM_UPSAMPLE_FILTERS: [128, 128, 128]
DENSE_HEAD:
NAME: AnchorHeadMulti
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
USE_MULTIHEAD: True
SEPARATE_MULTIHEAD: True
ANCHOR_GENERATOR_CONFIG: [
{
'class_name': car,
'anchor_sizes': [[4.63, 1.97, 1.74]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.95],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.6,
'unmatched_threshold': 0.45
},
{
'class_name': truck,
'anchor_sizes': [[6.93, 2.51, 2.84]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.6],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': construction_vehicle,
'anchor_sizes': [[6.37, 2.85, 3.19]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.225],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
},
{
'class_name': bus,
'anchor_sizes': [[10.5, 2.94, 3.47]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.085],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': trailer,
'anchor_sizes': [[12.29, 2.90, 3.87]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [0.115],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
},
{
'class_name': barrier,
'anchor_sizes': [[0.50, 2.53, 0.98]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.33],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.55,
'unmatched_threshold': 0.4
},
{
'class_name': motorcycle,
'anchor_sizes': [[2.11, 0.77, 1.47]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.085],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.5,
'unmatched_threshold': 0.3
},
{
'class_name': bicycle,
'anchor_sizes': [[1.70, 0.60, 1.28]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.18],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.5,
'unmatched_threshold': 0.35
},
{
'class_name': pedestrian,
'anchor_sizes': [[0.73, 0.67, 1.77]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.935],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.6,
'unmatched_threshold': 0.4
},
{
'class_name': traffic_cone,
'anchor_sizes': [[0.41, 0.41, 1.07]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.285],
'align_center': False,
'feature_map_stride': 4,
'matched_threshold': 0.6,
'unmatched_threshold': 0.4
},
]
SHARED_CONV_NUM_FILTER: 64
RPN_HEAD_CFGS: [
{
'HEAD_CLS_NAME': ['car'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['truck', 'construction_vehicle'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['bus', 'trailer'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['barrier'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['motorcycle', 'bicycle'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
{
'HEAD_CLS_NAME': ['pedestrian', 'traffic_cone'],
'LAYER_NUMS': [0],
'LAYER_STRIDES': [1],
'NUM_FILTERS': [64],
},
]
SEPARATE_REG_CONFIG: ['reg:2', 'height:1', 'size:3', 'angle:2', 'velo:2']
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
BOX_CODER_CONFIG: {
'code_size': 9,
'encode_angle_by_sincos': True
}
LOSS_CONFIG:
REG_LOSS_TYPE: WeightedL1Loss
LOSS_WEIGHTS: {
'pos_cls_weight': 1.0,
'neg_cls_weight': 2.0,
'cls_weight': 1.0,
'loc_weight': 0.25,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.2
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 100
OPTIMIZATION:
OPTIMIZER: adam_onecycle
LR: 0.001
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment