Commit 2fa93c69 authored by Gus-Guo's avatar Gus-Guo
Browse files

support second and multihead

parent 96dfcbd0
......@@ -2,11 +2,12 @@ from .anchor_head_template import AnchorHeadTemplate
from .anchor_head_single import AnchorHeadSingle
from .point_intra_part_head import PointIntraPartOffsetHead
from .point_head_simple import PointHeadSimple
from .anchor_head_multi import AnchorHeadMulti
__all__ = {
'AnchorHeadTemplate': AnchorHeadTemplate,
'AnchorHeadSingle': AnchorHeadSingle,
'PointIntraPartOffsetHead': PointIntraPartOffsetHead,
'PointHeadSimple': PointHeadSimple
'PointHeadSimple': PointHeadSimple,
'AnchorHeadMulti': AnchorHeadMulti,
}
import numpy as np
import torch.nn as nn
from .anchor_head_template import AnchorHeadTemplate
import torch
class SingleHead(nn.Module):
def __init__(self, model_cfg, input_channels, num_class, num_anchors_per_location, code_size, encode_conv_cfg=None):
super(SingleHead, self).__init__()
if encode_conv_cfg is not None:
stride = encode_conv_cfg['stride']
layer_num = encode_conv_cfg['layer_num']
num_filters = input_channels
encode_conv = []
encode_conv.append(nn.Conv2d(num_filters, num_filters, kernel_size=1, stride=stride, bias=False))
for i in range(layer_num-1):
encode_conv.append(nn.Conv2d(num_filters, num_filters, 1, bias=False))
encode_conv.append(nn.BatchNorm2d(num_filters))
encode_conv.append(nn.ReLU(inplace=True))
self.encode_conv = nn.Sequential(*encode_conv)
else:
self.encode_conv = None
self.num_anchors_per_location = num_anchors_per_location
self.num_class = num_class
self.code_size = code_size
self.model_cfg = model_cfg
self.conv_cls = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.num_class,
kernel_size=1
)
self.conv_box = nn.Conv2d(
input_channels, self.num_anchors_per_location * self.code_size,
kernel_size=1
)
if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', None) is not None:
self.conv_dir_cls = nn.Conv2d(
input_channels,
self.num_anchors_per_location * self.model_cfg.NUM_DIR_BINS,
kernel_size=1
)
else:
self.conv_dir_cls = None
self.use_multihead = self.model_cfg.get('USE_MULTI_HEAD', False)
self.init_weights()
def init_weights(self):
pi = 0.01
nn.init.constant_(self.conv_cls.bias, -np.log((1 - pi) / pi))
def forward(self, spatial_features_2d):
ret_dict = {}
if self.encode_conv is not None:
spatial_features_2d = self.encode_conv(spatial_features_2d)
cls_preds = self.conv_cls(spatial_features_2d)
box_preds = self.conv_box(spatial_features_2d)
if not self.use_multihead:
box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
else:
H, W = box_preds.shape[2:]
batch_size = box_preds.shape[0]
box_preds = box_preds.view(-1, self.num_anchors_per_location,
self.code_size, H, W).permute(0, 1, 3, 4, 2).contiguous()
cls_preds = cls_preds.view(-1, self.num_anchors_per_location,
self.num_class, H, W).permute(0, 1, 3, 4, 2).contiguous()
box_preds = box_preds.view(batch_size, -1, self.code_size)
cls_preds = cls_preds.view(batch_size, -1, self.num_class).unsqueeze(-1)
if self.conv_dir_cls is not None:
dir_cls_preds = self.conv_dir_cls(spatial_features_2d)
if self.use_multihead:
dir_cls_preds = dir_cls_preds.view(
-1, self.num_anchors_per_location, self.model_cfg.NUM_DIR_BINS, H, W).permute(0, 1, 3, 4, 2).contiguous()
dir_cls_preds = dir_cls_preds.view(batch_size, -1, self.model_cfg.NUM_DIR_BINS)
else:
dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
else:
dir_cls_preds = None
ret_dict['cls_preds'] = cls_preds
ret_dict['box_preds'] = box_preds
ret_dict['dir_cls_preds'] = dir_cls_preds
return ret_dict
class AnchorHeadMulti(AnchorHeadTemplate):
def __init__(self, model_cfg, input_channels, num_class, grid_size, point_cloud_range, predict_boxes_when_training=True):
super().__init__(
model_cfg=model_cfg, num_class=num_class, grid_size=grid_size, point_cloud_range=point_cloud_range, predict_boxes_when_training=predict_boxes_when_training
)
self.model_cfg = model_cfg
self.make_multihead(input_channels)
def make_multihead(self, input_channels):
rpn_head_cfgs = self.model_cfg.RPN_HEAD_CFGS
rpn_heads = []
class_names = []
for rpn_head_cfg in rpn_head_cfgs:
class_names.extend(rpn_head_cfg['head_cls_name'])
for rpn_head_cfg in rpn_head_cfgs:
num_anchors_per_location = sum([self.num_anchors_per_location[class_names.index(head_cls)] for head_cls in rpn_head_cfg['head_cls_name']])
rpn_head = SingleHead(self.model_cfg, input_channels, self.num_class, num_anchors_per_location, self.box_coder.code_size, rpn_head_cfg)
rpn_heads.append(rpn_head)
self.rpn_heads = nn.ModuleList(rpn_heads)
def forward(self, data_dict):
spatial_features_2d = data_dict['spatial_features_2d']
ret_dicts = []
for rpn_head in self.rpn_heads:
ret_dicts.append(rpn_head(spatial_features_2d))
cls_preds = torch.cat([ret_dict['cls_preds'] for ret_dict in ret_dicts], dim=1)
box_preds = torch.cat([ret_dict['box_preds'] for ret_dict in ret_dicts], dim=1)
ret = {
'cls_preds': cls_preds,
'box_preds': box_preds,
}
if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', False):
dir_cls_preds = torch.cat([ret_dict['dir_cls_preds'] for ret_dict in ret_dicts], dim=1)
ret['dir_cls_preds'] = dir_cls_preds
else:
dir_cls_preds = None
self.forward_ret_dict.update(ret)
if self.training:
targets_dict = self.assign_targets(
gt_boxes=data_dict['gt_boxes']
)
self.forward_ret_dict.update(targets_dict)
else:
batch_cls_preds, batch_box_preds = self.generate_predicted_boxes(
batch_size=data_dict['batch_size'],
cls_preds=cls_preds, box_preds=box_preds, dir_cls_preds=dir_cls_preds
)
data_dict['batch_cls_preds'] = batch_cls_preds
data_dict['batch_box_preds'] = batch_box_preds
data_dict['cls_preds_normalized'] = False
return data_dict
CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
MODEL:
NAME: SECONDNet
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: AnchorHeadSingle
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
ANCHOR_GENERATOR_CONFIG: [
{
'anchor_sizes': [[3.9, 1.6, 1.56]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.78],
'align_center': False,
'feature_map_stride': 8
},
{
'anchor_sizes': [[0.8, 0.6, 1.73]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.6],
'align_center': False,
'feature_map_stride': 8
},
{
'anchor_sizes': [[1.76, 0.6, 1.73]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-0.6],
'align_center': False,
'feature_map_stride': 8
}
]
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
MATCHED_THRESHOLDS: [0.6, 0.5, 0.5]
UNMATCHED_THRESHOLDS: [0.45, 0.35, 0.35]
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.01
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
DATA_CONFIG:
_BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
MODEL:
NAME: SECONDNet
VFE:
NAME: MeanVFE
BACKBONE_3D:
NAME: VoxelBackBone8x
MAP_TO_BEV:
NAME: HeightCompression
NUM_BEV_FEATURES: 256
BACKBONE_2D:
NAME: BaseBEVBackbone
LAYER_NUMS: [5, 5]
LAYER_STRIDES: [1, 2]
NUM_FILTERS: [128, 256]
UPSAMPLE_STRIDES: [1, 2]
NUM_UPSAMPLE_FILTERS: [256, 256]
DENSE_HEAD:
NAME: AnchorHeadMulti
CLASS_AGNOSTIC: False
USE_DIRECTION_CLASSIFIER: True
DIR_OFFSET: 0.78539
DIR_LIMIT_OFFSET: 0.0
NUM_DIR_BINS: 2
USE_MULTI_HEAD: True
ANCHOR_GENERATOR_CONFIG: [
{
'anchor_sizes': [[3.9, 1.6, 1.56]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.6],
'align_center': False,
'feature_map_stride': 16
},
{
'anchor_sizes': [[0.8, 0.6, 1.73]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.6],
'align_center': False,
'feature_map_stride': 8
},
{
'anchor_sizes': [[1.76, 0.6, 1.73]],
'anchor_rotations': [0, 1.57],
'anchor_bottom_heights': [-1.6],
'align_center': False,
'feature_map_stride': 8
}
]
RPN_HEAD_CFGS: [
{
'head_cls_name': ['Car'],
'stride': 2,
'layer_num': 2
},
{
'head_cls_name': ['Pedestrian', 'Cyclist'],
'stride': 1,
'layer_num': 2
},
]
TARGET_ASSIGNER_CONFIG:
NAME: AxisAlignedTargetAssigner
POS_FRACTION: -1.0
SAMPLE_SIZE: 512
MATCHED_THRESHOLDS: [0.6, 0.5, 0.5]
UNMATCHED_THRESHOLDS: [0.45, 0.35, 0.35]
NORM_BY_NUM_EXAMPLES: False
MATCH_HEIGHT: False
BOX_CODER: ResidualCoder
LOSS_CONFIG:
LOSS_WEIGHTS: {
'cls_weight': 1.0,
'loc_weight': 2.0,
'dir_weight': 0.2,
'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
}
POST_PROCESSING:
RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
MULTI_CLASSES_NMS: False
SCORE_THRESH: 0.1
OUTPUT_RAW_SCORE: False
EVAL_METRIC: kitti
NMS_CONFIG:
MULTI_CLASSES_NMS: False
NMS_TYPE: nms_gpu
NMS_THRESH: 0.1
NMS_PRE_MAXSIZE: 4096
NMS_POST_MAXSIZE: 500
OPTIMIZATION:
OPTIMIZER: adam_onecycle
LR: 0.003
WEIGHT_DECAY: 0.01
MOMENTUM: 0.9
MOMS: [0.95, 0.85]
PCT_START: 0.4
DIV_FACTOR: 10
DECAY_STEP_LIST: [35, 45]
LR_DECAY: 0.1
LR_CLIP: 0.0000001
LR_WARMUP: False
WARMUP_EPOCH: 1
GRAD_NORM_CLIP: 10
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment