Add support for SECOND and the multi-head anchor head

2fa93c69 · Gus-Guo · 96dfcbd0 · 2fa93c69 · 2fa93c69 · 2fa93c69
Commit 2fa93c69 authored Jun 24, 2020 by Gus-Guo
4 changed files
--- a/pcdet/models/dense_heads/__init__.py
+++ b/pcdet/models/dense_heads/__init__.py
@@ -2,11 +2,12 @@ from .anchor_head_template import AnchorHeadTemplate
 from .anchor_head_single import AnchorHeadSingle
 from .point_intra_part_head import PointIntraPartOffsetHead
 from .point_head_simple import PointHeadSimple
-
+from .anchor_head_multi import AnchorHeadMulti

 __all__ = {
    'AnchorHeadTemplate': AnchorHeadTemplate,
    'AnchorHeadSingle': AnchorHeadSingle,
    'PointIntraPartOffsetHead': PointIntraPartOffsetHead,
-    'PointHeadSimple': PointHeadSimple
+    'PointHeadSimple': PointHeadSimple,
+    'AnchorHeadMulti': AnchorHeadMulti,
 }
--- a/pcdet/models/dense_heads/anchor_head_multi.py
+++ b/pcdet/models/dense_heads/anchor_head_multi.py
+import numpy as np
+import torch.nn as nn
+from .anchor_head_template import AnchorHeadTemplate
+import torch
+
+class SingleHead(nn.Module):
+    def __init__(self, model_cfg, input_channels, num_class, num_anchors_per_location, code_size, encode_conv_cfg=None):
+        super(SingleHead, self).__init__()
+        if encode_conv_cfg is not None:
+            stride = encode_conv_cfg['stride']
+            layer_num = encode_conv_cfg['layer_num']
+            num_filters = input_channels
+            encode_conv = []
+            encode_conv.append(nn.Conv2d(num_filters, num_filters, kernel_size=1, stride=stride, bias=False))
+            for i in range(layer_num-1):
+                encode_conv.append(nn.Conv2d(num_filters, num_filters, 1, bias=False))
+                encode_conv.append(nn.BatchNorm2d(num_filters))
+                encode_conv.append(nn.ReLU(inplace=True))
+            self.encode_conv = nn.Sequential(*encode_conv)
+        else:
+            self.encode_conv = None
+
+        self.num_anchors_per_location = num_anchors_per_location
+        self.num_class = num_class
+        self.code_size = code_size
+        self.model_cfg = model_cfg
+
+        self.conv_cls = nn.Conv2d(
+            input_channels, self.num_anchors_per_location * self.num_class,
+            kernel_size=1
+        )
+        self.conv_box = nn.Conv2d(
+            input_channels, self.num_anchors_per_location * self.code_size,
+            kernel_size=1
+        )
+
+        if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', None) is not None:
+            self.conv_dir_cls = nn.Conv2d(
+                input_channels,
+                self.num_anchors_per_location * self.model_cfg.NUM_DIR_BINS,
+                kernel_size=1
+            )
+        else:
+            self.conv_dir_cls = None
+        self.use_multihead = self.model_cfg.get('USE_MULTI_HEAD', False)
+        self.init_weights()
+
+    def init_weights(self):
+        pi = 0.01
+        nn.init.constant_(self.conv_cls.bias, -np.log((1 - pi) / pi))
+
+    def forward(self, spatial_features_2d):
+        ret_dict = {}
+
+        if self.encode_conv is not None:
+            spatial_features_2d = self.encode_conv(spatial_features_2d)
+
+        cls_preds = self.conv_cls(spatial_features_2d)
+        box_preds = self.conv_box(spatial_features_2d)
+
+        if not self.use_multihead:
+            box_preds = box_preds.permute(0, 2, 3, 1).contiguous()
+            cls_preds = cls_preds.permute(0, 2, 3, 1).contiguous()
+        else:
+            H, W = box_preds.shape[2:]
+            batch_size = box_preds.shape[0]
+            box_preds = box_preds.view(-1, self.num_anchors_per_location,
+                                       self.code_size, H, W).permute(0, 1, 3, 4, 2).contiguous()
+            cls_preds = cls_preds.view(-1, self.num_anchors_per_location,
+                                       self.num_class, H, W).permute(0, 1, 3, 4, 2).contiguous()
+            box_preds = box_preds.view(batch_size, -1, self.code_size)
+            cls_preds = cls_preds.view(batch_size, -1, self.num_class).unsqueeze(-1)
+
+        if self.conv_dir_cls is not None:
+            dir_cls_preds = self.conv_dir_cls(spatial_features_2d)
+            if self.use_multihead:
+                dir_cls_preds = dir_cls_preds.view(
+                    -1, self.num_anchors_per_location, self.model_cfg.NUM_DIR_BINS, H, W).permute(0, 1, 3, 4, 2).contiguous()
+                dir_cls_preds = dir_cls_preds.view(batch_size, -1, self.model_cfg.NUM_DIR_BINS)
+            else:
+                dir_cls_preds = dir_cls_preds.permute(0, 2, 3, 1).contiguous()
+        
+        else:
+            dir_cls_preds = None
+
+        ret_dict['cls_preds'] = cls_preds
+        ret_dict['box_preds'] = box_preds
+        ret_dict['dir_cls_preds'] = dir_cls_preds
+
+        return ret_dict
+
+class AnchorHeadMulti(AnchorHeadTemplate):
+    def __init__(self, model_cfg, input_channels, num_class, grid_size, point_cloud_range, predict_boxes_when_training=True):
+        super().__init__(
+            model_cfg=model_cfg, num_class=num_class, grid_size=grid_size, point_cloud_range=point_cloud_range, predict_boxes_when_training=predict_boxes_when_training
+        )
+        self.model_cfg = model_cfg
+        self.make_multihead(input_channels)
+        
+
+    def make_multihead(self, input_channels):
+        rpn_head_cfgs = self.model_cfg.RPN_HEAD_CFGS
+        rpn_heads = []
+        class_names = []
+        for rpn_head_cfg in rpn_head_cfgs:
+            class_names.extend(rpn_head_cfg['head_cls_name'])
+        for rpn_head_cfg in rpn_head_cfgs:
+            num_anchors_per_location = sum([self.num_anchors_per_location[class_names.index(head_cls)] for head_cls in rpn_head_cfg['head_cls_name']])
+            rpn_head = SingleHead(self.model_cfg, input_channels, self.num_class, num_anchors_per_location, self.box_coder.code_size, rpn_head_cfg)
+            rpn_heads.append(rpn_head)
+        self.rpn_heads = nn.ModuleList(rpn_heads)
+
+    def forward(self, data_dict):
+        spatial_features_2d = data_dict['spatial_features_2d']
+
+        ret_dicts = []
+        for rpn_head in self.rpn_heads:
+            ret_dicts.append(rpn_head(spatial_features_2d))
+
+        cls_preds = torch.cat([ret_dict['cls_preds'] for ret_dict in ret_dicts], dim=1)
+        box_preds = torch.cat([ret_dict['box_preds'] for ret_dict in ret_dicts], dim=1)
+        ret = {
+            'cls_preds': cls_preds,
+            'box_preds': box_preds,
+
+        }
+        if self.model_cfg.get('USE_DIRECTION_CLASSIFIER', False):
+            dir_cls_preds = torch.cat([ret_dict['dir_cls_preds'] for ret_dict in ret_dicts], dim=1)
+            ret['dir_cls_preds'] = dir_cls_preds
+        else:
+            dir_cls_preds = None
+ 
+        self.forward_ret_dict.update(ret)
+       
+        if self.training:
+            targets_dict = self.assign_targets(
+                gt_boxes=data_dict['gt_boxes']
+            )
+            self.forward_ret_dict.update(targets_dict)
+        else:
+            batch_cls_preds, batch_box_preds = self.generate_predicted_boxes(
+                batch_size=data_dict['batch_size'],
+                cls_preds=cls_preds, box_preds=box_preds, dir_cls_preds=dir_cls_preds
+            )
+            data_dict['batch_cls_preds'] = batch_cls_preds
+            data_dict['batch_box_preds'] = batch_box_preds
+            data_dict['cls_preds_normalized'] = False
+
+        return data_dict
--- a/tools/cfgs/kitti_models/second.yaml
+++ b/tools/cfgs/kitti_models/second.yaml
+CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
+
+DATA_CONFIG: 
+    _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
+
+
+MODEL:
+    NAME: SECONDNet
+
+    VFE:
+        NAME: MeanVFE
+
+    BACKBONE_3D:
+        NAME: VoxelBackBone8x
+
+    MAP_TO_BEV:
+        NAME: HeightCompression
+        NUM_BEV_FEATURES: 256
+
+    BACKBONE_2D:
+        NAME: BaseBEVBackbone
+
+        LAYER_NUMS: [5, 5]
+        LAYER_STRIDES: [1, 2]
+        NUM_FILTERS: [128, 256]
+        UPSAMPLE_STRIDES: [1, 2]
+        NUM_UPSAMPLE_FILTERS: [256, 256]
+
+    DENSE_HEAD:
+        NAME: AnchorHeadSingle
+        CLASS_AGNOSTIC: False
+
+        USE_DIRECTION_CLASSIFIER: True
+        DIR_OFFSET: 0.78539
+        DIR_LIMIT_OFFSET: 0.0
+        NUM_DIR_BINS: 2
+
+        ANCHOR_GENERATOR_CONFIG: [
+            {
+                'anchor_sizes': [[3.9, 1.6, 1.56]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-1.78],
+                'align_center': False,
+                'feature_map_stride': 8
+            },
+            {
+                'anchor_sizes': [[0.8, 0.6, 1.73]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-0.6],
+                'align_center': False,
+                'feature_map_stride': 8
+            },
+            {
+                'anchor_sizes': [[1.76, 0.6, 1.73]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-0.6],
+                'align_center': False,
+                'feature_map_stride': 8
+            }
+        ]
+
+        TARGET_ASSIGNER_CONFIG:
+            NAME: AxisAlignedTargetAssigner
+            POS_FRACTION: -1.0
+            SAMPLE_SIZE: 512
+            MATCHED_THRESHOLDS: [0.6, 0.5, 0.5]
+            UNMATCHED_THRESHOLDS: [0.45, 0.35, 0.35]
+            NORM_BY_NUM_EXAMPLES: False
+            MATCH_HEIGHT: False
+            BOX_CODER: ResidualCoder
+
+        LOSS_CONFIG:
+            LOSS_WEIGHTS: {
+                'cls_weight': 1.0,
+                'loc_weight': 2.0,
+                'dir_weight': 0.2,
+                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            }
+
+    POST_PROCESSING:
+        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
+        SCORE_THRESH: 0.1
+        OUTPUT_RAW_SCORE: False
+
+        EVAL_METRIC: kitti
+
+        NMS_CONFIG:
+            MULTI_CLASSES_NMS: False
+            NMS_TYPE: nms_gpu
+            NMS_THRESH: 0.01
+            NMS_PRE_MAXSIZE: 4096
+            NMS_POST_MAXSIZE: 500
+
+
+OPTIMIZATION:
+    OPTIMIZER: adam_onecycle
+    LR: 0.003
+    WEIGHT_DECAY: 0.01
+    MOMENTUM: 0.9
+
+    MOMS: [0.95, 0.85]
+    PCT_START: 0.4
+    DIV_FACTOR: 10
+    DECAY_STEP_LIST: [35, 45]
+    LR_DECAY: 0.1
+    LR_CLIP: 0.0000001
+
+    LR_WARMUP: False
+    WARMUP_EPOCH: 1
+
+    GRAD_NORM_CLIP: 10
--- a/tools/cfgs/kitti_models/second_multihead.yaml
+++ b/tools/cfgs/kitti_models/second_multihead.yaml
+CLASS_NAMES: ['Car', 'Pedestrian', 'Cyclist']
+DATA_CONFIG: 
+    _BASE_CONFIG_: cfgs/dataset_configs/kitti_dataset.yaml
+
+
+MODEL:
+    NAME: SECONDNet
+
+    VFE:
+        NAME: MeanVFE
+
+    BACKBONE_3D:
+        NAME: VoxelBackBone8x
+
+    MAP_TO_BEV:
+        NAME: HeightCompression
+        NUM_BEV_FEATURES: 256
+
+    BACKBONE_2D:
+        NAME: BaseBEVBackbone
+
+        LAYER_NUMS: [5, 5]
+        LAYER_STRIDES: [1, 2]
+        NUM_FILTERS: [128, 256]
+        UPSAMPLE_STRIDES: [1, 2]
+        NUM_UPSAMPLE_FILTERS: [256, 256]
+
+    DENSE_HEAD:
+        NAME: AnchorHeadMulti
+        CLASS_AGNOSTIC: False
+
+        USE_DIRECTION_CLASSIFIER: True
+        DIR_OFFSET: 0.78539
+        DIR_LIMIT_OFFSET: 0.0
+        NUM_DIR_BINS: 2
+        
+        USE_MULTI_HEAD: True
+        ANCHOR_GENERATOR_CONFIG: [
+            {
+                'anchor_sizes': [[3.9, 1.6, 1.56]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-1.6],
+                'align_center': False,
+                'feature_map_stride': 16
+            },
+            {
+                'anchor_sizes': [[0.8, 0.6, 1.73]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-1.6],
+                'align_center': False,
+                'feature_map_stride': 8
+            },
+            {
+                'anchor_sizes': [[1.76, 0.6, 1.73]],
+                'anchor_rotations': [0, 1.57],
+                'anchor_bottom_heights': [-1.6],
+                'align_center': False,
+                'feature_map_stride': 8
+            }
+        ]
+
+        RPN_HEAD_CFGS: [
+            {
+                'head_cls_name': ['Car'],
+                'stride': 2,
+                'layer_num': 2
+            },
+            {
+                'head_cls_name': ['Pedestrian', 'Cyclist'],
+                'stride': 1,
+                'layer_num': 2
+            }
+        ]
+
+
+        TARGET_ASSIGNER_CONFIG:
+            NAME: AxisAlignedTargetAssigner
+            POS_FRACTION: -1.0
+            SAMPLE_SIZE: 512
+            MATCHED_THRESHOLDS: [0.6, 0.5, 0.5]
+            UNMATCHED_THRESHOLDS: [0.45, 0.35, 0.35]
+            NORM_BY_NUM_EXAMPLES: False
+            MATCH_HEIGHT: False
+            BOX_CODER: ResidualCoder
+
+        LOSS_CONFIG:
+            LOSS_WEIGHTS: {
+                'cls_weight': 1.0,
+                'loc_weight': 2.0,
+                'dir_weight': 0.2,
+                'code_weights': [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
+            }
+
+    POST_PROCESSING:
+        RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
+        MULTI_CLASSES_NMS: False
+        SCORE_THRESH: 0.1
+        OUTPUT_RAW_SCORE: False
+
+        EVAL_METRIC: kitti
+
+        NMS_CONFIG:
+            MULTI_CLASSES_NMS: False
+            NMS_TYPE: nms_gpu
+            NMS_THRESH: 0.1
+            NMS_PRE_MAXSIZE: 4096
+            NMS_POST_MAXSIZE: 500
+
+
+OPTIMIZATION:
+    OPTIMIZER: adam_onecycle
+    LR: 0.003
+    WEIGHT_DECAY: 0.01
+    MOMENTUM: 0.9
+
+    MOMS: [0.95, 0.85]
+    PCT_START: 0.4
+    DIV_FACTOR: 10
+    DECAY_STEP_LIST: [35, 45]
+    LR_DECAY: 0.1
+    LR_CLIP: 0.0000001
+
+    LR_WARMUP: False
+    WARMUP_EPOCH: 1
+
+    GRAD_NORM_CLIP: 10