# cbgs_bevfusion.yaml
# Detection class names, in order. The list has 10 entries, the same count as
# MODEL.DENSE_HEAD.NUM_CLASSES in this file.
CLASS_NAMES:
  - car
  - truck
  - construction_vehicle
  - bus
  - trailer
  - barrier
  - motorcycle
  - bicycle
  - pedestrian
  - traffic_cone

# Dataset, augmentation and pre-processing settings. Values given here sit
# alongside the base dataset config referenced by _BASE_CONFIG_.
DATA_CONFIG:
  _BASE_CONFIG_: cfgs/dataset_configs/nuscenes_dataset.yaml
  # presumably [x_min, y_min, z_min, x_max, y_max, z_max] -- confirm against the loader
  POINT_CLOUD_RANGE: [-54.0, -54.0, -5.0, 54.0, 54.0, 3.0]
  CAMERA_CONFIG:
    USE_CAMERA: True
    IMAGE:
      # Same pair as MODEL.VTRANSFORM.IMAGE_SIZE in this file.
      FINAL_DIM: [256, 704]
      RESIZE_LIM_TRAIN: [0.38, 0.55]
      # Both bounds are equal, so the test-time resize factor is fixed at 0.48.
      RESIZE_LIM_TEST: [0.48, 0.48]

  DATA_AUGMENTOR:
    DISABLE_AUG_LIST: ['placeholder']
    AUG_CONFIG_LIST:
      - NAME: random_world_flip
        ALONG_AXIS_LIST: ['x', 'y']

      - NAME: random_world_rotation
        # 0.78539816 ~= pi/4 (presumably radians -- TODO confirm)
        WORLD_ROT_ANGLE: [-0.78539816, 0.78539816]

      - NAME: random_world_scaling
        WORLD_SCALE_RANGE: [0.9, 1.1]

      - NAME: random_world_translation
        NOISE_TRANSLATE_STD: [0.5, 0.5, 0.5]

      - NAME: imgaug
        ROT_LIM: [-5.4, 5.4]
        RAND_FLIP: True

  DATA_PROCESSOR:
    - NAME: mask_points_and_boxes_outside_range
      REMOVE_OUTSIDE_BOXES: True

    - NAME: shuffle_points
      SHUFFLE_ENABLED:
        train: True
        test: True

    - NAME: transform_points_to_voxels
      VOXEL_SIZE: [0.075, 0.075, 0.2]
      MAX_POINTS_PER_VOXEL: 10
      MAX_NUMBER_OF_VOXELS:
        train: 120000
        test: 160000

    - NAME: image_calibrate

    - NAME: image_normalize
      # Three values each -- presumably one per image channel; confirm channel order.
      mean: [0.485, 0.456, 0.406]
      std: [0.229, 0.224, 0.225]


# Network definition: LiDAR branch (VFE -> BACKBONE_3D -> MAP_TO_BEV), camera
# branch (IMAGE_BACKBONE -> NECK -> VTRANSFORM), then FUSER, BACKBONE_2D and
# DENSE_HEAD. The grouping is read off the section names below; the actual
# wiring lives in the model code, which is not part of this file.
MODEL:
  NAME: BevFusion

  VFE:
    NAME: MeanVFE

  BACKBONE_3D:
    NAME: VoxelResBackBone8x
    USE_BIAS: False

  MAP_TO_BEV:
    NAME: HeightCompression
    NUM_BEV_FEATURES: 256

  IMAGE_BACKBONE:
    NAME: SwinTransformer
    EMBED_DIMS: 96
    DEPTHS: [2, 2, 6, 2]
    NUM_HEADS: [3, 6, 12, 24]
    WINDOW_SIZE: 7
    MLP_RATIO: 4
    DROP_RATE: 0.0
    ATTN_DROP_RATE: 0.0
    DROP_PATH_RATE: 0.2
    PATCH_NORM: True
    OUT_INDICES: [1, 2, 3]
    WITH_CP: False
    CONVERT_WEIGHTS: True
    INIT_CFG:
      type: Pretrained
      # NOTE(review): the active value is a machine-specific absolute path and
      # will not resolve on other hosts. The commented line is the portable
      # alternative; switch to it (or override per environment) before sharing.
      # checkpoint: swint-nuimages-pretrained.pth
      checkpoint: /slurm_data/shichen/package/tomopc_cu111/OpenPCDet/tmp/pretrained/swint-nuimages-pretrained.pth

  NECK:
    NAME: GeneralizedLSSFPN
    # 192/384/768 = EMBED_DIMS (96) x 2/4/8 -- presumably the channel widths of
    # the backbone stages selected by OUT_INDICES; confirm against the backbone.
    IN_CHANNELS: [192, 384, 768]
    OUT_CHANNELS: 256
    START_LEVEL: 0
    END_LEVEL: -1
    NUM_OUTS: 3

  VTRANSFORM:
    NAME: DepthLSSTransform
    # Same pair as DATA_CONFIG.CAMERA_CONFIG.IMAGE.FINAL_DIM.
    IMAGE_SIZE: [256, 704]
    IN_CHANNEL: 256
    OUT_CHANNEL: 80
    # [32, 88] = IMAGE_SIZE / 8.
    FEATURE_SIZE: [32, 88]
    # presumably [min, max, step] for each bound -- TODO confirm
    XBOUND: [-54.0, 54.0, 0.3]
    YBOUND: [-54.0, 54.0, 0.3]
    ZBOUND: [-10.0, 10.0, 20.0]
    DBOUND: [1.0, 60.0, 0.5]
    DOWNSAMPLE: 2

  FUSER:
    NAME: ConvFuser
    # 336 = VTRANSFORM.OUT_CHANNEL (80) + MAP_TO_BEV.NUM_BEV_FEATURES (256);
    # presumably the concatenated camera and LiDAR BEV features -- confirm.
    IN_CHANNEL: 336
    OUT_CHANNEL: 256

  BACKBONE_2D:
    NAME: BaseBEVBackbone
    LAYER_NUMS: [5, 5]
    LAYER_STRIDES: [1, 2]
    NUM_FILTERS: [128, 256]
    UPSAMPLE_STRIDES: [1, 2]
    NUM_UPSAMPLE_FILTERS: [256, 256]
    USE_CONV_FOR_NO_STRIDE: True

  DENSE_HEAD:
    CLASS_AGNOSTIC: False
    NAME: TransFusionHead

    USE_BIAS_BEFORE_NORM: False

    NUM_PROPOSALS: 200
    HIDDEN_CHANNEL: 128
    # Equals the number of entries in the top-level CLASS_NAMES list.
    NUM_CLASSES: 10
    NUM_HEADS: 8
    NMS_KERNEL_SIZE: 3
    FFN_CHANNEL: 256
    DROPOUT: 0.1
    BN_MOMENTUM: 0.1
    ACTIVATION: relu

    NUM_HM_CONV: 2
    SEPARATE_HEAD_CFG:
      HEAD_ORDER: ['center', 'height', 'dim', 'rot', 'vel']
      # out_channels sum to 2 + 1 + 3 + 2 + 2 = 10, the length of
      # LOSS_CONFIG.LOSS_WEIGHTS.code_weights below.
      HEAD_DICT:
        center: {out_channels: 2, num_conv: 2}
        height: {out_channels: 1, num_conv: 2}
        dim: {out_channels: 3, num_conv: 2}
        rot: {out_channels: 2, num_conv: 2}
        vel: {out_channels: 2, num_conv: 2}

    TARGET_ASSIGNER_CONFIG:
      FEATURE_MAP_STRIDE: 8
      DATASET: nuScenes
      GAUSSIAN_OVERLAP: 0.1
      MIN_RADIUS: 2
      HUNGARIAN_ASSIGNER:
        cls_cost: {gamma: 2.0, alpha: 0.25, weight: 0.15}
        reg_cost: {weight: 0.25}
        iou_cost: {weight: 0.25}

    LOSS_CONFIG:
      LOSS_WEIGHTS:
        cls_weight: 1.0
        bbox_weight: 0.25
        hm_weight: 1.0
        # Ten weights; the last two are 0.2 (presumably the 'vel' outputs,
        # going by HEAD_ORDER -- confirm).
        code_weights: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2]
      LOSS_CLS:
        use_sigmoid: True
        gamma: 2.0
        alpha: 0.25

    # Head-level post-processing; distinct from MODEL.POST_PROCESSING below.
    POST_PROCESSING:
      SCORE_THRESH: 0.0
      POST_CENTER_RANGE: [-61.2, -61.2, -10.0, 61.2, 61.2, 10.0]

  # Model-level post-processing / evaluation settings.
  POST_PROCESSING:
    RECALL_THRESH_LIST: [0.3, 0.5, 0.7]
    SCORE_THRESH: 0.1
    OUTPUT_RAW_SCORE: False

    # NOTE(review): set to 'kitti' although the dataset config is nuScenes --
    # confirm whether the nuScenes evaluation path reads this key at all.
    EVAL_METRIC: kitti



# Training schedule and optimizer hyper-parameters.
OPTIMIZATION:
  # Run length and per-device batch size.
  NUM_EPOCHS: 6
  BATCH_SIZE_PER_GPU: 3

  # Optimizer selection and its core hyper-parameters.
  OPTIMIZER: adam_cosineanneal
  LR: 0.0001
  WEIGHT_DECAY: 0.01
  BETAS: [0.9, 0.999]
  MOMENTUM: 0.9

  # Schedule shaping. Which of these keys the selected optimizer/scheduler
  # actually reads cannot be determined from this file -- verify in the trainer.
  WARMUP_ITER: 500
  PCT_START: 0.4
  MOMS: [0.9, 0.8052631]

  # NOTE(review): both decay steps (35, 45) exceed NUM_EPOCHS (6), and
  # LR_WARMUP is False while WARMUP_EPOCH is set -- these look like leftovers
  # from a step-decay setup; confirm they are unused with this optimizer.
  DECAY_STEP_LIST: [35, 45]
  LR_WARMUP: False
  WARMUP_EPOCH: 1

  GRAD_NORM_CLIP: 35

  LOSS_SCALE_FP16: 32