# ssn_hv_secfpn_sbn-all_16xb2-2x_lyft-3d.py
# Base config files (model, dataset, schedule, runtime) that this config
# builds on; the settings further down override selected entries of them.
_base_ = [
    '../_base_/models/pointpillars_hv_fpn_lyft.py',
    '../_base_/datasets/lyft-3d.py',
    '../_base_/schedules/schedule-2x.py',
    '../_base_/default_runtime.py',
]
# Spatial range handed to the point/object range filters in both pipelines.
point_cloud_range = [-100, -100, -5, 100, 100, 3]
# Note that the order of class names should be consistent with
# the following anchors' order
class_names = [
    'bicycle', 'motorcycle', 'pedestrian', 'animal', 'car',
    'emergency_vehicle', 'bus', 'other_vehicle', 'truck'
]
# Forwarded unchanged to the point-loading transforms in both pipelines.
backend_args = None

# Training pipeline: load points (plus 10 sweeps) and 3D annotations, apply
# random rotation/scaling and BEV flips, filter points and objects to
# `point_cloud_range`, shuffle the points and pack the training inputs.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
# Test/validation pipeline: same point loading as training, then a
# `MultiScaleFlipAug3D` wrapper configured with a single scale and no flip,
# whose inner rotation/scale/translation ranges are all identity values.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]
# Point the dataloaders at the pipelines defined in this file. Training uses
# 2 samples per GPU with 4 workers; val and test share `test_pipeline`.
train_dataloader = dict(
    batch_size=2, num_workers=4, dataset=dict(pipeline=train_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))

# model settings
# Overrides for the base model. Entries marked `_delete_=True` replace the
# inherited entry wholesale instead of being merged into it (MMEngine config
# semantics). All per-class lists (anchor ranges/sizes, tasks, assigners)
# follow the order of `class_names`.
model = dict(
    # NOTE: the literal ranges below duplicate `point_cloud_range`; keep the
    # three copies in sync when changing the detection range.
    data_preprocessor=dict(
        voxel_layer=dict(point_cloud_range=[-100, -100, -5, 100, 100, 3])),
    pts_voxel_encoder=dict(
        feat_channels=[32, 64],
        point_cloud_range=[-100, -100, -5, 100, 100, 3]),
    pts_middle_encoder=dict(output_shape=[800, 800]),
    pts_neck=dict(
        _delete_=True,
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    pts_bbox_head=dict(
        _delete_=True,
        type='ShapeAwareHead',
        num_classes=9,
        # 384 presumably equals the three 128-channel neck outputs combined
        # -- TODO confirm against SECONDFPN.
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGeneratorPerCls',
            # One anchor range per class; only the z value differs.
            ranges=[[-100, -100, -1.0709302, 100, 100, -1.0709302],
                    [-100, -100, -1.3220503, 100, 100, -1.3220503],
                    [-100, -100, -0.9122268, 100, 100, -0.9122268],
                    [-100, -100, -1.8012227, 100, 100, -1.8012227],
                    [-100, -100, -1.0715024, 100, 100, -1.0715024],
                    [-100, -100, -0.8871424, 100, 100, -0.8871424],
                    [-100, -100, -0.3519405, 100, 100, -0.3519405],
                    [-100, -100, -0.6276341, 100, 100, -0.6276341],
                    [-100, -100, -0.3033737, 100, 100, -0.3033737]],
            sizes=[
                [1.76, 0.63, 1.44],  # bicycle
                [2.35, 0.96, 1.59],  # motorcycle
                [0.80, 0.76, 1.76],  # pedestrian
                [0.73, 0.35, 0.50],  # animal
                [4.75, 1.92, 1.71],  # car
                [6.52, 2.42, 2.34],  # emergency vehicle
                [12.70, 2.92, 3.42],  # bus
                [8.17, 2.75, 3.20],  # other vehicle
                [10.24, 2.84, 3.44]  # truck
            ],
            custom_values=[],
            rotations=[0, 1.57],
            reshape_out=False),
        # Classes are split into four groups; the `num_class` values sum to
        # `num_classes` (9). The two vehicle groups use three shared convs
        # with a first stride of 2, the other groups two convs with stride 1.
        tasks=[
            dict(
                num_class=2,
                class_names=['bicycle', 'motorcycle'],
                shared_conv_channels=(64, 64),
                shared_conv_strides=(1, 1),
                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
            dict(
                num_class=2,
                class_names=['pedestrian', 'animal'],
                shared_conv_channels=(64, 64),
                shared_conv_strides=(1, 1),
                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
            dict(
                num_class=2,
                class_names=['car', 'emergency_vehicle'],
                shared_conv_channels=(64, 64, 64),
                shared_conv_strides=(2, 1, 1),
                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01)),
            dict(
                num_class=3,
                class_names=['bus', 'other_vehicle', 'truck'],
                shared_conv_channels=(64, 64, 64),
                shared_conv_strides=(2, 1, 1),
                norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01))
        ],
        assign_per_class=True,
        diff_rad_by_sin=True,
        dir_offset=-0.7854,  # -pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss', use_sigmoid=False,
            loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        _delete_=True,
        pts=dict(
            # One assigner per class. car, bus and truck use the stricter
            # 0.6 / 0.45 thresholds; every other class uses 0.55 / 0.4.
            assigner=[
                dict(  # bicycle
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # motorcycle
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # pedestrian
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # animal
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # car
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1),
                dict(  # emergency vehicle
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # bus
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1),
                dict(  # other vehicle
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # truck
                    type='Max3DIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1)
            ],
            allowed_border=0,
            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            pos_weight=-1,
            debug=False)))

# Automatic learning-rate scaling defaults.
#   - `enable`: whether LR is scaled automatically by default (off here).
#   - `base_batch_size`: reference total batch size,
#       (16 GPUs) x (2 samples per GPU) = 32.
auto_scale_lr = {'enable': False, 'base_batch_size': 32}