# h3dnet.py (10 KB) -- listing committed by encore-zhou.
# Settings for the 'PrimitiveHead' entry whose primitive_mode is 'z'.
# NOTE: every key below is a runtime string and is reproduced exactly,
# including the spelling of 'vote_moudule_cfg'.
primitive_z_cfg = {
    'type': 'PrimitiveHead',
    'num_dims': 2,
    'num_classes': 18,
    'primitive_mode': 'z',
    'upper_thresh': 100.0,
    'surface_thresh': 0.5,
    # Options grouped under the vote module key (Conv1d / BN1d layers).
    'vote_moudule_cfg': {
        'in_channels': 256,
        'vote_per_seed': 1,
        'gt_per_seed': 1,
        'conv_channels': (256, 256),
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d'},
        'norm_feats': True,
        'vote_loss': {
            'type': 'ChamferDistance',
            'mode': 'l1',
            'reduction': 'none',
            'loss_dst_weight': 10.0,
        },
    },
    # Options grouped under the vote aggregation key.
    'vote_aggregation_cfg': {
        'num_point': 1024,
        'radius': 0.3,
        'num_sample': 16,
        'mlp_channels': [256, 128, 128, 128],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'feat_channels': (128, 128),
    'conv_cfg': {'type': 'Conv1d'},
    'norm_cfg': {'type': 'BN1d'},
    # Loss settings; each entry names a loss type plus its weights.
    'objectness_loss': {
        'type': 'CrossEntropyLoss',
        'class_weight': [0.4, 0.6],
        'reduction': 'mean',
        'loss_weight': 30.0,
    },
    'center_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_reg_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_cls_loss': {
        'type': 'CrossEntropyLoss',
        'reduction': 'sum',
        'loss_weight': 1.0,
    },
    # Training-time thresholds and point counts for this head.
    'train_cfg': {
        'dist_thresh': 0.2,
        'var_thresh': 1e-2,
        'lower_thresh': 1e-6,
        'num_point': 100,
        'num_point_line': 10,
        'line_thresh': 0.2,
    },
}

# Settings for the 'PrimitiveHead' entry whose primitive_mode is 'xy'.
# NOTE: every key below is a runtime string and is reproduced exactly,
# including the spelling of 'vote_moudule_cfg'.
primitive_xy_cfg = {
    'type': 'PrimitiveHead',
    'num_dims': 1,
    'num_classes': 18,
    'primitive_mode': 'xy',
    'upper_thresh': 100.0,
    'surface_thresh': 0.5,
    # Options grouped under the vote module key (Conv1d / BN1d layers).
    'vote_moudule_cfg': {
        'in_channels': 256,
        'vote_per_seed': 1,
        'gt_per_seed': 1,
        'conv_channels': (256, 256),
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d'},
        'norm_feats': True,
        'vote_loss': {
            'type': 'ChamferDistance',
            'mode': 'l1',
            'reduction': 'none',
            'loss_dst_weight': 10.0,
        },
    },
    # Options grouped under the vote aggregation key.
    'vote_aggregation_cfg': {
        'num_point': 1024,
        'radius': 0.3,
        'num_sample': 16,
        'mlp_channels': [256, 128, 128, 128],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'feat_channels': (128, 128),
    'conv_cfg': {'type': 'Conv1d'},
    'norm_cfg': {'type': 'BN1d'},
    # Loss settings; each entry names a loss type plus its weights.
    'objectness_loss': {
        'type': 'CrossEntropyLoss',
        'class_weight': [0.4, 0.6],
        'reduction': 'mean',
        'loss_weight': 30.0,
    },
    'center_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_reg_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 0.5,
        'loss_dst_weight': 0.5,
    },
    'semantic_cls_loss': {
        'type': 'CrossEntropyLoss',
        'reduction': 'sum',
        'loss_weight': 1.0,
    },
    # Training-time thresholds and point counts for this head.
    'train_cfg': {
        'dist_thresh': 0.2,
        'var_thresh': 1e-2,
        'lower_thresh': 1e-6,
        'num_point': 100,
        'num_point_line': 10,
        'line_thresh': 0.2,
    },
}

# Settings for the 'PrimitiveHead' entry whose primitive_mode is 'line'.
# NOTE: every key below is a runtime string and is reproduced exactly,
# including the spelling of 'vote_moudule_cfg'.
primitive_line_cfg = {
    'type': 'PrimitiveHead',
    'num_dims': 0,
    'num_classes': 18,
    'primitive_mode': 'line',
    'upper_thresh': 100.0,
    'surface_thresh': 0.5,
    # Options grouped under the vote module key (Conv1d / BN1d layers).
    'vote_moudule_cfg': {
        'in_channels': 256,
        'vote_per_seed': 1,
        'gt_per_seed': 1,
        'conv_channels': (256, 256),
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d'},
        'norm_feats': True,
        'vote_loss': {
            'type': 'ChamferDistance',
            'mode': 'l1',
            'reduction': 'none',
            'loss_dst_weight': 10.0,
        },
    },
    # Options grouped under the vote aggregation key.
    'vote_aggregation_cfg': {
        'num_point': 1024,
        'radius': 0.3,
        'num_sample': 16,
        'mlp_channels': [256, 128, 128, 128],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'feat_channels': (128, 128),
    'conv_cfg': {'type': 'Conv1d'},
    'norm_cfg': {'type': 'BN1d'},
    # Loss settings; each entry names a loss type plus its weights.
    'objectness_loss': {
        'type': 'CrossEntropyLoss',
        'class_weight': [0.4, 0.6],
        'reduction': 'mean',
        'loss_weight': 30.0,
    },
    # The Chamfer src/dst weights here are 1.0 and the semantic
    # classification weight is 2.0.
    'center_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 1.0,
        'loss_dst_weight': 1.0,
    },
    'semantic_reg_loss': {
        'type': 'ChamferDistance',
        'mode': 'l1',
        'reduction': 'sum',
        'loss_src_weight': 1.0,
        'loss_dst_weight': 1.0,
    },
    'semantic_cls_loss': {
        'type': 'CrossEntropyLoss',
        'reduction': 'sum',
        'loss_weight': 2.0,
    },
    # Training-time thresholds and point counts for this head.
    'train_cfg': {
        'dist_thresh': 0.2,
        'var_thresh': 1e-2,
        'lower_thresh': 1e-6,
        'num_point': 100,
        'num_point_line': 10,
        'line_thresh': 0.2,
    },
}

# Proposal-module settings: surface/line matching options, refinement
# channels, thresholds, and the associated loss definitions.
# NOTE: keys are runtime strings and are reproduced exactly, including
# the spelling of 'suface_matching_cfg'.
proposal_module_cfg = {
    'suface_matching_cfg': {
        'num_point': 256 * 6,
        'radius': 0.5,
        'num_sample': 32,
        'mlp_channels': [128 + 6, 128, 64, 32],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'line_matching_cfg': {
        'num_point': 256 * 12,
        'radius': 0.5,
        'num_sample': 32,
        'mlp_channels': [128 + 12, 128, 64, 32],
        'use_xyz': True,
        'normalize_xyz': True,
    },
    'primitive_refine_channels': [128, 128, 128],
    'upper_thresh': 100.0,
    'surface_thresh': 0.5,
    'line_thresh': 0.5,
    # Training-time distance / mask / label thresholds.
    'train_cfg': {
        'far_threshold': 0.6,
        'near_threshold': 0.3,
        'mask_surface_threshold': 0.3,
        'label_surface_threshold': 0.3,
        'mask_line_threshold': 0.3,
        'label_line_threshold': 0.3,
    },
    # Loss settings; each entry names a loss type plus its weights.
    'cues_objectness_loss': {
        'type': 'CrossEntropyLoss',
        'class_weight': [0.3, 0.7],
        'reduction': 'mean',
        'loss_weight': 5.0,
    },
    'cues_semantic_loss': {
        'type': 'CrossEntropyLoss',
        'class_weight': [0.3, 0.7],
        'reduction': 'mean',
        'loss_weight': 5.0,
    },
    'proposal_objectness_loss': {
        'type': 'CrossEntropyLoss',
        'class_weight': [0.2, 0.8],
        'reduction': 'none',
        'loss_weight': 5.0,
    },
    'primitive_center_loss': {
        'type': 'MSELoss',
        'reduction': 'none',
        'loss_weight': 1.0,
    },
}

# Top-level model description: a 'MultiBackbone' backbone, a 'VoteHead'
# RPN head, and an 'H3DRoIHead' that reuses the primitive and proposal
# configurations defined earlier in this file.
# NOTE: keys are runtime strings and are reproduced exactly, including
# the spelling of 'vote_moudule_cfg'.
model = {
    'type': 'H3DNet',
    'backbone': {
        'type': 'MultiBackbone',
        'num_streams': 4,
        'suffixes': ['net0', 'net1', 'net2', 'net3'],
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d', 'eps': 1e-5, 'momentum': 0.01},
        'act_cfg': {'type': 'ReLU'},
        # Settings for the 'PointNet2SASSG' backbone entry.
        'backbones': {
            'type': 'PointNet2SASSG',
            'in_channels': 4,
            'num_points': (2048, 1024, 512, 256),
            'radius': (0.2, 0.4, 0.8, 1.2),
            'num_samples': (64, 32, 16, 16),
            'sa_channels': (
                (64, 64, 128),
                (128, 128, 256),
                (128, 128, 256),
                (128, 128, 256),
            ),
            'fp_channels': ((256, 256), (256, 256)),
            'norm_cfg': {'type': 'BN2d'},
            'pool_mod': 'max',
        },
    },
    'rpn_head': {
        'type': 'VoteHead',
        'vote_moudule_cfg': {
            'in_channels': 256,
            'vote_per_seed': 1,
            'gt_per_seed': 3,
            'conv_channels': (256, 256),
            'conv_cfg': {'type': 'Conv1d'},
            'norm_cfg': {'type': 'BN1d'},
            'norm_feats': True,
            'vote_loss': {
                'type': 'ChamferDistance',
                'mode': 'l1',
                'reduction': 'none',
                'loss_dst_weight': 10.0,
            },
        },
        'vote_aggregation_cfg': {
            'num_point': 256,
            'radius': 0.3,
            'num_sample': 16,
            'mlp_channels': [256, 128, 128, 128],
            'use_xyz': True,
            'normalize_xyz': True,
        },
        'feat_channels': (128, 128),
        'conv_cfg': {'type': 'Conv1d'},
        'norm_cfg': {'type': 'BN1d'},
        # RPN loss terms.
        'objectness_loss': {
            'type': 'CrossEntropyLoss',
            'class_weight': [0.2, 0.8],
            'reduction': 'sum',
            'loss_weight': 5.0,
        },
        'center_loss': {
            'type': 'ChamferDistance',
            'mode': 'l2',
            'reduction': 'sum',
            'loss_src_weight': 10.0,
            'loss_dst_weight': 10.0,
        },
        'dir_class_loss': {
            'type': 'CrossEntropyLoss',
            'reduction': 'sum',
            'loss_weight': 1.0,
        },
        'dir_res_loss': {
            'type': 'SmoothL1Loss',
            'reduction': 'sum',
            'loss_weight': 10.0,
        },
        'size_class_loss': {
            'type': 'CrossEntropyLoss',
            'reduction': 'sum',
            'loss_weight': 1.0,
        },
        'size_res_loss': {
            'type': 'SmoothL1Loss',
            'reduction': 'sum',
            'loss_weight': 10.0,
        },
        'semantic_loss': {
            'type': 'CrossEntropyLoss',
            'reduction': 'sum',
            'loss_weight': 1.0,
        },
    },
    'roi_head': {
        'type': 'H3DRoIHead',
        # Order of the primitive configs is preserved: z, xy, line.
        'primitive_list': [
            primitive_z_cfg,
            primitive_xy_cfg,
            primitive_line_cfg,
        ],
        'bbox_head': {
            'type': 'H3DBboxHead',
            'gt_per_seed': 3,
            'num_proposal': 256,
            'proposal_module_cfg': proposal_module_cfg,
            'feat_channels': (128, 128),
            'conv_cfg': {'type': 'Conv1d'},
            'norm_cfg': {'type': 'BN1d'},
            # Box-head loss terms; the three CrossEntropyLoss terms for
            # direction class, size class and semantics use weight 0.1.
            'objectness_loss': {
                'type': 'CrossEntropyLoss',
                'class_weight': [0.2, 0.8],
                'reduction': 'sum',
                'loss_weight': 5.0,
            },
            'center_loss': {
                'type': 'ChamferDistance',
                'mode': 'l2',
                'reduction': 'sum',
                'loss_src_weight': 10.0,
                'loss_dst_weight': 10.0,
            },
            'dir_class_loss': {
                'type': 'CrossEntropyLoss',
                'reduction': 'sum',
                'loss_weight': 0.1,
            },
            'dir_res_loss': {
                'type': 'SmoothL1Loss',
                'reduction': 'sum',
                'loss_weight': 10.0,
            },
            'size_class_loss': {
                'type': 'CrossEntropyLoss',
                'reduction': 'sum',
                'loss_weight': 0.1,
            },
            'size_res_loss': {
                'type': 'SmoothL1Loss',
                'reduction': 'sum',
                'loss_weight': 10.0,
            },
            'semantic_loss': {
                'type': 'CrossEntropyLoss',
                'reduction': 'sum',
                'loss_weight': 0.1,
            },
        },
    },
}

# model training and testing settings
# Training-time options, keyed by stage ('rpn', 'rpn_proposal', 'rcnn').
train_cfg = {
    'rpn': {
        'pos_distance_thr': 0.3,
        'neg_distance_thr': 0.6,
        'sample_mod': 'vote',
    },
    'rpn_proposal': {'use_nms': False},
    'rcnn': {
        'pos_distance_thr': 0.3,
        'neg_distance_thr': 0.6,
        'sample_mod': 'vote',
    },
}

# Test-time options, keyed by stage ('rpn', 'rcnn'). Only the 'rpn'
# entry carries a 'use_nms' key.
test_cfg = {
    'rpn': {
        'sample_mod': 'seed',
        'nms_thr': 0.25,
        'score_thr': 0.05,
        'per_class_proposal': True,
        'use_nms': False,
    },
    'rcnn': {
        'sample_mod': 'seed',
        'nms_thr': 0.25,
        'score_thr': 0.05,
        'per_class_proposal': True,
    },
}