# Model configuration for a 'GroupFree3DNet' detector.
#
# The file defines a single module-level name, `model`, a nested dict whose
# `type` strings name the components to build. Resolving those strings to
# classes happens outside this file (presumably through a registry-based
# config loader -- confirm against the consuming framework).
#
# NOTE(review): the value 288 is used as the last `fp_channels` width in the
# backbone, as `in_channels` of the head and of `pred_layer_cfg`, and as
# `embed_dims` of both the attention and the FFN. These look like they must
# agree; change them together.
model = dict(
    type='GroupFree3DNet',
    data_preprocessor=dict(type='Det3DDataPreprocessor'),
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=3,
        # The four tuples below each have one entry per set-abstraction
        # stage (4 stages), so they must stay the same length.
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        # Two feature-propagation stages; the final width (288) is what the
        # bbox head consumes as `in_channels`.
        fp_channels=((256, 256), (256, 288)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type='GroupFree3DHead',
        in_channels=288,
        num_decoder_layers=6,
        num_proposal=256,
        transformerlayers=dict(
            type='BaseTransformerLayer',
            attn_cfgs=dict(
                type='GroupFree3DMHA',
                embed_dims=288,
                num_heads=8,
                attn_drop=0.1,
                dropout_layer=dict(type='Dropout', drop_prob=0.1)),
            ffn_cfgs=dict(
                embed_dims=288,
                feedforward_channels=2048,
                ffn_drop=0.1,
                act_cfg=dict(type='ReLU', inplace=True)),
            # Self-attention, cross-attention and the FFN are each followed
            # by a 'norm' step.
            operation_order=('self_attn', 'norm', 'cross_attn', 'norm', 'ffn',
                             'norm')),
        pred_layer_cfg=dict(
            in_channels=288, shared_conv_channels=(288, 288), bias=True),
        # Loss types carry an 'mmdet.' prefix -- presumably a scope prefix
        # selecting the mmdet registry; confirm with the config loader.
        # The two focal objectness losses differ only in `loss_weight`
        # (8.0 for sampling, 1.0 for the decoder objectness).
        sampling_objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=8.0),
        objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        # All remaining losses use reduction='sum'. The SmoothL1 terms are
        # weighted 10.0 and the cross-entropy terms 1.0.
        center_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        dir_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        # Only this SmoothL1 loss sets `beta` explicitly.
        size_res_loss=dict(
            type='mmdet.SmoothL1Loss',
            beta=1.0,
            reduction='sum',
            loss_weight=10.0),
        semantic_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    # `sample_mode` is 'kps' for both training and testing.
    train_cfg=dict(sample_mode='kps'),
    test_cfg=dict(
        sample_mode='kps',
        nms_thr=0.25,
        # score_thr=0.0 means no prediction is dropped by score alone
        # (assuming the consumer filters with `score > score_thr` or
        # `>=` -- confirm in the head's post-processing).
        score_thr=0.0,
        per_class_proposal=True,
        prediction_stages='last'))