# GroupFree3D config for ScanNet 3D object detection (18 classes).
# Original file name: groupfree3d_8x4_scannet-3d-18class-L6-O256.py
_base_ = [
    '../_base_/datasets/scannet-3d-18class.py',
    '../_base_/models/groupfree3d.py', '../_base_/schedules/schedule_3x.py',
    '../_base_/default_runtime.py'
]

# model settings
model = dict(
    # Detection-head overrides for the 18-class setup. Keys that are not
    # listed here are presumably inherited from the files named in _base_.
    bbox_head=dict(
        num_classes=18,
        size_cls_agnostic=False,
        bbox_coder=dict(
            type='GroupFree3DBBoxCoder',
            num_sizes=18,
            num_dir_bins=1,
            with_rot=False,
            size_cls_agnostic=False,
            # 18 rows of 3 values, the same count as num_sizes. Presumably
            # one mean box size per class, in class order -- TODO confirm.
            mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
                        [1.876858, 1.8425595, 1.1931566],
                        [0.61328, 0.6148609, 0.7182701],
                        [1.3955007, 1.5121545, 0.83443564],
                        [0.97949594, 1.0675149, 0.6329687],
                        [0.531663, 0.5955577, 1.7500148],
                        [0.9624706, 0.72462326, 1.1481868],
                        [0.83221924, 1.0490936, 1.6875663],
                        [0.21132214, 0.4206159, 0.5372846],
                        [1.4440073, 1.8970833, 0.26985747],
                        [1.0294262, 1.4040797, 0.87554324],
                        [1.3766412, 0.65521795, 1.6813129],
                        [0.6650819, 0.71111923, 1.298853],
                        [0.41999173, 0.37906948, 1.7513971],
                        [0.59359556, 0.5912492, 0.73919016],
                        [0.50867593, 0.50656086, 0.30136237],
                        [1.1511526, 1.0546296, 0.49706793],
                        [0.47535285, 0.49249494, 0.5802117]]),
        # The two objectness terms share the same focal-loss settings and
        # differ only in loss_weight (8.0 vs 1.0).
        sampling_objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=8.0),
        objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        # Every remaining loss term uses reduction='sum'.
        center_loss=dict(
            type='mmdet.SmoothL1Loss',
            beta=0.04,
            reduction='sum',
            loss_weight=10.0),
        dir_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='mmdet.SmoothL1Loss',
            # Kept as fractions: beta is 1/9 and loss_weight is 10/9.
            beta=1.0 / 9.0,
            reduction='sum',
            loss_weight=10.0 / 9.0),
        semantic_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    # Inference-time settings.
    test_cfg=dict(
        sample_mode='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
        prediction_stages='last_three'))

# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
# 18 class names.
# NOTE(review): 'showercurtrain' looks like a misspelling, but it is a
# runtime string that presumably has to match the label names used by the
# dataset code -- confirm before changing it.
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
               'bookshelf', 'picture', 'counter', 'desk', 'curtain',
               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
               'garbagebin')

# Passed to every dataset below through its `metainfo` argument.
metainfo = dict(CLASSES=class_names)

train_pipeline = [
    # load_dim=6 with use_dim=[0, 1, 2]: presumably six stored values per
    # point, of which only the first three are used -- TODO confirm.
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_mask_3d=True,
        with_seg_3d=True),
    dict(type='GlobalAlignment', rotation_axis=2),
    # 18 category ids are listed, the same count as the 18 classes.
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
                       36, 39)),
    dict(type='PointSample', num_points=50000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    # 0.087266 is 5 degrees expressed in radians (presumably the unit used
    # here); the scale range [1.0, 1.0] leaves the scale unchanged.
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[1.0, 1.0]),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
            'pts_instance_mask'
        ])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='GlobalAlignment', rotation_axis=2),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            # Zero rotation range, unit scale and zero translation std.
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            # NOTE(review): flip ratios of 0.5 appear in a test-time
            # pipeline whose wrapper sets flip=False. Presumably the wrapper
            # fixes the flip decision so nothing is flipped at random --
            # confirm against MultiScaleFlipAug3D / RandomFlip3D.
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=50000),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

# Training loader: shuffled, batch size 8, 4 workers.
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        # The training set is wrapped in RepeatDataset with times=5.
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='Depth')))
# Validation loader: unshuffled, one sample per batch, uses test_pipeline.
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
# Test loader: same settings as the validation loader, and it reads the same
# annotation file ('scannet_infos_val.pkl').
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))

# Validation and testing share one evaluator config (the same dict object).
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator

# optimizer
lr = 0.006
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    # The five decoder-related parameter groups get lr_mult=0.1 (presumably
    # a tenth of the base lr) and decay_mult=1.0 (presumably an unchanged
    # weight decay) -- confirm against the optimizer-wrapper constructor.
    paramwise_cfg=dict(
        custom_keys={
            'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_self_posembeds': dict(
                lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_cross_posembeds': dict(
                lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_query_proj': dict(lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
        }))

# learning rate
param_scheduler = [
    dict(
        type='MultiStepLR',
        # Epoch-based schedule covering epochs 0-80.
        begin=0,
        end=80,
        by_epoch=True,
        # Milestones at epochs 56 and 68, with gamma=0.1.
        milestones=[56, 68],
        gamma=0.1,
    ),
]

# training schedule: 80 epochs, with val_interval=1
# Epoch-based training loop capped at 80 epochs, with val_interval=1.
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Checkpoint hook override: interval=1, keeping at most 10 checkpoints.
default_hooks = dict(
    checkpoint=dict(
        type='CheckpointHook',
        interval=1,
        max_keep_ckpts=10,
    ),
)