# Source listing: groupfree3d_head-L12-O256_4xb8_scannet-seg.py (7.19 KB)
# Viewer notice: "configs/foveabox/fovea_r50_fpn_4gpu_1x.py" did not exist on
# "53be832965394768569f5bd450502358fe8a9c7d"
# Base configuration files referenced by this config
# (dataset, model, schedule and default runtime).
_base_ = [
    '../_base_/datasets/scannet-3d.py', '../_base_/models/groupfree3d.py',
    '../_base_/schedules/schedule-3x.py', '../_base_/default_runtime.py'
]

# model settings: bbox_head options (box coder + losses) and test_cfg
model = dict(
    bbox_head=dict(
        num_classes=18,
        num_decoder_layers=12,
        size_cls_agnostic=False,
        bbox_coder=dict(
            type='GroupFree3DBBoxCoder',
            num_sizes=18,
            num_dir_bins=1,
            with_rot=False,
            size_cls_agnostic=False,
            # 18 rows of 3 values each, matching num_sizes=18 above
            mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
                        [1.876858, 1.8425595, 1.1931566],
                        [0.61328, 0.6148609, 0.7182701],
                        [1.3955007, 1.5121545, 0.83443564],
                        [0.97949594, 1.0675149, 0.6329687],
                        [0.531663, 0.5955577, 1.7500148],
                        [0.9624706, 0.72462326, 1.1481868],
                        [0.83221924, 1.0490936, 1.6875663],
                        [0.21132214, 0.4206159, 0.5372846],
                        [1.4440073, 1.8970833, 0.26985747],
                        [1.0294262, 1.4040797, 0.87554324],
                        [1.3766412, 0.65521795, 1.6813129],
                        [0.6650819, 0.71111923, 1.298853],
                        [0.41999173, 0.37906948, 1.7513971],
                        [0.59359556, 0.5912492, 0.73919016],
                        [0.50867593, 0.50656086, 0.30136237],
                        [1.1511526, 1.0546296, 0.49706793],
                        [0.47535285, 0.49249494, 0.5802117]]),
        # Both objectness losses share the same focal-loss settings and
        # differ only in loss_weight (8.0 vs 1.0).
        sampling_objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=8.0),
        objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        center_loss=dict(
            type='mmdet.SmoothL1Loss',
            beta=0.04,
            reduction='sum',
            loss_weight=10.0),
        dir_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='mmdet.SmoothL1Loss',
            beta=1.0 / 9.0,
            reduction='sum',
            loss_weight=10.0 / 9.0),
        semantic_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    test_cfg=dict(
        sample_mode='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
        prediction_stages='last_three'))

# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
# NOTE(review): 'showercurtrain' looks like a misspelling of "shower curtain",
# but it is a runtime label string; confirm against the dataset annotations
# before changing it.
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
               'bookshelf', 'picture', 'counter', 'desk', 'curtain',
               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
               'garbagebin')

# Class names passed to each dataset below via `metainfo=metainfo`.
metainfo = dict(CLASSES=class_names)

# Training pipeline: load points and 3D annotations, align, map segmentation
# classes, sample points, apply flip / rotation augmentation, then pack.
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_mask_3d=True,
        with_seg_3d=True),
    dict(type='GlobalAlignment', rotation_axis=2),
    dict(type='PointSegClassMapping'),
    dict(type='PointSample', num_points=50000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],
        # scale range [1.0, 1.0]: lower and upper bound are equal
        scale_ratio_range=[1.0, 1.0]),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
            'pts_instance_mask'
        ])
]
# Test pipeline: load points, align, run the MultiScaleFlipAug3D wrapper
# (single scale ratio, flip=False), then pack only the points.
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='GlobalAlignment', rotation_axis=2),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            # zero rotation range, unit scale and zero translation std
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=50000),
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

# Training dataloader: the ScanNet training set is wrapped in a RepeatDataset
# with times=5.
train_dataloader = dict(
    batch_size=8,
    num_workers=4,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='Depth')))
# Validation dataloader: unshuffled, batch size 1, using test_pipeline.
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
# Test dataloader: unshuffled, batch size 1; reads 'scannet_infos_val.pkl'
# through test_pipeline.
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='scannet_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth'))
# Evaluation: testing reuses the validation evaluator object.
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator

# optimizer
lr = 0.006
optim_wrapper = dict(
    type='OptimWrapper',
    optimizer=dict(type='AdamW', lr=lr, weight_decay=0.0005),
    clip_grad=dict(max_norm=0.1, norm_type=2),
    # Every listed bbox_head decoder key gets lr_mult=0.1 and decay_mult=1.0.
    paramwise_cfg=dict(
        custom_keys={
            'bbox_head.decoder_layers': dict(lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_self_posembeds': dict(
                lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_cross_posembeds': dict(
                lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_query_proj': dict(lr_mult=0.1, decay_mult=1.0),
            'bbox_head.decoder_key_proj': dict(lr_mult=0.1, decay_mult=1.0)
        }))

# learning rate
# Single epoch-based MultiStepLR entry covering epochs 0-80, with
# milestones at 56 and 68 and gamma=0.1.
param_scheduler = [
    {
        'type': 'MultiStepLR',
        'begin': 0,
        'end': 80,
        'by_epoch': True,
        'milestones': [56, 68],
        'gamma': 0.1,
    },
]

# training schedule: epoch-based loop, max_epochs=80, val_interval=1
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=80, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')

# Checkpoint hook settings: interval=1, max_keep_ckpts=10.
default_hooks = {
    'checkpoint': {
        'type': 'CheckpointHook',
        'interval': 1,
        'max_keep_ckpts': 10,
    },
}