fast_mask_rcnn_r101_fpn_1x.py 4.79 KB
Newer Older
Kai Chen's avatar
Kai Chen committed
1
2
3
# model settings
model = dict(
    type='FastRCNN',
4
    pretrained='torchvision://resnet101',
Kai Chen's avatar
Kai Chen committed
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    bbox_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    bbox_head=dict(
        type='SharedFCBBoxHead',
        num_fcs=2,
        in_channels=256,
        fc_out_channels=1024,
        roi_feat_size=7,
        num_classes=81,
        target_means=[0., 0., 0., 0.],
        target_stds=[0.1, 0.1, 0.2, 0.2],
Jiangmiao Pang's avatar
Jiangmiao Pang committed
31
32
33
34
        reg_class_agnostic=False,
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
Kai Chen's avatar
Kai Chen committed
35
36
37
38
39
40
41
42
43
44
    mask_roi_extractor=dict(
        type='SingleRoIExtractor',
        roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
        out_channels=256,
        featmap_strides=[4, 8, 16, 32]),
    mask_head=dict(
        type='FCNMaskHead',
        num_convs=4,
        in_channels=256,
        conv_out_channels=256,
Jiangmiao Pang's avatar
Jiangmiao Pang committed
45
46
47
        num_classes=81,
        loss_mask=dict(
            type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)))
Kai Chen's avatar
Kai Chen committed
48
49
50
51
# model training and testing settings
train_cfg = dict(
    rcnn=dict(
        assigner=dict(
52
            type='MaxIoUAssigner',
Kai Chen's avatar
Kai Chen committed
53
54
55
56
57
            pos_iou_thr=0.5,
            neg_iou_thr=0.5,
            min_pos_iou=0.5,
            ignore_iof_thr=-1),
        sampler=dict(
58
            type='RandomSampler',
Kai Chen's avatar
Kai Chen committed
59
60
61
            num=512,
            pos_fraction=0.25,
            neg_pos_ub=-1,
62
            add_gt_as_proposals=True),
Kai Chen's avatar
Kai Chen committed
63
64
65
66
67
68
69
70
71
72
73
74
75
76
        mask_size=28,
        pos_weight=-1,
        debug=False))
test_cfg = dict(
    rcnn=dict(
        score_thr=0.05,
        nms=dict(type='nms', iou_thr=0.5),
        max_per_img=100,
        mask_thr_binary=0.5))
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'data/coco/'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadProposals', num_max_proposals=2000),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
    dict(type='RandomFlip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='Pad', size_divisor=32),
    dict(type='DefaultFormatBundle'),
    dict(
        type='Collect',
        keys=['img', 'proposals', 'gt_bboxes', 'gt_labels', 'gt_masks']),
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadProposals', num_max_proposals=None),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(1333, 800),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(type='Normalize', **img_norm_cfg),
            dict(type='Pad', size_divisor=32),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img', 'proposals']),
        ])
]
Kai Chen's avatar
Kai Chen committed
106
107
108
109
110
111
112
data = dict(
    imgs_per_gpu=2,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_train2017.json',
        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',
113
114
        img_prefix=data_root + 'train2017/',
        pipeline=train_pipeline),
Kai Chen's avatar
Kai Chen committed
115
116
117
118
    val=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
119
120
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline),
Kai Chen's avatar
Kai Chen committed
121
122
123
124
    test=dict(
        type=dataset_type,
        ann_file=data_root + 'annotations/instances_val2017.json',
        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',
125
126
        img_prefix=data_root + 'val2017/',
        pipeline=test_pipeline))
Kai Chen's avatar
Kai Chen committed
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    step=[8, 11])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 12
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/fast_mask_rcnn_r101_fpn_1x'
load_from = None
resume_from = None
workflow = [('train', 1)]