test_optimizer.py 7.61 KB
Newer Older
Sugon_ldc's avatar
Sugon_ldc committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
from mmcv.runner import build_optimizer_constructor


class SubModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(2, 2, kernel_size=1, groups=2)
        self.gn = nn.GroupNorm(2, 2)
        self.fc = nn.Linear(2, 2)
        self.param1 = nn.Parameter(torch.ones(1))

    def forward(self, x):
        return x


class ExampleModel(nn.Module):

    def __init__(self):
        super().__init__()
        self.param1 = nn.Parameter(torch.ones(1))
        self.conv1 = nn.Conv2d(3, 4, kernel_size=1, bias=False)
        self.conv2 = nn.Conv2d(4, 2, kernel_size=1)
        self.bn = nn.BatchNorm2d(2)
        self.sub = SubModel()
        self.fc = nn.Linear(2, 1)

    def forward(self, x):
        return x


class PseudoDataParallel(nn.Module):

    def __init__(self):
        super().__init__()
        self.module = ExampleModel()

    def forward(self, x):
        return x


base_lr = 0.01
base_wd = 0.0001
momentum = 0.9


def check_optimizer(optimizer,
                    model,
                    prefix='',
                    bias_lr_mult=1,
                    bias_decay_mult=1,
                    norm_decay_mult=1,
                    dwconv_decay_mult=1):
    param_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.SGD)
    assert optimizer.defaults['lr'] == base_lr
    assert optimizer.defaults['momentum'] == momentum
    assert optimizer.defaults['weight_decay'] == base_wd
    model_parameters = list(model.parameters())
    assert len(param_groups) == len(model_parameters)
    for i, param in enumerate(model_parameters):
        param_group = param_groups[i]
        assert torch.equal(param_group['params'][0], param)
        assert param_group['momentum'] == momentum
    # param1
    param1 = param_groups[0]
    assert param1['lr'] == base_lr
    assert param1['weight_decay'] == base_wd
    # conv1.weight
    conv1_weight = param_groups[1]
    assert conv1_weight['lr'] == base_lr
    assert conv1_weight['weight_decay'] == base_wd
    # conv2.weight
    conv2_weight = param_groups[2]
    assert conv2_weight['lr'] == base_lr
    assert conv2_weight['weight_decay'] == base_wd
    # conv2.bias
    conv2_bias = param_groups[3]
    assert conv2_bias['lr'] == base_lr * bias_lr_mult
    assert conv2_bias['weight_decay'] == base_wd * bias_decay_mult
    # bn.weight
    bn_weight = param_groups[4]
    assert bn_weight['lr'] == base_lr
    assert bn_weight['weight_decay'] == base_wd * norm_decay_mult
    # bn.bias
    bn_bias = param_groups[5]
    assert bn_bias['lr'] == base_lr
    assert bn_bias['weight_decay'] == base_wd * norm_decay_mult
    # sub.param1
    sub_param1 = param_groups[6]
    assert sub_param1['lr'] == base_lr
    assert sub_param1['weight_decay'] == base_wd
    # sub.conv1.weight
    sub_conv1_weight = param_groups[7]
    assert sub_conv1_weight['lr'] == base_lr
    assert sub_conv1_weight['weight_decay'] == base_wd * dwconv_decay_mult
    # sub.conv1.bias
    sub_conv1_bias = param_groups[8]
    assert sub_conv1_bias['lr'] == base_lr * bias_lr_mult
    assert sub_conv1_bias['weight_decay'] == base_wd * dwconv_decay_mult
    # sub.gn.weight
    sub_gn_weight = param_groups[9]
    assert sub_gn_weight['lr'] == base_lr
    assert sub_gn_weight['weight_decay'] == base_wd * norm_decay_mult
    # sub.gn.bias
    sub_gn_bias = param_groups[10]
    assert sub_gn_bias['lr'] == base_lr
    assert sub_gn_bias['weight_decay'] == base_wd * norm_decay_mult
    # sub.fc1.weight
    sub_fc_weight = param_groups[11]
    assert sub_fc_weight['lr'] == base_lr
    assert sub_fc_weight['weight_decay'] == base_wd
    # sub.fc1.bias
    sub_fc_bias = param_groups[12]
    assert sub_fc_bias['lr'] == base_lr * bias_lr_mult
    assert sub_fc_bias['weight_decay'] == base_wd * bias_decay_mult
    # fc1.weight
    fc_weight = param_groups[13]
    assert fc_weight['lr'] == base_lr
    assert fc_weight['weight_decay'] == base_wd
    # fc1.bias
    fc_bias = param_groups[14]
    assert fc_bias['lr'] == base_lr * bias_lr_mult
    assert fc_bias['weight_decay'] == base_wd * bias_decay_mult


def check_tsm_optimizer(optimizer, model, fc_lr5=True):
    param_groups = optimizer.param_groups
    assert isinstance(optimizer, torch.optim.SGD)
    assert optimizer.defaults['lr'] == base_lr
    assert optimizer.defaults['momentum'] == momentum
    assert optimizer.defaults['weight_decay'] == base_wd
    model_parameters = list(model.parameters())
    # first_conv_weight
    first_conv_weight = param_groups[0]
    assert torch.equal(first_conv_weight['params'][0], model_parameters[1])
    assert first_conv_weight['lr'] == base_lr
    assert first_conv_weight['weight_decay'] == base_wd
    # first_conv_bias
    first_conv_bias = param_groups[1]
    assert first_conv_bias['params'] == []
    assert first_conv_bias['lr'] == base_lr * 2
    assert first_conv_bias['weight_decay'] == 0
    # normal_weight
    normal_weight = param_groups[2]
    assert torch.equal(normal_weight['params'][0], model_parameters[2])
    assert torch.equal(normal_weight['params'][1], model_parameters[7])
    assert normal_weight['lr'] == base_lr
    assert normal_weight['weight_decay'] == base_wd
    # normal_bias
    normal_bias = param_groups[3]
    assert torch.equal(normal_bias['params'][0], model_parameters[3])
    assert torch.equal(normal_bias['params'][1], model_parameters[8])
    assert normal_bias['lr'] == base_lr * 2
    assert normal_bias['weight_decay'] == 0
    # bn
    bn = param_groups[4]
    assert torch.equal(bn['params'][0], model_parameters[4])
    assert torch.equal(bn['params'][1], model_parameters[5])
    assert torch.equal(bn['params'][2], model_parameters[9])
    assert torch.equal(bn['params'][3], model_parameters[10])
    assert bn['lr'] == base_lr
    assert bn['weight_decay'] == 0
    # normal linear weight
    assert torch.equal(normal_weight['params'][2], model_parameters[11])
    # normal linear bias
    assert torch.equal(normal_bias['params'][2], model_parameters[12])
    # fc_lr5
    lr5_weight = param_groups[5]
    lr10_bias = param_groups[6]
    assert lr5_weight['lr'] == base_lr * 5
    assert lr5_weight['weight_decay'] == base_wd
    assert lr10_bias['lr'] == base_lr * 10
    assert lr10_bias['weight_decay'] == 0
    if fc_lr5:
        # lr5_weight
        assert torch.equal(lr5_weight['params'][0], model_parameters[13])
        # lr10_bias
        assert torch.equal(lr10_bias['params'][0], model_parameters[14])
    else:
        # lr5_weight
        assert lr5_weight['params'] == []
        # lr10_bias
        assert lr10_bias['params'] == []
        assert torch.equal(normal_weight['params'][3], model_parameters[13])
        assert torch.equal(normal_bias['params'][3], model_parameters[14])


def test_tsm_optimizer_constructor():
    model = ExampleModel()
    optimizer_cfg = dict(
        type='SGD', lr=base_lr, weight_decay=base_wd, momentum=momentum)
    # fc_lr5 is True
    paramwise_cfg = dict(fc_lr5=True)
    optim_constructor_cfg = dict(
        type='TSMOptimizerConstructor',
        optimizer_cfg=optimizer_cfg,
        paramwise_cfg=paramwise_cfg)
    optim_constructor = build_optimizer_constructor(optim_constructor_cfg)
    optimizer = optim_constructor(model)
    check_tsm_optimizer(optimizer, model, **paramwise_cfg)

    # fc_lr5 is False
    paramwise_cfg = dict(fc_lr5=False)
    optim_constructor_cfg = dict(
        type='TSMOptimizerConstructor',
        optimizer_cfg=optimizer_cfg,
        paramwise_cfg=paramwise_cfg)
    optim_constructor = build_optimizer_constructor(optim_constructor_cfg)
    optimizer = optim_constructor(model)
    check_tsm_optimizer(optimizer, model, **paramwise_cfg)