optimizer.py
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import paddle.fluid.layers.ops as ops

from ppocr.utils.utility import initial_logger

logger = initial_logger()


def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=500,
                             warmup_minibatch=1000):
    """Applies cosine decay to the learning rate.
    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
    decrease lr for every mini-batch and start with warmup.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_minibatch = fluid.layers.fill_constant(
        shape=[1],
        dtype='float32',
        value=float(warmup_minibatch),
        force_cpu=True)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(global_step < warmup_minibatch):
            decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
        with switch.default():
            decayed_lr = learning_rate * \
                (ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1)/2
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr
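
# A minimal usage sketch (the numbers below are illustrative, not taken from a
# real PaddleOCR config):
#
#   lr = cosine_decay_with_warmup(learning_rate=0.001,
#                                 step_each_epoch=100,
#                                 epochs=500,
#                                 warmup_minibatch=1000)
#
# For the first 1000 mini-batches the rate ramps linearly from 0 to 0.001;
# afterwards it follows 0.001 * (cos(t * pi / (500 * 100)) + 1) / 2, where t
# counts steps past the warmup phase, so it decays smoothly towards 0.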


def AdamDecay(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the super parameters
        parameter_list (list): list of Variable names to update to minimize loss
    return:
    """
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
    l2_decay = params.get("l2_decay", 0.0)

    if 'decay' in params:
        supported_decay_mode = [
            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
        ]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay modes are {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "cosine_decay_warmup":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            warmup_minibatch = params.get("warmup_minibatch", 1000)
            base_lr = cosine_decay_with_warmup(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch,
                warmup_minibatch=warmup_minibatch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
    return optimizer
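
# A minimal usage sketch (hypothetical hyperparameter dict, mirroring the keys
# that AdamDecay reads above):
#
#   params = {'base_lr': 0.001, 'beta1': 0.9, 'beta2': 0.999, 'l2_decay': 1e-5,
#             'decay': {'function': 'piecewise_decay',
#                       'boundaries': [10000, 20000], 'decay_rate': 0.1}}
#   optimizer = AdamDecay(params)
#
# With these values the learning rate is 0.001 before step 10000, 0.0001 until
# step 20000, and 0.00001 afterwards (base_lr * decay_rate ** idx).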


def RMSProp(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the super parameters
        parameter_list (list): list of Variable names to update to minimize loss
    return:
    """
    base_lr = params.get("base_lr", 0.001)
    l2_decay = params.get("l2_decay", 0.00005)

    if 'decay' in params:
        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay modes are {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.RMSProp(
        learning_rate=base_lr,
        regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)

    return optimizer
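
# A minimal usage sketch (hypothetical values; omit 'decay' to keep a constant
# learning rate):
#
#   params = {'base_lr': 0.001, 'l2_decay': 5e-5,
#             'decay': {'function': 'cosine_decay',
#                       'step_each_epoch': 100, 'total_epoch': 500}}
#   optimizer = RMSProp(params)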