optimization_test.py 5.95 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest
20
import os
21

thomwolf's avatar
thomwolf committed
22
23
import torch

thomwolf's avatar
thomwolf committed
24
25
26
from pytorch_transformers import (AdamW, ConstantLRSchedule, WarmupConstantSchedule,
                                  WarmupCosineSchedule, WarmupCosineWithHardRestartsSchedule, WarmupLinearSchedule)

27
from .tokenization_tests_commons import TemporaryDirectory
28

lukovnikov's avatar
lukovnikov committed
29

thomwolf's avatar
thomwolf committed
30
31
32
33
34
35
36
def unwrap_schedule(scheduler, num_steps=10):
    """Step ``scheduler`` repeatedly and collect what ``get_lr()`` reports.

    Args:
        scheduler: object exposing ``step()`` and ``get_lr()``.
        num_steps: number of times the scheduler is stepped.

    Returns:
        A list with one entry per step; each entry is the value returned by
        ``scheduler.get_lr()`` immediately after that step.
    """
    def advance_once():
        # Step first, then read: the recorded value is the one after the step.
        scheduler.step()
        return scheduler.get_lr()

    return [advance_once() for _ in range(num_steps)]

37
38
39
40
41
42
43
44
45
46
47
48
49
50
def unwrap_and_save_reload_schedule(scheduler, num_steps=10):
    """Collect per-step ``get_lr()`` values, round-tripping the state once midway.

    After the step whose index equals ``num_steps // 2``, the scheduler's
    ``state_dict()`` is written with ``torch.save`` to a file inside a
    temporary directory, read back with ``torch.load`` and handed to
    ``load_state_dict``.

    Args:
        scheduler: object exposing ``step()``, ``get_lr()``, ``state_dict()``
            and ``load_state_dict()``.
        num_steps: number of times the scheduler is stepped.

    Returns:
        A list with one entry per step, each being the value returned by
        ``scheduler.get_lr()`` right after that step.
    """
    checkpoint_step = num_steps // 2
    history = []
    for idx in range(num_steps):
        scheduler.step()
        history.append(scheduler.get_lr())
        if idx != checkpoint_step:
            continue
        # Save/reload round trip; the file lives only inside the `with` block.
        with TemporaryDirectory() as workdir:
            checkpoint_path = os.path.join(workdir, 'schedule.bin')
            torch.save(scheduler.state_dict(), checkpoint_path)
            scheduler.load_state_dict(torch.load(checkpoint_path))
    return history

51
52
53
54
55
56
57
class OptimizationTest(unittest.TestCase):
    """Checks that AdamW can fit a small tensor to a fixed target."""

    def assertListAlmostEqual(self, list1, list2, tol):
        """Assert two sequences have equal length and agree element-wise.

        Args:
            list1: first sequence of numbers.
            list2: second sequence of numbers.
            tol: maximum absolute difference allowed for each pair.

        Raises:
            AssertionError: if the lengths differ or any pair differs by more
                than ``tol``; the message names the index of the first
                offending pair.
        """
        self.assertEqual(len(list1), len(list2))
        for index, (a, b) in enumerate(zip(list1, list2)):
            # Report the position so a failure can be located without a debugger.
            self.assertAlmostEqual(a, b, delta=tol, msg='mismatch at index %d' % index)

    def test_adam_w(self):
        """Run 100 AdamW steps on an MSE loss and expect ``w`` to match the target."""
        w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
        target = torch.tensor([0.4, 0.2, -0.5])
        criterion = torch.nn.MSELoss()
        # No warmup, constant schedule, no gradient clipping
        optimizer = AdamW(params=[w], lr=2e-1, weight_decay=0.0)
        for _ in range(100):
            loss = criterion(w, target)
            loss.backward()
            optimizer.step()
            w.grad.detach_() # No zero_grad() function on simple tensors. we do it ourselves.
            w.grad.zero_()
        self.assertListAlmostEqual(w.tolist(), [0.4, 0.2, -0.5], tol=1e-2)


lukovnikov's avatar
lukovnikov committed
73
class ScheduleInitTest(unittest.TestCase):
    """Checks the learning rates produced by each schedule, with and without a
    save/reload of the scheduler state partway through."""

    # Class-level fixtures: built once when the class is defined and shared by
    # every test method below.
    m = torch.nn.Linear(50, 50)
    optimizer = AdamW(m.parameters(), lr=10.)
    num_steps = 10

    def assertListAlmostEqual(self, list1, list2, tol):
        """Assert two sequences have equal length and agree element-wise.

        Args:
            list1: first sequence of numbers.
            list2: second sequence of numbers.
            tol: maximum absolute difference allowed for each pair.

        Raises:
            AssertionError: if the lengths differ or any pair differs by more
                than ``tol``; the message names the index of the first
                offending pair.
        """
        self.assertEqual(len(list1), len(list2))
        for index, (a, b) in enumerate(zip(list1, list2)):
            # Report the position so a failure can be located without a debugger.
            self.assertAlmostEqual(a, b, delta=tol, msg='mismatch at index %d' % index)

    def test_constant_scheduler(self):
        """ConstantLRSchedule reports 10.0 at every step."""
        scheduler = ConstantLRSchedule(self.optimizer)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [10.] * self.num_steps
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([l[0] for l in lrs], expected_learning_rates)

        # A fresh scheduler that is saved and reloaded midway must report the same rates.
        scheduler = ConstantLRSchedule(self.optimizer)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_constant_scheduler(self):
        """WarmupConstantSchedule rises over 4 warmup steps, then holds 10.0."""
        scheduler = WarmupConstantSchedule(self.optimizer, warmup_steps=4)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [2.5, 5.0, 7.5, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([l[0] for l in lrs], expected_learning_rates)

        # A fresh scheduler that is saved and reloaded midway must report the same rates.
        scheduler = WarmupConstantSchedule(self.optimizer, warmup_steps=4)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_linear_scheduler(self):
        """WarmupLinearSchedule rises over 2 warmup steps, then falls linearly to 0.0."""
        scheduler = WarmupLinearSchedule(self.optimizer, warmup_steps=2, t_total=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [5.0, 10.0, 8.75, 7.5, 6.25, 5.0, 3.75, 2.5, 1.25, 0.0]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([l[0] for l in lrs], expected_learning_rates)

        # A fresh scheduler that is saved and reloaded midway must report the same rates.
        scheduler = WarmupLinearSchedule(self.optimizer, warmup_steps=2, t_total=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_cosine_scheduler(self):
        """WarmupCosineSchedule rises over 2 warmup steps, then decays to 0.0."""
        scheduler = WarmupCosineSchedule(self.optimizer, warmup_steps=2, t_total=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [5.0, 10.0, 9.61, 8.53, 6.91, 5.0, 3.08, 1.46, 0.38, 0.0]
        self.assertEqual(len(lrs[0]), 1)
        # Expected values are rounded to two decimals, hence the tolerance.
        self.assertListAlmostEqual([l[0] for l in lrs], expected_learning_rates, tol=1e-2)

        # A fresh scheduler that is saved and reloaded midway must report the same rates.
        scheduler = WarmupCosineSchedule(self.optimizer, warmup_steps=2, t_total=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_cosine_hard_restart_scheduler(self):
        """WarmupCosineWithHardRestartsSchedule with 2 cycles jumps back to 10.0 midway."""
        scheduler = WarmupCosineWithHardRestartsSchedule(self.optimizer, warmup_steps=2, cycles=2, t_total=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [5.0, 10.0, 8.53, 5.0, 1.46, 10.0, 8.53, 5.0, 1.46, 0.0]
        self.assertEqual(len(lrs[0]), 1)
        # Expected values are rounded to two decimals, hence the tolerance.
        self.assertListAlmostEqual([l[0] for l in lrs], expected_learning_rates, tol=1e-2)

        # A fresh scheduler that is saved and reloaded midway must report the same rates.
        scheduler = WarmupCosineWithHardRestartsSchedule(self.optimizer, warmup_steps=2, cycles=2, t_total=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])
lukovnikov's avatar
lukovnikov committed
137

138
139
# Script entry point: hand control to unittest's command-line runner.
if __name__ == "__main__":
    unittest.main()