# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Aymeric Augustin's avatar
Aymeric Augustin committed
15
from __future__ import absolute_import, division, print_function

import os
import unittest

from transformers import is_torch_available

from .tokenization_tests_commons import TemporaryDirectory
from .utils import require_torch


if is_torch_available():
    # These names are imported only when torch is reported as available.
    import torch

    from transformers import (
        AdamW,
        get_constant_schedule,
        get_constant_schedule_with_warmup,
        get_cosine_schedule_with_warmup,
        get_cosine_with_hard_restarts_schedule_with_warmup,
        get_linear_schedule_with_warmup,
    )
thomwolf's avatar
thomwolf committed
37

lukovnikov's avatar
lukovnikov committed
38

thomwolf's avatar
thomwolf committed
39
40
41
42
43
44
45
def unwrap_schedule(scheduler, num_steps=10):
    """Advance ``scheduler`` ``num_steps`` times and record its learning rates.

    Args:
        scheduler: object exposing ``step()`` and ``get_lr()``.
        num_steps: how many times ``step()`` is called.

    Returns:
        A list with one entry per step, each being whatever ``get_lr()``
        returned right after that step.
    """
    history = []
    for _step_index in range(num_steps):
        # Step first, then read: entry i reflects the state after step i + 1.
        scheduler.step()
        current = scheduler.get_lr()
        history.append(current)
    return history

46

47
48
49
50
51
52
53
def unwrap_and_save_reload_schedule(scheduler, num_steps=10):
    """Step ``scheduler`` and record its learning rates, reloading its state midway.

    Right after the step whose zero-based index equals ``num_steps // 2``, the
    scheduler's ``state_dict()`` is written with ``torch.save`` to a file inside
    a temporary directory, read back with ``torch.load`` and handed to
    ``load_state_dict()``. Stepping then continues on the same scheduler object.

    Args:
        scheduler: object exposing ``step()``, ``get_lr()``, ``state_dict()``
            and ``load_state_dict()``.
        num_steps: how many times ``step()`` is called.

    Returns:
        A list with one entry per step, each being whatever ``get_lr()``
        returned right after that step.
    """
    lrs = []
    reload_step = num_steps // 2
    for step in range(num_steps):
        scheduler.step()
        lrs.append(scheduler.get_lr())
        if step == reload_step:
            # Round-trip the state through disk; the temporary directory is
            # cleaned up when the ``with`` block exits.
            with TemporaryDirectory() as tmpdirname:
                file_name = os.path.join(tmpdirname, "schedule.bin")
                torch.save(scheduler.state_dict(), file_name)

                state_dict = torch.load(file_name)
                scheduler.load_state_dict(state_dict)
    return lrs

61

62
@require_torch
class OptimizationTest(unittest.TestCase):
    """Checks that ``AdamW`` drives a small tensor towards a fixed target."""

    def assertListAlmostEqual(self, list1, list2, tol):
        """Assert equal lengths and that paired items differ by at most ``tol``."""
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
            self.assertAlmostEqual(a, b, delta=tol)

    def test_adam_w(self):
        w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
        target = torch.tensor([0.4, 0.2, -0.5])
        criterion = torch.nn.MSELoss()
        # No warmup, constant schedule, no gradient clipping
        optimizer = AdamW(params=[w], lr=2e-1, weight_decay=0.0)
        for _ in range(100):
            loss = criterion(w, target)
            loss.backward()
            optimizer.step()
            w.grad.detach_()  # No zero_grad() function on simple tensors. we do it ourselves.
            w.grad.zero_()
        # After 100 updates the weights are expected to match the target values.
        self.assertListAlmostEqual(w.tolist(), [0.4, 0.2, -0.5], tol=1e-2)


84
@require_torch
class ScheduleInitTest(unittest.TestCase):
    """Checks the learning rates produced by each schedule factory.

    Every test builds a schedule on the shared optimizer (base lr 10.0), compares
    the per-step learning rates with a hard-coded expectation, then rebuilds the
    schedule and checks that saving and reloading its state midway yields the
    same sequence.
    """

    # Built when the class body is evaluated and shared by all test methods.
    # Each is None when torch is not available.
    m = torch.nn.Linear(50, 50) if is_torch_available() else None
    optimizer = AdamW(m.parameters(), lr=10.0) if is_torch_available() else None
    num_steps = 10

    def assertListAlmostEqual(self, list1, list2, tol):
        """Assert equal lengths and that paired items differ by at most ``tol``."""
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
            self.assertAlmostEqual(a, b, delta=tol)

    def test_constant_scheduler(self):
        scheduler = get_constant_schedule(self.optimizer)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [10.0] * self.num_steps
        # Each recorded entry is expected to hold exactly one learning rate.
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([lr[0] for lr in lrs], expected_learning_rates)

        # A fresh schedule with a mid-run save/reload must give the same values.
        scheduler = get_constant_schedule(self.optimizer)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([lr[0] for lr in lrs], [lr[0] for lr in lrs_2])

    def test_warmup_constant_scheduler(self):
        scheduler = get_constant_schedule_with_warmup(self.optimizer, num_warmup_steps=4)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [2.5, 5.0, 7.5, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([lr[0] for lr in lrs], expected_learning_rates)

        # A fresh schedule with a mid-run save/reload must give the same values.
        scheduler = get_constant_schedule_with_warmup(self.optimizer, num_warmup_steps=4)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([lr[0] for lr in lrs], [lr[0] for lr in lrs_2])

    def test_warmup_linear_scheduler(self):
        scheduler = get_linear_schedule_with_warmup(self.optimizer, num_warmup_steps=2, num_training_steps=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [5.0, 10.0, 8.75, 7.5, 6.25, 5.0, 3.75, 2.5, 1.25, 0.0]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([lr[0] for lr in lrs], expected_learning_rates)

        # A fresh schedule with a mid-run save/reload must give the same values.
        scheduler = get_linear_schedule_with_warmup(self.optimizer, num_warmup_steps=2, num_training_steps=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([lr[0] for lr in lrs], [lr[0] for lr in lrs_2])

    def test_warmup_cosine_scheduler(self):
        scheduler = get_cosine_schedule_with_warmup(self.optimizer, num_warmup_steps=2, num_training_steps=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [5.0, 10.0, 9.61, 8.53, 6.91, 5.0, 3.08, 1.46, 0.38, 0.0]
        self.assertEqual(len(lrs[0]), 1)
        # Expected values are rounded, so compare with a tolerance.
        self.assertListAlmostEqual([lr[0] for lr in lrs], expected_learning_rates, tol=1e-2)

        # A fresh schedule with a mid-run save/reload must give the same values.
        scheduler = get_cosine_schedule_with_warmup(self.optimizer, num_warmup_steps=2, num_training_steps=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([lr[0] for lr in lrs], [lr[0] for lr in lrs_2])

    def test_warmup_cosine_hard_restart_scheduler(self):
        scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            self.optimizer, num_warmup_steps=2, num_cycles=2, num_training_steps=10
        )
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [5.0, 10.0, 8.53, 5.0, 1.46, 10.0, 8.53, 5.0, 1.46, 0.0]
        self.assertEqual(len(lrs[0]), 1)
        # Expected values are rounded, so compare with a tolerance.
        self.assertListAlmostEqual([lr[0] for lr in lrs], expected_learning_rates, tol=1e-2)

        # A fresh schedule with a mid-run save/reload must give the same values.
        scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            self.optimizer, num_warmup_steps=2, num_cycles=2, num_training_steps=10
        )
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([lr[0] for lr in lrs], [lr[0] for lr in lrs_2])
lukovnikov's avatar
lukovnikov committed
153

154

155
156
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()