import unittest

import datasets
import numpy as np

from transformers import AutoTokenizer, TrainingArguments, is_torch_available
from transformers.testing_utils import get_tests_dir, require_torch, slow


if is_torch_available():
    import torch
    from torch.utils.data import IterableDataset

    from transformers import (
        AutoModelForSequenceClassification,
        GlueDataset,
        GlueDataTrainingArguments,
        LineByLineTextDataset,
        Trainer,
    )


PATH_SAMPLE_TEXT = f"{get_tests_dir()}/fixtures/sample_text.txt"


class RegressionDataset:
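    """Synthetic dataset for ``y = a * x + b`` regression with Gaussian noise, optionally with several label names."""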
    def __init__(self, a=2, b=3, length=64, seed=42, label_names=None):
        np.random.seed(seed)
        self.label_names = ["labels"] if label_names is None else label_names
        self.length = length
        self.x = np.random.normal(size=(length,)).astype(np.float32)
        self.ys = [a * self.x + b + np.random.normal(scale=0.1, size=(length,)) for _ in self.label_names]
        self.ys = [y.astype(np.float32) for y in self.ys]

    def __len__(self):
        return self.length

    def __getitem__(self, i):
        result = {name: y[i] for name, y in zip(self.label_names, self.ys)}
        result["input_x"] = self.x[i]
        return result


class AlmostAccuracy:
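    """Metric callable counting a prediction as correct when it is within ``thresh`` of its label."""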
    def __init__(self, thresh=0.25):
        self.thresh = thresh

    def __call__(self, eval_pred):
        predictions, labels = eval_pred
        true = np.abs(predictions - labels) <= self.thresh
        return {"accuracy": true.astype(np.float32).mean().item()}


if is_torch_available():

    class SampleIterableDataset(IterableDataset):
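        """Minimal ``IterableDataset`` that yields the lines of a text file."""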
        def __init__(self, file_path):
            self.file_path = file_path

        def parse_file(self):
            # Use a context manager so the file handle is closed after reading.
            with open(self.file_path, "r") as f:
                return f.readlines()

        def __iter__(self):
            return iter(self.parse_file())

    class RegressionModel(torch.nn.Module):
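        """Trivial linear model ``y = a * input_x + b``; returns an MSE loss when labels are passed and can
        duplicate its outputs to mimic models returning several tensors."""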
        def __init__(self, a=0, b=0, double_output=False):
            super().__init__()
            self.a = torch.nn.Parameter(torch.tensor(a).float())
            self.b = torch.nn.Parameter(torch.tensor(b).float())
            self.double_output = double_output
            self.config = None

        def forward(self, input_x=None, labels=None, **kwargs):
            y = input_x * self.a + self.b
            if labels is None:
                return (y, y) if self.double_output else (y,)
            loss = torch.nn.functional.mse_loss(y, labels)
            return (loss, y, y) if self.double_output else (loss, y)

    def get_regression_trainer(a=0, b=0, double_output=False, train_len=64, eval_len=64, **kwargs):
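        """Builds a ``Trainer`` on a small regression problem; ``compute_metrics``, ``data_collator`` and
        ``optimizers`` are popped from ``kwargs``, the rest is forwarded to ``TrainingArguments``."""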
        label_names = kwargs.get("label_names", None)
        train_dataset = RegressionDataset(length=train_len, label_names=label_names)
        eval_dataset = RegressionDataset(length=eval_len, label_names=label_names)
        model = RegressionModel(a, b, double_output)
        compute_metrics = kwargs.pop("compute_metrics", None)
        data_collator = kwargs.pop("data_collator", None)
        optimizers = kwargs.pop("optimizers", (None, None))
        args = TrainingArguments("./regression", **kwargs)
        return Trainer(
            model,
            args,
            data_collator=data_collator,
            train_dataset=train_dataset,
            eval_dataset=eval_dataset,
            compute_metrics=compute_metrics,
            optimizers=optimizers,
        )


@require_torch
class TrainerIntegrationTest(unittest.TestCase):
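    """Integration tests exercising ``Trainer`` training, evaluation and prediction on small regression problems."""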
    def setUp(self):
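        """Trains two reference models (default seed and seed=314) whose weights back ``check_trained_model``."""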
        args = TrainingArguments(".")
        self.n_epochs = args.num_train_epochs
        self.batch_size = args.train_batch_size
        trainer = get_regression_trainer(learning_rate=0.1)
        trainer.train()
        self.default_trained_model = (trainer.model.a, trainer.model.b)

        trainer = get_regression_trainer(learning_rate=0.1, seed=314)
        trainer.train()
        self.alternate_trained_model = (trainer.model.a, trainer.model.b)

    def check_trained_model(self, model, alternate_seed=False):
        # Checks the model against a reference training run with learning_rate=0.1
        (a, b) = self.alternate_trained_model if alternate_seed else self.default_trained_model
        self.assertTrue(torch.allclose(model.a, a))
        self.assertTrue(torch.allclose(model.b, b))

    def test_reproducible_training(self):
        # Checks that training worked: the model was trained and the seed made training reproducible.
        trainer = get_regression_trainer(learning_rate=0.1)
        trainer.train()
        self.check_trained_model(trainer.model)

        # Checks that a different seed gets different (reproducible) results.
        trainer = get_regression_trainer(learning_rate=0.1, seed=314)
        trainer.train()
        self.check_trained_model(trainer.model, alternate_seed=True)

    def test_number_of_steps_in_training(self):
        # Regular training has n_epochs * len(train_dl) steps
        trainer = get_regression_trainer(learning_rate=0.1)
        train_output = trainer.train()
        self.assertEqual(train_output.global_step, self.n_epochs * 64 / self.batch_size)

        # Check passing num_train_epochs works (and a float version too):
        trainer = get_regression_trainer(learning_rate=0.1, num_train_epochs=1.5)
        train_output = trainer.train()
        self.assertEqual(train_output.global_step, int(1.5 * 64 / self.batch_size))

        # If we pass a max_steps, num_train_epochs is ignored
        trainer = get_regression_trainer(learning_rate=0.1, max_steps=10)
        train_output = trainer.train()
        self.assertEqual(train_output.global_step, 10)

    def test_train_and_eval_dataloaders(self):
        n_gpu = max(1, torch.cuda.device_count())
        trainer = get_regression_trainer(learning_rate=0.1, per_device_train_batch_size=16)
        self.assertEqual(trainer.get_train_dataloader().batch_size, 16 * n_gpu)
        trainer = get_regression_trainer(learning_rate=0.1, per_device_eval_batch_size=16)
        self.assertEqual(trainer.get_eval_dataloader().batch_size, 16 * n_gpu)

        # Check drop_last works
        trainer = get_regression_trainer(
            train_len=66, eval_len=74, learning_rate=0.1, per_device_train_batch_size=16, per_device_eval_batch_size=32
        )
        self.assertEqual(len(trainer.get_train_dataloader()), 66 // (16 * n_gpu) + 1)
        self.assertEqual(len(trainer.get_eval_dataloader()), 74 // (32 * n_gpu) + 1)

        trainer = get_regression_trainer(
            train_len=66,
            eval_len=74,
            learning_rate=0.1,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=32,
            dataloader_drop_last=True,
        )
        self.assertEqual(len(trainer.get_train_dataloader()), 66 // (16 * n_gpu))
        self.assertEqual(len(trainer.get_eval_dataloader()), 74 // (32 * n_gpu))

        # Check that passing a new dataset for evaluation works
        new_eval_dataset = RegressionDataset(length=128)
        self.assertEqual(len(trainer.get_eval_dataloader(new_eval_dataset)), 128 // (32 * n_gpu))

    def test_evaluate(self):
        trainer = get_regression_trainer(a=1.5, b=2.5, compute_metrics=AlmostAccuracy())
        results = trainer.evaluate()

        x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
        pred = 1.5 * x + 2.5
        expected_loss = ((pred - y) ** 2).mean()
        self.assertAlmostEqual(results["eval_loss"], expected_loss)
        expected_acc = AlmostAccuracy()((pred, y))["accuracy"]
        self.assertAlmostEqual(results["eval_accuracy"], expected_acc)

        # With a number of elements not a round multiple of the batch size
        trainer = get_regression_trainer(a=1.5, b=2.5, eval_len=66, compute_metrics=AlmostAccuracy())
        results = trainer.evaluate()

        x, y = trainer.eval_dataset.x, trainer.eval_dataset.ys[0]
        pred = 1.5 * x + 2.5
        expected_loss = ((pred - y) ** 2).mean()
        self.assertAlmostEqual(results["eval_loss"], expected_loss)
        expected_acc = AlmostAccuracy()((pred, y))["accuracy"]
        self.assertAlmostEqual(results["eval_accuracy"], expected_acc)

    def test_predict(self):
        trainer = get_regression_trainer(a=1.5, b=2.5)
        preds = trainer.predict(trainer.eval_dataset).predictions
        x = trainer.eval_dataset.x
        self.assertTrue(np.allclose(preds, 1.5 * x + 2.5))

        # With a number of elements not a round multiple of the batch size
        trainer = get_regression_trainer(a=1.5, b=2.5, eval_len=66)
        preds = trainer.predict(trainer.eval_dataset).predictions
        x = trainer.eval_dataset.x
        self.assertTrue(np.allclose(preds, 1.5 * x + 2.5))

        # With more than one output of the model
        trainer = get_regression_trainer(a=1.5, b=2.5, double_output=True)
        preds = trainer.predict(trainer.eval_dataset).predictions
        x = trainer.eval_dataset.x
        # assertEqual, not assertTrue: assertTrue(len(preds), 2) would always pass.
        self.assertEqual(len(preds), 2)
        self.assertTrue(np.allclose(preds[0], 1.5 * x + 2.5))
        self.assertTrue(np.allclose(preds[1], 1.5 * x + 2.5))

        # With more than one output/label of the model
        trainer = get_regression_trainer(a=1.5, b=2.5, double_output=True, label_names=["labels", "labels_2"])
        outputs = trainer.predict(trainer.eval_dataset)
        preds = outputs.predictions
        labels = outputs.label_ids
        x = trainer.eval_dataset.x
        self.assertEqual(len(preds), 2)
        self.assertTrue(np.allclose(preds[0], 1.5 * x + 2.5))
        self.assertTrue(np.allclose(preds[1], 1.5 * x + 2.5))
        self.assertTrue(np.array_equal(labels[0], trainer.eval_dataset.ys[0]))
        self.assertTrue(np.array_equal(labels[1], trainer.eval_dataset.ys[1]))

    def test_trainer_with_datasets(self):
        np.random.seed(42)
        x = np.random.normal(size=(64,)).astype(np.float32)
        y = 2.0 * x + 3.0 + np.random.normal(scale=0.1, size=(64,))
        train_dataset = datasets.Dataset.from_dict({"input_x": x, "label": y})

        # Base training. Should have the same results as test_reproducible_training
        model = RegressionModel()
        args = TrainingArguments("./regression", learning_rate=0.1)
        trainer = Trainer(model, args, train_dataset=train_dataset)
        trainer.train()
        self.check_trained_model(trainer.model)

        # Can return tensors.
        train_dataset.set_format(type="torch")
        model = RegressionModel()
        trainer = Trainer(model, args, train_dataset=train_dataset)
        trainer.train()
        self.check_trained_model(trainer.model)

        # Adding one column not used by the model should have no impact
        z = np.random.normal(size=(64,)).astype(np.float32)
        train_dataset = datasets.Dataset.from_dict({"input_x": x, "label": y, "extra": z})
        model = RegressionModel()
        trainer = Trainer(model, args, train_dataset=train_dataset)
        trainer.train()
        self.check_trained_model(trainer.model)

    def test_custom_optimizer(self):
        train_dataset = RegressionDataset()
        args = TrainingArguments("./regression")
        model = RegressionModel()
        optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1.0)
        trainer = Trainer(model, args, train_dataset=train_dataset, optimizers=(optimizer, lr_scheduler))
        trainer.train()

        (a, b) = self.default_trained_model
        self.assertFalse(torch.allclose(trainer.model.a, a))
        self.assertFalse(torch.allclose(trainer.model.b, b))
        self.assertEqual(trainer.optimizer.state_dict()["param_groups"][0]["lr"], 1.0)

    def test_model_init(self):
        train_dataset = RegressionDataset()
        args = TrainingArguments("./regression", learning_rate=0.1)
        trainer = Trainer(args=args, train_dataset=train_dataset, model_init=lambda: RegressionModel())
        trainer.train()
        self.check_trained_model(trainer.model)

        # Re-training should restart from scratch, thus lead to the same results.
        trainer.train()
        self.check_trained_model(trainer.model)

        # Re-training should restart from scratch, thus lead to the same results, and the new seed should be used.
        trainer.args.seed = 314
        trainer.train()
        self.check_trained_model(trainer.model, alternate_seed=True)

    @slow
    def test_trainer_eval_mrpc(self):
        MODEL_ID = "bert-base-cased-finetuned-mrpc"
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
        data_args = GlueDataTrainingArguments(
            task_name="mrpc", data_dir=f"{get_tests_dir()}/fixtures/tests_samples/MRPC", overwrite_cache=True
        )
        eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")

        training_args = TrainingArguments(output_dir="./examples", no_cuda=True)
        trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset)
        result = trainer.evaluate()
        self.assertLess(result["eval_loss"], 0.2)

    @slow
    def test_trainer_eval_lm(self):
        MODEL_ID = "distilroberta-base"
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        dataset = LineByLineTextDataset(
            tokenizer=tokenizer,
            file_path=PATH_SAMPLE_TEXT,
            block_size=tokenizer.max_len_single_sentence,
        )
        self.assertEqual(len(dataset), 31)

    def test_trainer_iterable_dataset(self):
        MODEL_ID = "sshleifer/tiny-distilbert-base-cased"
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
        train_dataset = SampleIterableDataset(PATH_SAMPLE_TEXT)
        training_args = TrainingArguments(output_dir="./examples", no_cuda=True)
        trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
        loader = trainer.get_train_dataloader()
        self.assertIsInstance(loader, torch.utils.data.DataLoader)

    def test_num_train_epochs_in_training(self):
        # When len(train_dl) < gradient_accumulation_steps, training shouldn't raise a ``ZeroDivisionError`` if
        # ``max_steps`` is given; it should perform one update step per epoch.
        trainer = get_regression_trainer(
            max_steps=3, train_len=64, per_device_train_batch_size=16, gradient_accumulation_steps=5
        )
        train_output = trainer.train()
        self.assertEqual(train_output.global_step, 3)

        # Even if ``max_steps`` is not specified, we still expect one update step per epoch when
        # len(train_dl) < gradient_accumulation_steps.
        trainer = get_regression_trainer(train_len=64, per_device_train_batch_size=16, gradient_accumulation_steps=5)
        train_output = trainer.train()
        self.assertEqual(train_output.global_step, int(self.n_epochs))

    def test_flos_extraction(self):
        trainer = get_regression_trainer(learning_rate=0.1)

        def assert_flos_extraction(trainer, wrapped_model_to_check):
            self.assertEqual(trainer.model, trainer._actual_model(wrapped_model_to_check))
            self.assertGreaterEqual(getattr(trainer._actual_model(wrapped_model_to_check).config, "total_flos", 0), 0)

        # with plain model
        assert_flos_extraction(trainer, trainer.model)

        # with enforced DataParallel
        assert_flos_extraction(trainer, torch.nn.DataParallel(trainer.model))