# coding=utf-8
# Copyright 2018 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
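"""End-to-end tests for the PyTorch `*_no_trainer` example scripts.

Each test builds a command line for one of the example scripts, runs it through
`accelerate launch` on a small fixture dataset, and asserts on the metrics written
to `all_results.json` in the output directory. Assuming the usual `examples/pytorch`
location of this file in the repository, a typical invocation is:

    python -m pytest -sv examples/pytorch/test_accelerate_examples.py
"""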


import argparse
import json
import logging
import os
import shutil
import sys
import tempfile
import unittest
from unittest import mock

import torch
from accelerate.utils import write_basic_config

from transformers.testing_utils import TestCasePlus, get_gpu_count, run_command, slow, torch_device
from transformers.utils import is_apex_available


logging.basicConfig(level=logging.DEBUG)

logger = logging.getLogger()


def get_setup_file():
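    """Return the value of the `-f` flag passed on the command line, if any."""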
    parser = argparse.ArgumentParser()
    parser.add_argument("-f")
    args = parser.parse_args()
    return args.f


def get_results(output_dir):
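    """Load the metrics from `all_results.json` in `output_dir`; raise if the file is missing."""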
    results = {}
    path = os.path.join(output_dir, "all_results.json")
    if os.path.exists(path):
        with open(path, "r") as f:
            results = json.load(f)
    else:
        raise ValueError(f"can't find {path}")
    return results


def is_cuda_and_apex_available():
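    """Return True when running on a CUDA device with NVIDIA Apex installed, i.e. when `--fp16` can be exercised."""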
    is_using_cuda = torch.cuda.is_available() and torch_device == "cuda"
    return is_using_cuda and is_apex_available()


stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)


class ExamplesTestsNoTrainer(TestCasePlus):
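    """Each test launches one `*_no_trainer` example via `accelerate launch` and checks its metrics and checkpoints."""
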
    @classmethod
    def setUpClass(cls):
        # Write an Accelerate config that works on CPU, single-GPU, and multi-GPU setups
        cls.tmpdir = tempfile.mkdtemp()
        cls.configPath = os.path.join(cls.tmpdir, "default_config.yml")
        write_basic_config(save_location=cls.configPath)
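        # Every test prepends these args so its example script runs under `accelerate launch` with the config above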
        cls._launch_args = ["accelerate", "launch", "--config_file", cls.configPath]

    @classmethod
    def tearDownClass(cls):
        shutil.rmtree(cls.tmpdir)


    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_glue_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/text-classification/run_glue_no_trainer.py
            --model_name_or_path distilbert-base-uncased
            --output_dir {tmp_dir}
            --train_file ./tests/fixtures/tests_samples/MRPC/train.csv
            --validation_file ./tests/fixtures/tests_samples/MRPC/dev.csv
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --learning_rate=1e-4
            --seed=42
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        if is_cuda_and_apex_available():
            testargs.append("--fp16")

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.75)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "glue_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_clm_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_clm_no_trainer.py
            --model_name_or_path distilgpt2
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --block_size 128
            --per_device_train_batch_size 5
            --per_device_eval_batch_size 5
            --num_train_epochs 2
            --output_dir {tmp_dir}
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        if torch.cuda.device_count() > 1:
            # Skipping because there are not enough batches to train the model; it would also need drop_last to work.
            return

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertLess(result["perplexity"], 100)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "clm_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_mlm_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_mlm_no_trainer.py
            --model_name_or_path distilroberta-base
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --output_dir {tmp_dir}
            --num_train_epochs=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertLess(result["perplexity"], 42)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "mlm_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_ner_no_trainer(self):
        # With so little data, distributed training needs more epochs to get a score on par with 0/1 GPU runs
        epochs = 7 if get_gpu_count() > 1 else 2

        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/token-classification/run_ner_no_trainer.py
            --model_name_or_path bert-base-uncased
            --train_file tests/fixtures/tests_samples/conll/sample.json
            --validation_file tests/fixtures/tests_samples/conll/sample.json
            --output_dir {tmp_dir}
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=2
            --num_train_epochs={epochs}
            --seed 7
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.75)
        self.assertLess(result["train_loss"], 0.5)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "ner_no_trainer")))

    @unittest.skip(reason="Fix me @muellerzr")
    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_squad_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/question-answering/run_qa_no_trainer.py
            --model_name_or_path bert-base-uncased
            --version_2_with_negative
            --train_file tests/fixtures/tests_samples/SQUAD/sample.json
            --validation_file tests/fixtures/tests_samples/SQUAD/sample.json
            --output_dir {tmp_dir}
            --seed=42
            --max_train_steps=10
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        # Because we use --version_2_with_negative, the testing script uses the SQuAD v2 metrics.
        self.assertGreaterEqual(result["eval_f1"], 28)
        self.assertGreaterEqual(result["eval_exact"], 28)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "qa_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_swag_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/multiple-choice/run_swag_no_trainer.py
            --model_name_or_path bert-base-uncased
            --train_file tests/fixtures/tests_samples/swag/sample.json
            --validation_file tests/fixtures/tests_samples/swag/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=20
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.8)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "swag_no_trainer")))

    @slow
    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_summarization_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/summarization/run_summarization_no_trainer.py
            --model_name_or_path t5-small
            --train_file tests/fixtures/tests_samples/xsum/sample.json
            --validation_file tests/fixtures/tests_samples/xsum/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=50
            --num_warmup_steps=8
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_rouge1"], 10)
        self.assertGreaterEqual(result["eval_rouge2"], 2)
        self.assertGreaterEqual(result["eval_rougeL"], 7)
        self.assertGreaterEqual(result["eval_rougeLsum"], 7)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "summarization_no_trainer")))

    @slow
    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_translation_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/translation/run_translation_no_trainer.py
            --model_name_or_path sshleifer/student_marian_en_ro_6_1
            --train_file tests/fixtures/tests_samples/wmt16/sample.json
            --validation_file tests/fixtures/tests_samples/wmt16/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=50
            --num_warmup_steps=8
            --num_beams=6
            --learning_rate=3e-3
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --source_lang en_XX
            --target_lang ro_RO
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_bleu"], 30)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "translation_no_trainer")))

    @slow
    def test_run_semantic_segmentation_no_trainer(self):
        stream_handler = logging.StreamHandler(sys.stdout)
        logger.addHandler(stream_handler)

        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
            --dataset_name huggingface/semantic-segmentation-test-sample
            --output_dir {tmp_dir}
            --max_train_steps=10
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_overall_accuracy"], 0.10)

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_image_classification_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
            --model_name_or_path google/vit-base-patch16-224-in21k
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
            --learning_rate 1e-4
            --per_device_train_batch_size 2
            --per_device_eval_batch_size 1
            --max_train_steps 2
            --train_val_split 0.1
            --seed 42
            --output_dir {tmp_dir}
            --with_tracking
            --checkpointing_steps 1
        """.split()

        if is_cuda_and_apex_available():
            testargs.append("--fp16")

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        # The base model scores 25%, so 0.6 checks that training actually improved accuracy
        self.assertGreaterEqual(result["eval_accuracy"], 0.6)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "step_1")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "image_classification_no_trainer")))