test_accelerate_examples.py 13 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse
import json
import logging
import os
21
import shutil
22
import sys
23
import tempfile
Zachary Mueller's avatar
Zachary Mueller committed
24
from unittest import mock
25

26
from accelerate.utils import write_basic_config
27

28
29
30
31
32
33
34
35
from transformers.testing_utils import (
    TestCasePlus,
    backend_device_count,
    is_torch_fp16_available_on_device,
    run_command,
    slow,
    torch_device,
)
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60


# Configure the root logger at DEBUG level so everything logged while the tests
# run is emitted. `basicConfig` only installs its default handler when the root
# logger has none yet.
logging.basicConfig(level=logging.DEBUG)

# `getLogger()` without a name returns the root logger, so handlers attached to
# `logger` below apply to every logger that propagates to the root.
logger = logging.getLogger()


def get_setup_file():
    """Return the value passed to the ``-f`` command-line option.

    The process arguments are parsed with a throwaway ``argparse`` parser that
    only knows ``-f``.

    Returns:
        The string given after ``-f``, or ``None`` when the option is absent.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-f")
    return cli.parse_args().f


def get_results(output_dir):
    """Load the metrics written to ``all_results.json`` inside *output_dir*.

    Args:
        output_dir: Directory expected to contain ``all_results.json``.

    Returns:
        The decoded JSON content of the file.

    Raises:
        ValueError: If ``all_results.json`` does not exist in *output_dir*.
    """
    results_file = os.path.join(output_dir, "all_results.json")
    # Fail loudly when the file is missing rather than returning an empty dict.
    if not os.path.exists(results_file):
        raise ValueError(f"can't find {results_file}")
    with open(results_file, "r") as handle:
        return json.load(handle)


61
62
63
64
# Additionally send every record that reaches the root logger to stdout.
# This runs once, at import time, and the handler stays attached for the life
# of the process.
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)


65
class ExamplesTestsNoTrainer(TestCasePlus):
    """Run the ``*_no_trainer.py`` PyTorch example scripts via ``accelerate launch``.

    Each test builds a command line for one example script, runs it with
    ``run_command``, loads ``all_results.json`` from the script's output
    directory and checks the reported metrics and the expected output
    directories.
    """

    @classmethod
    def setUpClass(cls):
        """Write a shared Accelerate config file and build the launch prefix."""
        super().setUpClass()
        # Write Accelerate config, will pick up on CPU, GPU, and multi-GPU
        cls.tmpdir = tempfile.mkdtemp()
        try:
            cls.configPath = os.path.join(cls.tmpdir, "default_config.yml")
            write_basic_config(save_location=cls.configPath)
        except Exception:
            # unittest does not call tearDownClass when setUpClass raises, so
            # remove the temporary directory here to avoid leaking it.
            shutil.rmtree(cls.tmpdir, ignore_errors=True)
            raise
        cls._launch_args = ["accelerate", "launch", "--config_file", cls.configPath]

    @classmethod
    def tearDownClass(cls):
        """Remove the temporary directory created in ``setUpClass``."""
        shutil.rmtree(cls.tmpdir)
        super().tearDownClass()

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_glue_no_trainer(self):
        """Run ``run_glue_no_trainer.py`` on the MRPC fixtures and check accuracy."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/text-classification/run_glue_no_trainer.py
            --model_name_or_path distilbert-base-uncased
            --output_dir {tmp_dir}
            --train_file ./tests/fixtures/tests_samples/MRPC/train.csv
            --validation_file ./tests/fixtures/tests_samples/MRPC/dev.csv
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --learning_rate=1e-4
            --seed=42
            --num_warmup_steps=2
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        # Only request half precision when the current device supports it.
        if is_torch_fp16_available_on_device(torch_device):
            testargs.append("--fp16")

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.75)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "glue_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_clm_no_trainer(self):
        """Run ``run_clm_no_trainer.py`` on the sample text and check perplexity."""
        if backend_device_count(torch_device) > 1:
            # Report an explicit skip instead of returning silently, which
            # would show up as a passing test even though nothing ran.
            self.skipTest(
                "Skipping because there are not enough batches to train the model + would need a drop_last to work."
            )

        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_clm_no_trainer.py
            --model_name_or_path distilgpt2
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --block_size 128
            --per_device_train_batch_size 5
            --per_device_eval_batch_size 5
            --num_train_epochs 2
            --output_dir {tmp_dir}
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertLess(result["perplexity"], 100)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "clm_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_mlm_no_trainer(self):
        """Run ``run_mlm_no_trainer.py`` on the sample text and check perplexity."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_mlm_no_trainer.py
            --model_name_or_path distilroberta-base
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --output_dir {tmp_dir}
            --num_train_epochs=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertLess(result["perplexity"], 42)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "mlm_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_ner_no_trainer(self):
        """Run ``run_ner_no_trainer.py`` on the CoNLL sample and check accuracy/loss."""
        # with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
        epochs = 7 if backend_device_count(torch_device) > 1 else 2

        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/token-classification/run_ner_no_trainer.py
            --model_name_or_path bert-base-uncased
            --train_file tests/fixtures/tests_samples/conll/sample.json
            --validation_file tests/fixtures/tests_samples/conll/sample.json
            --output_dir {tmp_dir}
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=2
            --num_train_epochs={epochs}
            --seed 7
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.75)
        self.assertLess(result["train_loss"], 0.5)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "ner_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_squad_no_trainer(self):
        """Run ``run_qa_no_trainer.py`` on the SQuAD sample and check F1/exact match."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/question-answering/run_qa_no_trainer.py
            --model_name_or_path bert-base-uncased
            --version_2_with_negative
            --train_file tests/fixtures/tests_samples/SQUAD/sample.json
            --validation_file tests/fixtures/tests_samples/SQUAD/sample.json
            --output_dir {tmp_dir}
            --seed=42
            --max_train_steps=10
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        # Because we use --version_2_with_negative the testing script uses SQuAD v2 metrics.
        self.assertGreaterEqual(result["eval_f1"], 28)
        self.assertGreaterEqual(result["eval_exact"], 28)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "qa_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_swag_no_trainer(self):
        """Run ``run_swag_no_trainer.py`` on the SWAG sample and check accuracy."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/multiple-choice/run_swag_no_trainer.py
            --model_name_or_path bert-base-uncased
            --train_file tests/fixtures/tests_samples/swag/sample.json
            --validation_file tests/fixtures/tests_samples/swag/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=20
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.8)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "swag_no_trainer")))

    @slow
    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_summarization_no_trainer(self):
        """Run ``run_summarization_no_trainer.py`` on the XSum sample and check ROUGE."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/summarization/run_summarization_no_trainer.py
            --model_name_or_path t5-small
            --train_file tests/fixtures/tests_samples/xsum/sample.json
            --validation_file tests/fixtures/tests_samples/xsum/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=50
            --num_warmup_steps=8
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_rouge1"], 10)
        self.assertGreaterEqual(result["eval_rouge2"], 2)
        self.assertGreaterEqual(result["eval_rougeL"], 7)
        self.assertGreaterEqual(result["eval_rougeLsum"], 7)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "summarization_no_trainer")))

    @slow
    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_translation_no_trainer(self):
        """Run ``run_translation_no_trainer.py`` on the WMT16 sample and check BLEU."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        # NOTE(review): --source_lang/--target_lang are passed twice below
        # (en/ro, then en_XX/ro_RO). Presumably the script's argument parser
        # keeps the later values; confirm and drop the redundant pair.
        testargs = f"""
            {self.examples_dir}/pytorch/translation/run_translation_no_trainer.py
            --model_name_or_path sshleifer/student_marian_en_ro_6_1
            --source_lang en
            --target_lang ro
            --train_file tests/fixtures/tests_samples/wmt16/sample.json
            --validation_file tests/fixtures/tests_samples/wmt16/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=50
            --num_warmup_steps=8
            --num_beams=6
            --learning_rate=3e-3
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --source_lang en_XX
            --target_lang ro_RO
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_bleu"], 30)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "translation_no_trainer")))

    @slow
    def test_run_semantic_segmentation_no_trainer(self):
        """Run ``run_semantic_segmentation_no_trainer.py`` and check overall accuracy."""
        # No handler is registered here: the module already attaches a stdout
        # handler to the root logger at import time, and adding another one per
        # test run duplicated every log line and was never removed.
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
            --dataset_name huggingface/semantic-segmentation-test-sample
            --output_dir {tmp_dir}
            --max_train_steps=10
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_overall_accuracy"], 0.10)

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_image_classification_no_trainer(self):
        """Run ``run_image_classification_no_trainer.py`` and check accuracy."""
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
            --model_name_or_path google/vit-base-patch16-224-in21k
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
            --learning_rate 1e-4
            --per_device_train_batch_size 2
            --per_device_eval_batch_size 1
            --max_train_steps 2
            --train_val_split 0.1
            --seed 42
            --output_dir {tmp_dir}
            --with_tracking
            --checkpointing_steps 1
        """.split()

        # Only request half precision when the current device supports it.
        if is_torch_fp16_available_on_device(torch_device):
            testargs.append("--fp16")

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        # The base model scores a 25%
        self.assertGreaterEqual(result["eval_accuracy"], 0.4)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "step_1")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "image_classification_no_trainer")))