# coding=utf-8
# Copyright 2018 HuggingFace Inc..
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import argparse
import json
import logging
import os
import shutil
import sys
import tempfile
from unittest import mock

from accelerate.utils import write_basic_config

from transformers.testing_utils import (
    TestCasePlus,
    backend_device_count,
    run_command,
    slow,
    torch_device,
)


# Configure the root logger at DEBUG level so every record emitted while the tests run is kept.
logging.basicConfig(level=logging.DEBUG)

# `getLogger()` without a name returns the root logger; the rest of this module attaches handlers to it.
logger = logging.getLogger()


def get_setup_file():
    """Return the value given to the ``-f`` command-line option.

    The process arguments are parsed with a parser that only knows ``-f``.

    Returns:
        The string following ``-f``, or ``None`` when the option is absent.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("-f")
    return cli.parse_args().f


def get_results(output_dir):
    """Load the metrics stored in ``all_results.json`` under ``output_dir``.

    Args:
        output_dir: Directory expected to contain ``all_results.json``.

    Returns:
        The decoded JSON content of the file.

    Raises:
        ValueError: If ``all_results.json`` does not exist in ``output_dir``.
    """
    path = os.path.join(output_dir, "all_results.json")
    # Fail early with an explicit message when the file was never written.
    if not os.path.exists(path):
        raise ValueError(f"can't find {path}")
    with open(path, "r") as handle:
        return json.load(handle)


# Mirror every record that reaches `logger` to stdout so it shows up in the test output.
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)


class ExamplesTestsNoTrainer(TestCasePlus):
    """Run the ``*_no_trainer.py`` example scripts through ``accelerate launch``.

    Every test builds a command line for one example script, runs it with
    ``run_command`` and then checks the metrics the script wrote to
    ``all_results.json`` in its output directory, plus the checkpoint and
    tracking folders where the test passes the matching options.
    """

    @classmethod
    def setUpClass(cls):
        # Write Accelerate config, will pick up on CPU, GPU, and multi-GPU
        cls.tmpdir = tempfile.mkdtemp()
        cls.configPath = os.path.join(cls.tmpdir, "default_config.yml")
        write_basic_config(save_location=cls.configPath)
        # Command prefix shared by every test; the example script and its arguments are appended to it.
        cls._launch_args = ["accelerate", "launch", "--config_file", cls.configPath]

    @classmethod
    def tearDownClass(cls):
        # Remove the directory holding the generated Accelerate config.
        shutil.rmtree(cls.tmpdir)

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_glue_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/text-classification/run_glue_no_trainer.py
            --model_name_or_path distilbert-base-uncased
            --output_dir {tmp_dir}
            --train_file ./tests/fixtures/tests_samples/MRPC/train.csv
            --validation_file ./tests/fixtures/tests_samples/MRPC/dev.csv
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --learning_rate=1e-4
            --seed=42
            --num_warmup_steps=2
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.75)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "glue_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_clm_no_trainer(self):
        if backend_device_count(torch_device) > 1:
            # Report the test as skipped rather than letting it silently pass.
            self.skipTest(
                "Not enough batches to train the model on several devices; would need a drop_last to work."
            )

        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_clm_no_trainer.py
            --model_name_or_path distilgpt2
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --block_size 128
            --per_device_train_batch_size 5
            --per_device_eval_batch_size 5
            --num_train_epochs 2
            --output_dir {tmp_dir}
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertLess(result["perplexity"], 100)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "clm_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_mlm_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/language-modeling/run_mlm_no_trainer.py
            --model_name_or_path distilroberta-base
            --train_file ./tests/fixtures/sample_text.txt
            --validation_file ./tests/fixtures/sample_text.txt
            --output_dir {tmp_dir}
            --num_train_epochs=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertLess(result["perplexity"], 42)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "mlm_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_ner_no_trainer(self):
        # with so little data distributed training needs more epochs to get the score on par with 0/1 gpu
        epochs = 7 if backend_device_count(torch_device) > 1 else 2

        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/token-classification/run_ner_no_trainer.py
            --model_name_or_path bert-base-uncased
            --train_file tests/fixtures/tests_samples/conll/sample.json
            --validation_file tests/fixtures/tests_samples/conll/sample.json
            --output_dir {tmp_dir}
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=2
            --num_train_epochs={epochs}
            --seed 7
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.75)
        self.assertLess(result["train_loss"], 0.6)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "ner_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_squad_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/question-answering/run_qa_no_trainer.py
            --model_name_or_path bert-base-uncased
            --version_2_with_negative
            --train_file tests/fixtures/tests_samples/SQUAD/sample.json
            --validation_file tests/fixtures/tests_samples/SQUAD/sample.json
            --output_dir {tmp_dir}
            --seed=42
            --max_train_steps=10
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        # Because we use --version_2_with_negative the testing script uses SQuAD v2 metrics.
        self.assertGreaterEqual(result["eval_f1"], 28)
        self.assertGreaterEqual(result["eval_exact"], 28)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "qa_no_trainer")))

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_swag_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/multiple-choice/run_swag_no_trainer.py
            --model_name_or_path bert-base-uncased
            --train_file tests/fixtures/tests_samples/swag/sample.json
            --validation_file tests/fixtures/tests_samples/swag/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=20
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_accuracy"], 0.8)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "swag_no_trainer")))

    @slow
    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_summarization_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/summarization/run_summarization_no_trainer.py
            --model_name_or_path t5-small
            --train_file tests/fixtures/tests_samples/xsum/sample.json
            --validation_file tests/fixtures/tests_samples/xsum/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=50
            --num_warmup_steps=8
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_rouge1"], 10)
        self.assertGreaterEqual(result["eval_rouge2"], 2)
        self.assertGreaterEqual(result["eval_rougeL"], 7)
        self.assertGreaterEqual(result["eval_rougeLsum"], 7)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "summarization_no_trainer")))

    @slow
    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_translation_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        # NOTE(review): --source_lang/--target_lang are each passed twice (en/ro, then en_XX/ro_RO);
        # presumably the later values win -- confirm against the script's argument parser.
        testargs = f"""
            {self.examples_dir}/pytorch/translation/run_translation_no_trainer.py
            --model_name_or_path sshleifer/student_marian_en_ro_6_1
            --source_lang en
            --target_lang ro
            --train_file tests/fixtures/tests_samples/wmt16/sample.json
            --validation_file tests/fixtures/tests_samples/wmt16/sample.json
            --output_dir {tmp_dir}
            --max_train_steps=50
            --num_warmup_steps=8
            --num_beams=6
            --learning_rate=3e-3
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --source_lang en_XX
            --target_lang ro_RO
            --checkpointing_steps epoch
            --with_tracking
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_bleu"], 30)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "epoch_0")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "translation_no_trainer")))

    @slow
    def test_run_semantic_segmentation_no_trainer(self):
        stream_handler = logging.StreamHandler(sys.stdout)
        logger.addHandler(stream_handler)
        # The logger is shared module state: detach this handler when the test ends so it
        # does not accumulate across runs.
        self.addCleanup(logger.removeHandler, stream_handler)

        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py
            --dataset_name huggingface/semantic-segmentation-test-sample
            --output_dir {tmp_dir}
            --max_train_steps=10
            --num_warmup_steps=2
            --learning_rate=2e-4
            --per_device_train_batch_size=2
            --per_device_eval_batch_size=1
            --checkpointing_steps epoch
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        self.assertGreaterEqual(result["eval_overall_accuracy"], 0.10)

    @mock.patch.dict(os.environ, {"WANDB_MODE": "offline"})
    def test_run_image_classification_no_trainer(self):
        tmp_dir = self.get_auto_remove_tmp_dir()
        testargs = f"""
            {self.examples_dir}/pytorch/image-classification/run_image_classification_no_trainer.py
            --model_name_or_path google/vit-base-patch16-224-in21k
            --dataset_name hf-internal-testing/cats_vs_dogs_sample
            --learning_rate 1e-4
            --per_device_train_batch_size 2
            --per_device_eval_batch_size 1
            --max_train_steps 2
            --train_val_split 0.1
            --seed 42
            --output_dir {tmp_dir}
            --with_tracking
            --checkpointing_steps 1
        """.split()

        run_command(self._launch_args + testargs)
        result = get_results(tmp_dir)
        # The base model scores a 25%
        self.assertGreaterEqual(result["eval_accuracy"], 0.4)
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "step_1")))
        self.assertTrue(os.path.exists(os.path.join(tmp_dir, "image_classification_no_trainer")))