Unverified Commit 190df585 authored by Stas Bekman, committed by GitHub

[github CI] add a multi-gpu job for all example tests (#8341)



* add a multi-gpu job for all example tests

* run only ported tests

* rename

* explain why env is re-activated on each step

* mark all unported/checked tests with @require_torch_non_multigpu_but_fix_me

* style

* Apply suggestions from code review
Co-authored-by: Sam Shleifer <sshleifer@gmail.com>
parent a39218b7
# configuration notes:
#
# - `source .env/bin/activate` currently needs to be run first thing in each step. Otherwise
# the step uses the system-wide python interpreter.
name: Self-hosted runner (scheduled)
on:
@@ -227,7 +232,7 @@ jobs:
python -c "import torch; print('Cuda available:', torch.cuda.is_available())"
python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())"
-   - name: Run all tests on GPU
+   - name: Run all tests on multi-GPU
env:
OMP_NUM_THREADS: 1
RUN_SLOW: yes
@@ -238,8 +243,20 @@ jobs:
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_torch_multiple_gpu_failures_short.txt
-   - name: Run all pipeline tests on GPU
+   - name: Run examples tests on multi-GPU
env:
OMP_NUM_THREADS: 1
RUN_SLOW: yes
run: |
source .env/bin/activate
python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples
- name: Failure short reports
if: ${{ always() }}
run: cat reports/examples_torch_multiple_gpu_failures_short.txt
- name: Run all pipeline tests on multi-GPU
if: ${{ always() }}
env:
TF_FORCE_GPU_ALLOW_GROWTH: "true"
@@ -306,7 +323,7 @@ jobs:
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))"
TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))"
-   - name: Run all tests on GPU
+   - name: Run all tests on multi-GPU
env:
OMP_NUM_THREADS: 1
RUN_SLOW: yes
@@ -318,7 +335,7 @@ jobs:
if: ${{ always() }}
run: cat reports/tests_tf_multiple_gpu_failures_short.txt
-   - name: Run all pipeline tests on GPU
+   - name: Run all pipeline tests on multi-GPU
if: ${{ always() }}
env:
TF_FORCE_GPU_ALLOW_GROWTH: "true"
......
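The configuration note at the top of this workflow exists because each `run:` block in GitHub Actions starts a fresh shell, so a virtualenv activated in an earlier step is not active in later ones. Below is a minimal sketch of the pattern the new job's steps follow; the job name and runner labels are illustrative assumptions, while the env vars and the pytest invocation mirror the added step above:

```yaml
jobs:
  run_examples_torch_multi_gpu:
    # assumption: these runner labels are illustrative, not the workflow's actual ones
    runs-on: [self-hosted, multi-gpu]
    steps:
      - name: Run examples tests on multi-GPU
        env:
          OMP_NUM_THREADS: 1
          RUN_SLOW: yes
        run: |
          # re-activate the venv first; otherwise this step uses the system-wide python
          source .env/bin/activate
          python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples
```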
@@ -4,7 +4,7 @@ import sys
from unittest.mock import patch
import run_glue_with_pabee
- from transformers.testing_utils import TestCasePlus
+ from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me
logging.basicConfig(level=logging.DEBUG)
@@ -20,6 +20,7 @@ def get_setup_file():
class PabeeTests(TestCasePlus):
@require_torch_non_multigpu_but_fix_me
def test_run_glue(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
......
@@ -5,7 +5,7 @@ import unittest
from unittest.mock import patch
import run_glue_deebert
- from transformers.testing_utils import slow
+ from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
logging.basicConfig(level=logging.DEBUG)
@@ -26,6 +26,7 @@ class DeeBertTests(unittest.TestCase):
logger.addHandler(stream_handler)
@slow
@require_torch_non_multigpu_but_fix_me
def test_glue_deebert_train(self):
train_args = """
......
@@ -16,6 +16,7 @@ from transformers.configuration_dpr import DPRConfig
from transformers.configuration_rag import RagConfig
from transformers.file_utils import is_datasets_available, is_faiss_available, is_psutil_available, is_torch_available
from transformers.retrieval_rag import CustomHFIndex
from transformers.testing_utils import require_torch_non_multigpu_but_fix_me
from transformers.tokenization_bart import BartTokenizer
from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
@@ -178,6 +179,7 @@ class RagRetrieverTest(TestCase):
retriever.init_retrieval(port)
return retriever
@require_torch_non_multigpu_but_fix_me
def test_pytorch_distributed_retriever_retrieve(self):
n_docs = 1
retriever = self.get_dummy_pytorch_distributed_retriever(init_retrieval=True)
@@ -193,6 +195,7 @@ class RagRetrieverTest(TestCase):
self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc
self.assertListEqual(doc_ids.tolist(), [[1], [0]])
@require_torch_non_multigpu_but_fix_me
def test_custom_hf_index_retriever_retrieve(self):
n_docs = 1
retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=False)
@@ -208,6 +211,7 @@ class RagRetrieverTest(TestCase):
self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc
self.assertListEqual(doc_ids.tolist(), [[1], [0]])
@require_torch_non_multigpu_but_fix_me
def test_custom_pytorch_distributed_retriever_retrieve_from_disk(self):
n_docs = 1
retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=True)
......
@@ -13,7 +13,7 @@ from distillation import BartSummarizationDistiller, distill_main
from finetune import SummarizationModule, main
from transformers import MarianMTModel
from transformers.file_utils import cached_path
- from transformers.testing_utils import TestCasePlus, require_torch_gpu, slow
+ from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multigpu_but_fix_me, slow
from utils import load_json
@@ -32,6 +32,7 @@ class TestMbartCc25Enro(TestCasePlus):
@slow
@require_torch_gpu
@require_torch_non_multigpu_but_fix_me
def test_model_download(self):
"""This warms up the cache so that we can time the next test without including download time, which varies between machines."""
MarianMTModel.from_pretrained(MARIAN_MODEL)
@@ -39,6 +40,7 @@ class TestMbartCc25Enro(TestCasePlus):
# @timeout_decorator.timeout(1200)
@slow
@require_torch_gpu
@require_torch_non_multigpu_but_fix_me
def test_train_mbart_cc25_enro_script(self):
env_vars_to_replace = {
"$MAX_LEN": 64,
@@ -127,6 +129,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
@timeout_decorator.timeout(600)
@slow
@require_torch_gpu
@require_torch_non_multigpu_but_fix_me
def test_opus_mt_distill_script(self):
data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro"
env_vars_to_replace = {
......
@@ -11,7 +11,7 @@ from save_len_file import save_len_file
from test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir
from transformers import AutoTokenizer
from transformers.modeling_bart import shift_tokens_right
- from transformers.testing_utils import TestCasePlus, slow
+ from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, slow
from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset
@@ -30,6 +30,7 @@ class TestAll(TestCasePlus):
],
)
@slow
@require_torch_non_multigpu_but_fix_me
def test_seq2seq_dataset_truncation(self, tok_name):
tokenizer = AutoTokenizer.from_pretrained(tok_name)
tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -69,6 +70,7 @@ class TestAll(TestCasePlus):
break # No need to test every batch
@parameterized.expand([BART_TINY, BERT_BASE_CASED])
@require_torch_non_multigpu_but_fix_me
def test_legacy_dataset_truncation(self, tok):
tokenizer = AutoTokenizer.from_pretrained(tok)
tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir())
@@ -93,6 +95,7 @@ class TestAll(TestCasePlus):
assert max_len_target > trunc_target # Truncated
break # No need to test every batch
@require_torch_non_multigpu_but_fix_me
def test_pack_dataset(self):
tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25")
@@ -111,6 +114,7 @@ class TestAll(TestCasePlus):
assert orig_paths == new_paths
@pytest.mark.skipif(not FAIRSEQ_AVAILABLE, reason="This test requires fairseq")
@require_torch_non_multigpu_but_fix_me
def test_dynamic_batch_size(self):
if not FAIRSEQ_AVAILABLE:
return
@@ -135,6 +139,7 @@ class TestAll(TestCasePlus):
if failures:
raise AssertionError(f"too many tokens in {len(failures)} batches")
@require_torch_non_multigpu_but_fix_me
def test_sortish_sampler_reduces_padding(self):
ds, _, tokenizer = self._get_dataset(max_len=512)
bs = 2
@@ -174,6 +179,7 @@ class TestAll(TestCasePlus):
)
return ds, max_tokens, tokenizer
@require_torch_non_multigpu_but_fix_me
def test_distributed_sortish_sampler_splits_indices_between_procs(self):
ds, max_tokens, tokenizer = self._get_dataset()
ids1 = set(DistributedSortishSampler(ds, 256, num_replicas=2, rank=0, add_extra_examples=False))
@@ -189,6 +195,7 @@ class TestAll(TestCasePlus):
PEGASUS_XSUM,
],
)
@require_torch_non_multigpu_but_fix_me
def test_dataset_kwargs(self, tok_name):
tokenizer = AutoTokenizer.from_pretrained(tok_name)
if tok_name == MBART_TINY:
......
@@ -19,7 +19,13 @@ import unittest
from parameterized import parameterized
from transformers import FSMTForConditionalGeneration, FSMTTokenizer
- from transformers.testing_utils import get_tests_dir, require_torch, slow, torch_device
+ from transformers.testing_utils import (
get_tests_dir,
require_torch,
require_torch_non_multigpu_but_fix_me,
slow,
torch_device,
)
from utils import calculate_bleu
@@ -48,6 +54,7 @@ class ModelEvalTester(unittest.TestCase):
]
)
@slow
@require_torch_non_multigpu_but_fix_me
def test_bleu_scores(self, pair, min_bleu_score):
# note: this test is not testing the best performance since it only evals a small batch
# but it should be enough to detect a regression in the output quality
......
@@ -4,7 +4,7 @@ import unittest
from make_student import create_student_by_copying_alternating_layers
from transformers import AutoConfig
from transformers.file_utils import cached_property
- from transformers.testing_utils import require_torch
+ from transformers.testing_utils import require_torch, require_torch_non_multigpu_but_fix_me
TINY_BART = "sshleifer/bart-tiny-random"
@@ -17,23 +17,28 @@ class MakeStudentTester(unittest.TestCase):
def teacher_config(self):
return AutoConfig.from_pretrained(TINY_BART)
@require_torch_non_multigpu_but_fix_me
def test_valid_t5(self):
student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1)
self.assertEqual(student.config.num_hidden_layers, 1)
@require_torch_non_multigpu_but_fix_me
def test_asymmetric_t5(self):
student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None)
@require_torch_non_multigpu_but_fix_me
def test_same_decoder_small_encoder(self):
student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None)
self.assertEqual(student.config.encoder_layers, 1)
self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers)
@require_torch_non_multigpu_but_fix_me
def test_small_enc_small_dec(self):
student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1)
self.assertEqual(student.config.encoder_layers, 1)
self.assertEqual(student.config.decoder_layers, 1)
@require_torch_non_multigpu_but_fix_me
def test_raises_assert(self):
with self.assertRaises(AssertionError):
create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None)
@@ -19,7 +19,14 @@ from run_eval import generate_summaries_or_translations, run_generate
from run_eval_search import run_search
from transformers import AutoConfig, AutoModelForSeq2SeqLM
from transformers.hf_api import HfApi
- from transformers.testing_utils import CaptureStderr, CaptureStdout, TestCasePlus, require_torch_gpu, slow
+ from transformers.testing_utils import (
CaptureStderr,
CaptureStdout,
TestCasePlus,
require_torch_gpu,
require_torch_non_multigpu_but_fix_me,
slow,
)
from utils import ROUGE_KEYS, label_smoothed_nll_loss, lmap, load_json
@@ -126,6 +133,7 @@ class TestSummarizationDistiller(TestCasePlus):
@slow
@require_torch_gpu
@require_torch_non_multigpu_but_fix_me
def test_hub_configs(self):
"""I put require_torch_gpu cause I only want this to run with self-scheduled."""
@@ -143,10 +151,12 @@ class TestSummarizationDistiller(TestCasePlus):
failures.append(m)
assert not failures, f"The following models could not be loaded through AutoConfig: {failures}"
@require_torch_non_multigpu_but_fix_me
def test_distill_no_teacher(self):
updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True)
self._test_distiller_cli(updates)
@require_torch_non_multigpu_but_fix_me
def test_distill_checkpointing_with_teacher(self):
updates = dict(
student_encoder_layers=2,
@@ -171,6 +181,7 @@ class TestSummarizationDistiller(TestCasePlus):
convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new)
assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin"))
@require_torch_non_multigpu_but_fix_me
def test_loss_fn(self):
model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True)
input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"]
@@ -191,6 +202,7 @@ class TestSummarizationDistiller(TestCasePlus):
# TODO: understand why this breaks
self.assertEqual(nll_loss, model_computed_loss)
@require_torch_non_multigpu_but_fix_me
def test_distill_mbart(self):
updates = dict(
student_encoder_layers=2,
@@ -215,6 +227,7 @@ class TestSummarizationDistiller(TestCasePlus):
assert len(all_files) > 2
self.assertEqual(len(transformer_ckpts), 2)
@require_torch_non_multigpu_but_fix_me
def test_distill_t5(self):
updates = dict(
student_encoder_layers=1,
@@ -296,18 +309,21 @@ class TestTheRest(TestCasePlus):
# test one model to quickly (no-@slow) catch simple problems and do an
# extensive testing of functionality with multiple models as @slow separately
@require_torch_non_multigpu_but_fix_me
def test_run_eval(self):
self.run_eval_tester(T5_TINY)
# any extra models should go into the list here - can be slow
@parameterized.expand([BART_TINY, MBART_TINY])
@slow
@require_torch_non_multigpu_but_fix_me
def test_run_eval_slow(self, model):
self.run_eval_tester(model)
# testing with 2 models to validate: 1. translation (t5) 2. summarization (mbart)
@parameterized.expand([T5_TINY, MBART_TINY])
@slow
@require_torch_non_multigpu_but_fix_me
def test_run_eval_search(self, model):
input_file_name = Path(self.get_auto_remove_tmp_dir()) / "utest_input.source"
output_file_name = input_file_name.parent / "utest_output.txt"
@@ -358,6 +374,7 @@ class TestTheRest(TestCasePlus):
@parameterized.expand(
[T5_TINY, BART_TINY, MBART_TINY, MARIAN_TINY, FSMT_TINY],
)
@require_torch_non_multigpu_but_fix_me
def test_finetune(self, model):
args_d: dict = CHEAP_ARGS.copy()
task = "translation" if model in [MBART_TINY, MARIAN_TINY, FSMT_TINY] else "summarization"
@@ -409,6 +426,7 @@ class TestTheRest(TestCasePlus):
assert isinstance(example_batch, dict)
assert len(example_batch) >= 4
@require_torch_non_multigpu_but_fix_me
def test_finetune_extra_model_args(self):
args_d: dict = CHEAP_ARGS.copy()
@@ -459,6 +477,7 @@ class TestTheRest(TestCasePlus):
model = main(args)
assert str(excinfo.value) == f"model config doesn't have a `{unsupported_param}` attribute"
@require_torch_non_multigpu_but_fix_me
def test_finetune_lr_schedulers(self):
args_d: dict = CHEAP_ARGS.copy()
......
@@ -4,7 +4,7 @@ import unittest
from transformers.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter
from transformers.file_utils import cached_property
- from transformers.testing_utils import slow
+ from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
@unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.")
@@ -15,10 +15,12 @@ class TatoebaConversionTester(unittest.TestCase):
return TatoebaConverter(save_dir=tmp_dir)
@slow
@require_torch_non_multigpu_but_fix_me
def test_resolver(self):
self.resolver.convert_models(["heb-eng"])
@slow
@require_torch_non_multigpu_but_fix_me
def test_model_card(self):
content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True)
assert mmeta["long_pair"] == "heb-eng"
@@ -23,7 +23,7 @@ from unittest.mock import patch
import torch
from transformers.file_utils import is_apex_available
- from transformers.testing_utils import TestCasePlus, torch_device
+ from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, torch_device
SRC_DIRS = [
@@ -67,6 +67,7 @@ def is_cuda_and_apex_available():
class ExamplesTests(TestCasePlus):
@require_torch_non_multigpu_but_fix_me
def test_run_glue(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -99,6 +100,7 @@ class ExamplesTests(TestCasePlus):
for value in result.values():
self.assertGreaterEqual(value, 0.75)
@require_torch_non_multigpu_but_fix_me
def test_run_pl_glue(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -136,6 +138,7 @@ class ExamplesTests(TestCasePlus):
# self.assertGreaterEqual(v, 0.75, f"({k})")
#
@require_torch_non_multigpu_but_fix_me
def test_run_clm(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -167,6 +170,7 @@ class ExamplesTests(TestCasePlus):
result = run_clm.main()
self.assertLess(result["perplexity"], 100)
@require_torch_non_multigpu_but_fix_me
def test_run_mlm(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -192,6 +196,7 @@ class ExamplesTests(TestCasePlus):
result = run_mlm.main()
self.assertLess(result["perplexity"], 42)
@require_torch_non_multigpu_but_fix_me
def test_run_ner(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -222,6 +227,7 @@ class ExamplesTests(TestCasePlus):
self.assertGreaterEqual(result["eval_precision"], 0.75)
self.assertLess(result["eval_loss"], 0.5)
@require_torch_non_multigpu_but_fix_me
def test_run_squad(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -250,6 +256,7 @@ class ExamplesTests(TestCasePlus):
self.assertGreaterEqual(result["f1"], 25)
self.assertGreaterEqual(result["exact"], 21)
@require_torch_non_multigpu_but_fix_me
def test_generation(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
......
@@ -20,7 +20,7 @@ import unittest
from time import time
from unittest.mock import patch
- from transformers.testing_utils import require_torch_tpu
+ from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, require_torch_tpu
logging.basicConfig(level=logging.DEBUG)
@@ -30,6 +30,7 @@ logger = logging.getLogger()
@require_torch_tpu
class TorchXLAExamplesTests(unittest.TestCase):
@require_torch_non_multigpu_but_fix_me
def test_run_glue(self):
import xla_spawn
@@ -81,6 +82,7 @@ class TorchXLAExamplesTests(unittest.TestCase):
# Assert that the script takes less than 300 seconds to make sure it doesn't hang.
self.assertLess(end - start, 500)
@require_torch_non_multigpu_but_fix_me
def test_trainer_tpu(self):
import xla_spawn
......
@@ -4,7 +4,7 @@ import unittest
from unittest.mock import patch
import run_ner_old as run_ner
- from transformers.testing_utils import slow
+ from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
logging.basicConfig(level=logging.INFO)
@@ -14,6 +14,7 @@ logger = logging.getLogger()
class ExamplesTests(unittest.TestCase):
@slow
@require_torch_non_multigpu_but_fix_me
def test_run_ner(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
@@ -34,6 +35,7 @@ class ExamplesTests(unittest.TestCase):
result = run_ner.main()
self.assertLess(result["eval_loss"], 1.5)
@require_torch_non_multigpu_but_fix_me
def test_run_ner_pl(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
......
@@ -227,6 +227,12 @@ def require_torch_non_multigpu(test_case):
return test_case
# this is a decorator identical to require_torch_non_multigpu, but is used as a quick band-aid to
# allow all of the examples to run on multi-gpu CI. It reminds us that tests decorated with this one
# still need to be ported; they are not single-gpu-only by design.
require_torch_non_multigpu_but_fix_me = require_torch_non_multigpu
def require_torch_tpu(test_case):
"""
Decorator marking a test that requires a TPU (in PyTorch).
......
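For reference, decorators like `require_torch_non_multigpu` follow the standard `unittest` skip pattern. Below is a minimal sketch of how such a guard and the band-aid alias can be implemented and applied; it is simplified (the real helper in `transformers.testing_utils` also checks that PyTorch is installed), and the test class and test name are hypothetical:

```python
import unittest

import torch


def require_torch_non_multigpu(test_case):
    """Skip the decorated test when more than one CUDA device is visible (simplified sketch)."""
    if torch.cuda.device_count() > 1:
        return unittest.skip("test requires 0 or 1 GPU")(test_case)
    return test_case


# band-aid alias: same behaviour, but flags tests that still need to be ported to multi-GPU
require_torch_non_multigpu_but_fix_me = require_torch_non_multigpu


class ExampleTests(unittest.TestCase):
    @require_torch_non_multigpu_but_fix_me
    def test_not_yet_ported(self):
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()
```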