Unverified Commit 50378cbf authored by Hz, Ji, committed by GitHub

device agnostic models testing (#27146)

* device agnostic models testing

* add decorator `require_torch_fp16`

* make style

* apply review suggestion

* Oops, the fp16 decorator was misused
parent 77930f8a
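
The changes below replace CUDA-only helpers (`@require_torch_gpu`, `torch.cuda.empty_cache()`, `if torch_device == "cuda"` guards) with the device-agnostic utilities from `transformers.testing_utils`. A minimal sketch of a test written in the new style follows; the test class, tiny config, and inputs are illustrative only and not part of this commit:

import unittest

import torch

from transformers import GPT2Config, GPT2LMHeadModel
from transformers.testing_utils import (
    backend_empty_cache,
    require_torch,
    require_torch_accelerator,
    require_torch_fp16,
    torch_device,
)


@require_torch
class DeviceAgnosticFp16Test(unittest.TestCase):
    @require_torch_accelerator  # any accelerator backend, not only CUDA (replaces @require_torch_gpu)
    @require_torch_fp16  # skipped when the selected device cannot run float16
    def test_generate_fp16(self):
        # tiny randomly initialised model, for illustration only
        config = GPT2Config(vocab_size=100, n_positions=64, n_embd=32, n_layer=2, n_head=2)
        model = GPT2LMHeadModel(config).eval().to(torch_device)
        model.half()  # no `if torch_device == "cuda"` guard needed
        input_ids = torch.tensor([[1, 2, 3]], device=torch_device)
        model.generate(input_ids, max_new_tokens=5)
        del model
        backend_empty_cache(torch_device)  # device-agnostic torch.cuda.empty_cache()

With `require_torch_accelerator` the test runs on whatever backend `torch_device` resolves to, and `require_torch_fp16` skips it where float16 is unsupported, which makes the per-test CUDA checks in the diff below unnecessary.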
@@ -16,7 +16,13 @@ import unittest
from unittest import skip
from transformers import is_torch_available
-from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import (
+    require_torch,
+    require_torch_accelerator,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.trainer_utils import set_seed

@@ -363,7 +369,7 @@ class Jukebox5bModelTester(unittest.TestCase):
        self.assertIn(zs[2][0].detach().cpu().tolist(), [self.EXPECTED_OUTPUT_0, self.EXPECTED_OUTPUT_0_PT_2])
    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
    @skip("Not enough GPU memory on CI runners")
    def test_slow_sampling(self):
        model = JukeboxModel.from_pretrained(self.model_id, min_duration=0).eval()

@@ -388,7 +394,8 @@ class Jukebox5bModelTester(unittest.TestCase):
        torch.testing.assert_allclose(zs[2][0].cpu(), torch.tensor(self.EXPECTED_GPU_OUTPUTS_0))
    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_fp16_slow_sampling(self):
        prior_id = "ArthurZ/jukebox_prior_0"
        model = JukeboxPrior.from_pretrained(prior_id, min_duration=0).eval().half().to(torch_device)
......
@@ -21,7 +21,14 @@ import unittest
from transformers import LEDConfig, is_torch_available
from transformers.models.auto import get_values
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -363,13 +370,13 @@ class LEDModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
        with torch.no_grad():
            model(**inputs)[0]
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = LEDForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
......
@@ -21,7 +21,14 @@ from parameterized import parameterized
from pytest import mark
from transformers import LlamaConfig, is_torch_available, set_seed
-from transformers.testing_utils import require_flash_attn, require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import (
+    require_flash_attn,
+    require_torch,
+    require_torch_accelerator,
+    require_torch_gpu,
+    slow,
+    torch_device,
+)
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester

@@ -534,7 +541,7 @@ end
    """,
    ]
-    @require_torch_gpu
+    @require_torch_accelerator
    @slow
    def test_model_7b_logits(self):
        model = LlamaForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf").to(torch_device)
......
@@ -20,7 +20,14 @@ import tempfile
import unittest
from transformers import M2M100Config, is_torch_available
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -312,13 +319,13 @@ class M2M100ModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
        with torch.no_grad():
            model(**inputs)[0]
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = M2M100ForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
......
@@ -20,7 +20,14 @@ import unittest
from huggingface_hub.hf_api import list_models
from transformers import MarianConfig, is_torch_available
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -281,13 +288,13 @@ class MarianModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
        self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = MarianMTModel(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)

@@ -620,9 +627,9 @@ class TestMarian_en_ROMANCE(MarianIntegrationTest):
        self._assert_generated_batch_equal_expected()
    @slow
+    @require_torch
    def test_pipeline(self):
-        device = 0 if torch_device == "cuda" else -1
-        pipeline = TranslationPipeline(self.model, self.tokenizer, framework="pt", device=device)
+        pipeline = TranslationPipeline(self.model, self.tokenizer, framework="pt", device=torch_device)
        output = pipeline(self.src_text)
        self.assertEqual(self.expected_text, [x["translation_text"] for x in output])
......
@@ -23,7 +23,8 @@ from tests.test_modeling_common import floats_tensor
from transformers import Mask2FormerConfig, is_torch_available, is_vision_available
from transformers.testing_utils import (
    require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
+    require_torch_fp16,
    require_torch_multi_gpu,
    require_vision,
    slow,

@@ -427,7 +428,8 @@ class Mask2FormerModelIntegrationTest(unittest.TestCase):
        ).to(torch_device)
        self.assertTrue(torch.allclose(outputs.class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE))
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_inference_fp16(self):
        model = (
            Mask2FormerForUniversalSegmentation.from_pretrained(self.model_checkpoints)
......
@@ -24,7 +24,8 @@ from tests.test_modeling_common import floats_tensor
from transformers import DetrConfig, MaskFormerConfig, SwinConfig, is_torch_available, is_vision_available
from transformers.testing_utils import (
    require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
+    require_torch_fp16,
    require_torch_multi_gpu,
    require_vision,
    slow,

@@ -516,7 +517,8 @@ class MaskFormerModelIntegrationTest(unittest.TestCase):
        ).to(torch_device)
        self.assertTrue(torch.allclose(outputs.class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE))
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_inference_fp16(self):
        model = (
            MaskFormerForInstanceSegmentation.from_pretrained("facebook/maskformer-resnet101-coco-stuff")
......
@@ -20,7 +20,14 @@ import tempfile
import unittest
from transformers import MBartConfig, is_torch_available
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -317,13 +324,13 @@ class MBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixi
        with torch.no_grad():
            model(**inputs)[0]
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = MBartForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
......
@@ -17,7 +17,13 @@
import unittest
from transformers import MegaConfig, is_torch_available
-from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    TestCasePlus,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester

@@ -619,12 +625,12 @@ class MegaModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.check_sequence_length_beyond_max_positions(*config_and_inputs)
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_ids, _, attention_mask, *_ = self.model_tester.prepare_config_and_inputs_for_decoder()
        # attention_mask = torch.LongTensor(input_ids.ne(1)).to(torch_device)
        model = MegaForCausalLM(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
......
@@ -22,7 +22,14 @@ import unittest
from pytest import mark
from transformers import AutoTokenizer, MistralConfig, is_torch_available
-from transformers.testing_utils import require_flash_attn, require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import (
+    backend_empty_cache,
+    require_flash_attn,
+    require_torch,
+    require_torch_gpu,
+    slow,
+    torch_device,
+)
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester

@@ -450,7 +457,7 @@ class MistralIntegrationTest(unittest.TestCase):
        torch.testing.assert_close(out[0, 0, :30], EXPECTED_SLICE, atol=1e-4, rtol=1e-4)
        del model
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
        gc.collect()
    @slow

@@ -467,5 +474,5 @@ class MistralIntegrationTest(unittest.TestCase):
        self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
        del model
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
        gc.collect()
@@ -28,7 +28,13 @@ from transformers import (
    PretrainedConfig,
    T5Config,
)
-from transformers.testing_utils import is_torch_available, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    is_torch_available,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -1082,13 +1088,13 @@ class MusicgenTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
        output_ids_generate = model.generate(do_sample=False, max_length=max_length, remove_invalid_values=True)
        self.assertIsNotNone(output_ids_generate)
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        for model_class in self.greedy_sample_model_classes:
            model = model_class(config).eval().to(torch_device)
-            if torch_device == "cuda":
-                model.half()
+            model.half()
            # greedy
            model.generate(input_dict["input_ids"], attention_mask=input_dict["attention_mask"], max_new_tokens=10)
            # sampling
......
@@ -22,7 +22,14 @@ import unittest
import timeout_decorator  # noqa
from transformers import MvpConfig, is_torch_available
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -374,12 +381,12 @@ class MvpHeadTests(unittest.TestCase):
            mvp_toks = tokenizer.encode(ex, return_tensors="pt").squeeze()
            assert_tensors_close(desired_result.long(), mvp_toks, prefix=ex)
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_ids, batch_size = self._get_config_and_data()
        attention_mask = input_ids.ne(1).to(torch_device)
        model = MvpForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)

@@ -505,13 +512,13 @@ class MvpModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMixin,
        with torch.no_grad():
            model(**inputs)[0]
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = MvpForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
......
@@ -24,6 +24,7 @@ from transformers.testing_utils import (
    require_sentencepiece,
    require_tokenizers,
    require_torch,
+    require_torch_fp16,
    slow,
    torch_device,
)

@@ -327,13 +328,13 @@ class NllbMoeModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
        with torch.no_grad():
            model(**inputs)[0]
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = NllbMoeForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
......
@@ -24,7 +24,8 @@ from tests.test_modeling_common import floats_tensor
from transformers import OneFormerConfig, is_torch_available, is_vision_available
from transformers.testing_utils import (
    require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
+    require_torch_fp16,
    require_torch_multi_gpu,
    require_vision,
    slow,

@@ -540,7 +541,8 @@ class OneFormerModelIntegrationTest(unittest.TestCase):
        ).to(torch_device)
        self.assertTrue(torch.allclose(class_queries_logits[0, :3, :3], expected_slice, atol=TOLERANCE))
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_inference_fp16(self):
        model = (
            OneFormerForUniversalSegmentation.from_pretrained(self.model_checkpoints)
......
@@ -22,7 +22,7 @@ import unittest
import timeout_decorator  # noqa
from transformers import OPTConfig, is_torch_available
-from transformers.testing_utils import require_torch, require_torch_fp16, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import require_torch, require_torch_accelerator, require_torch_fp16, slow, torch_device
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester

@@ -514,7 +514,8 @@ class OPTGenerationTest(unittest.TestCase):
        self.assertListEqual(predicted_outputs, EXPECTED_OUTPUTS)
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_batched_nan_fp16(self):
        # a bug manifested starting at models facebook/opt-1.3 and larger when running batched generations,
        # therefore not using a tiny model, but the smallest model the problem was seen with which is opt-1.3b.
......
@@ -24,7 +24,14 @@ import numpy as np
import requests
from transformers import Owlv2Config, Owlv2TextConfig, Owlv2VisionConfig
-from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
+from transformers.testing_utils import (
+    require_torch,
+    require_torch_accelerator,
+    require_torch_fp16,
+    require_vision,
+    slow,
+    torch_device,
+)
from transformers.utils import is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester

@@ -869,7 +876,8 @@ class Owlv2ModelIntegrationTest(unittest.TestCase):
        self.assertTrue(torch.allclose(outputs.target_pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4))
    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_inference_one_shot_object_detection_fp16(self):
        model_name = "google/owlv2-base-patch16"
        model = Owlv2ForObjectDetection.from_pretrained(model_name, torch_dtype=torch.float16).to(torch_device)
......
@@ -24,7 +24,14 @@ import numpy as np
import requests
from transformers import OwlViTConfig, OwlViTTextConfig, OwlViTVisionConfig
-from transformers.testing_utils import require_torch, require_torch_gpu, require_vision, slow, torch_device
+from transformers.testing_utils import (
+    require_torch,
+    require_torch_accelerator,
+    require_torch_fp16,
+    require_vision,
+    slow,
+    torch_device,
+)
from transformers.utils import is_torch_available, is_vision_available
from ...test_configuration_common import ConfigTester

@@ -860,7 +867,8 @@ class OwlViTModelIntegrationTest(unittest.TestCase):
        self.assertTrue(torch.allclose(outputs.target_pred_boxes[0, :3, :3], expected_slice_boxes, atol=1e-4))
    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_inference_one_shot_object_detection_fp16(self):
        model_name = "google/owlvit-base-patch32"
        model = OwlViTForObjectDetection.from_pretrained(model_name, torch_dtype=torch.float16).to(torch_device)
......
@@ -18,7 +18,14 @@ import tempfile
import unittest
from transformers import PegasusConfig, is_torch_available
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -280,13 +287,13 @@ class PegasusModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_common()
        self.model_tester.check_encoder_decoder_model_standalone(*config_and_inputs)
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = PegasusForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)

@@ -352,6 +359,7 @@ class PegasusXSUMIntegrationTest(AbstractSeq2SeqIntegrationTest):
        return AutoModelForSeq2SeqLM.from_pretrained(self.checkpoint_name).to(torch_device)
    @slow
+    @require_torch_fp16
    def test_pegasus_xsum_summary(self):
        assert self.tokenizer.model_max_length == 512
        inputs = self.tokenizer(self.src_text, return_tensors="pt", truncation=True, max_length=512, padding=True).to(

@@ -362,9 +370,6 @@ class PegasusXSUMIntegrationTest(AbstractSeq2SeqIntegrationTest):
        decoded = self.tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
        assert self.tgt_text == decoded
-        if "cuda" not in torch_device:
-            return
-        # Demonstrate fp16 issue, Contributions welcome!
        self.model.half()
        translated_tokens_fp16 = self.model.generate(**inputs, max_length=10)
        decoded_fp16 = self.tokenizer.batch_decode(translated_tokens_fp16, skip_special_tokens=True)
......
@@ -21,7 +21,14 @@ import tempfile
import unittest
from transformers import is_torch_available
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from transformers.utils import cached_property
from ...generation.test_utils import GenerationTesterMixin

@@ -274,13 +281,13 @@ class PegasusXModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterM
        with torch.no_grad():
            model(**inputs)[0]
+    @require_torch_fp16
    def test_generate_fp16(self):
        config, input_dict = self.model_tester.prepare_config_and_inputs()
        input_ids = input_dict["input_ids"]
        attention_mask = input_ids.ne(1).to(torch_device)
        model = PegasusXForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
        model.generate(input_ids, attention_mask=attention_mask)
        model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
......
@@ -21,7 +21,14 @@ import unittest
from parameterized import parameterized
from transformers import PersimmonConfig, is_torch_available, set_seed
-from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import (
+    backend_empty_cache,
+    require_torch,
+    require_torch_accelerator,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
from ...generation.test_utils import GenerationTesterMixin
from ...test_configuration_common import ConfigTester

@@ -413,12 +420,13 @@ class PersimmonIntegrationTest(unittest.TestCase):
        # fmt: on
        torch.testing.assert_close(out.cpu()[0, 0, :30], EXPECTED_SLICE, atol=1e-5, rtol=1e-5)
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
        del model
        gc.collect()
    @slow
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
    def test_model_8b_chat_greedy_generation(self):
        EXPECTED_TEXT_COMPLETION = """human: Simply put, the theory of relativity states that?\n\nadept: The theory of relativity states that the laws of physics are the same for all observers, regardless of their relative motion."""
        prompt = "human: Simply put, the theory of relativity states that?\n\nadept:"

@@ -433,6 +441,6 @@ class PersimmonIntegrationTest(unittest.TestCase):
        text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
        del model
        gc.collect()