Unverified commit 50378cbf, authored by Hz, Ji and committed by GitHub

device agnostic models testing (#27146)

* device agnostic models testing

* add decorator `require_torch_fp16`

* make style

* apply review suggestion

* Oops, the fp16 decorator was misused
parent 77930f8a
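For context, the pattern introduced in this commit is to gate fp16/bf16 tests on a device capability check instead of `torch_device == "cuda"`, so the same tests can run on any accelerator backend. Below is a minimal sketch of how such skip decorators can be built with `unittest`; it is illustrative only, the real decorators live in `transformers.testing_utils` and may probe capabilities differently.

```python
# Illustrative sketch, not the exact transformers.testing_utils implementation.
# Idea: skip a test unless the configured device (any accelerator, not just CUDA)
# supports the required feature, so the test body can call `.half()` unconditionally.
import unittest

import torch

# Normally resolved by the test harness; hard-coded fallback for this sketch.
torch_device = "cuda" if torch.cuda.is_available() else "cpu"


def require_torch_accelerator(test_case):
    """Skip unless the test device is an accelerator (anything other than plain CPU)."""
    return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)


def require_torch_fp16(test_case):
    """Skip unless a tiny fp16 op actually runs on the test device."""

    def fp16_available(device: str) -> bool:
        try:
            x = torch.zeros(2, 2, dtype=torch.float16, device=device)
            _ = x @ x
            return True
        except Exception:
            return False

    return unittest.skipUnless(fp16_available(torch_device), "test requires a device with fp16 support")(test_case)
```

With decorators like these in place, the per-test `if torch_device == "cuda":` branches in the hunks below can be dropped and the half-precision calls made unconditional.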
@@ -20,7 +20,14 @@ import tempfile
 import unittest
 
 from transformers import PLBartConfig, is_torch_available
-from transformers.testing_utils import require_sentencepiece, require_tokenizers, require_torch, slow, torch_device
+from transformers.testing_utils import (
+    require_sentencepiece,
+    require_tokenizers,
+    require_torch,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
 from transformers.utils import cached_property
 
 from ...generation.test_utils import GenerationTesterMixin
@@ -304,13 +311,13 @@ class PLBartModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMix
         with torch.no_grad():
             model(**inputs)[0]
 
+    @require_torch_fp16
     def test_generate_fp16(self):
         config, input_dict = self.model_tester.prepare_config_and_inputs()
         input_ids = input_dict["input_ids"]
         attention_mask = input_ids.ne(1).to(torch_device)
         model = PLBartForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            model.half()
+        model.half()
         model.generate(input_ids, attention_mask=attention_mask)
         model.generate(num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
...
@@ -23,7 +23,8 @@ from transformers.models.auto import get_values
 from transformers.testing_utils import (
     require_accelerate,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
+    require_torch_fp16,
     slow,
     torch_device,
 )
@@ -318,7 +319,8 @@ class PvtModelIntegrationTest(unittest.TestCase):
     @slow
     @require_accelerate
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
     def test_inference_fp16(self):
         r"""
         A small test to make sure that inference work in half precision without any problem.
...
@@ -22,7 +22,7 @@ import unittest
 import requests
 
 from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline
-from transformers.testing_utils import require_torch, slow, torch_device
+from transformers.testing_utils import backend_empty_cache, require_torch, slow, torch_device
 from transformers.utils import is_torch_available, is_vision_available
 
 from ...test_configuration_common import ConfigTester
@@ -478,7 +478,7 @@ class SamModelIntegrationTest(unittest.TestCase):
         super().tearDown()
         # clean-up as much as possible GPU memory occupied by PyTorch
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_inference_mask_generation_no_point(self):
         model = SamModel.from_pretrained("facebook/sam-vit-base")
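The `backend_empty_cache` helper is what makes the teardown device agnostic: it dispatches the cache-clearing call based on the configured device instead of assuming CUDA. A rough sketch of that dispatch follows; it is illustrative, and the helper shipped in `transformers.testing_utils` may cover additional backends.

```python
# Rough sketch of a device-dispatching cache cleaner; illustrative only.
import torch


def backend_empty_cache(device: str) -> None:
    # Accept strings like "cuda", "cuda:0", "cpu".
    device_type = device.split(":")[0]
    if device_type == "cuda":
        torch.cuda.empty_cache()
    # For CPU and backends without a cache API this is simply a no-op.
```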
@@ -772,9 +772,7 @@ class SamModelIntegrationTest(unittest.TestCase):
         torch.testing.assert_allclose(iou_scores, EXPECTED_IOU, atol=1e-4, rtol=1e-4)
 
     def test_dummy_pipeline_generation(self):
-        generator = pipeline(
-            "mask-generation", model="facebook/sam-vit-base", device=0 if torch.cuda.is_available() else -1
-        )
+        generator = pipeline("mask-generation", model="facebook/sam-vit-base", device=torch_device)
 
         raw_image = prepare_image()
         _ = generator(raw_image, points_per_batch=64)
@@ -26,6 +26,7 @@ from transformers.testing_utils import (
     require_sentencepiece,
     require_tokenizers,
     require_torch,
+    require_torch_fp16,
     require_torchaudio,
     slow,
     torch_device,
@@ -336,14 +337,14 @@ class Speech2TextModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTest
     def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass
 
+    @require_torch_fp16
     def test_generate_fp16(self):
         config, input_dict = self.model_tester.prepare_config_and_inputs()
         input_features = input_dict["input_features"]
         attention_mask = input_dict["attention_mask"]
         model = Speech2TextForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            input_features = input_features.half()
-            model.half()
+        input_features = input_features.half()
+        model.half()
         model.generate(input_features, attention_mask=attention_mask)
         model.generate(input_features, num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
...
@@ -19,7 +19,14 @@ import tempfile
 import unittest
 
 from transformers import SwitchTransformersConfig, is_torch_available
-from transformers.testing_utils import require_tokenizers, require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import (
+    require_tokenizers,
+    require_torch,
+    require_torch_accelerator,
+    require_torch_bf16,
+    slow,
+    torch_device,
+)
 
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -1017,7 +1024,8 @@ class SwitchTransformerRouterTest(unittest.TestCase):
 @require_torch
 @require_tokenizers
 class SwitchTransformerModelIntegrationTests(unittest.TestCase):
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_bf16
     def test_small_logits(self):
         r"""
         Logits testing to check implementation consistency between `t5x` implementation
...
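The SwitchTransformers logits test now stacks `require_torch_accelerator` with `require_torch_bf16`, since it needs bf16 support on the test device to compare against the `t5x` reference. A hedged sketch of the kind of bf16 capability probe such a decorator can rely on (the actual check in `transformers.testing_utils` may differ):

```python
# Illustrative bf16 capability probe; not the exact transformers implementation.
import torch


def bf16_supported(device: str) -> bool:
    device_type = device.split(":")[0]
    if device_type == "cuda":
        return torch.cuda.is_bf16_supported()
    # Recent PyTorch CPU builds handle bfloat16 tensors, so treat CPU as capable.
    return device_type == "cpu"
```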
@@ -22,7 +22,8 @@ from transformers import ViTConfig
 from transformers.testing_utils import (
     require_accelerate,
     require_torch,
-    require_torch_gpu,
+    require_torch_accelerator,
+    require_torch_fp16,
     require_vision,
     slow,
     torch_device,
@@ -316,7 +317,8 @@ class ViTModelIntegrationTest(unittest.TestCase):
     @slow
     @require_accelerate
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
     def test_inference_fp16(self):
         r"""
         A small test to make sure that inference work in half precision without any problem.
...
@@ -29,6 +29,7 @@ from datasets import load_dataset
 from transformers import Wav2Vec2Config, is_torch_available
 from transformers.testing_utils import (
     CaptureLogger,
+    backend_empty_cache,
     is_pt_flax_cross_test,
     is_pyctcdecode_available,
     is_torchaudio_available,
@@ -1455,7 +1456,7 @@ class Wav2Vec2ModelIntegrationTest(unittest.TestCase):
         super().tearDown()
         # clean-up as much as possible GPU memory occupied by PyTorch
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def _load_datasamples(self, num_samples):
         ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
...
@@ -21,7 +21,14 @@ import numpy as np
 from datasets import load_dataset
 
 from transformers import Wav2Vec2ConformerConfig, is_torch_available
-from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import (
+    is_pt_flax_cross_test,
+    require_torch,
+    require_torch_accelerator,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
 
 from ...test_configuration_common import ConfigTester
 from ...test_modeling_common import (
@@ -468,12 +475,14 @@ class Wav2Vec2ConformerModelTest(ModelTesterMixin, PipelineTesterMixin, unittest
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model_with_adapter_proj_dim(*config_and_inputs)
 
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
     def test_model_float16_with_relative(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="relative")
         self.model_tester.create_and_check_model_float16(*config_and_inputs)
 
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
     def test_model_float16_with_rotary(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs(position_embeddings_type="rotary")
         self.model_tester.create_and_check_model_float16(*config_and_inputs)
...
@@ -24,7 +24,14 @@ import numpy as np
 
 import transformers
 from transformers import WhisperConfig
-from transformers.testing_utils import is_pt_flax_cross_test, require_torch, require_torchaudio, slow, torch_device
+from transformers.testing_utils import (
+    is_pt_flax_cross_test,
+    require_torch,
+    require_torch_fp16,
+    require_torchaudio,
+    slow,
+    torch_device,
+)
 from transformers.utils import cached_property, is_flax_available, is_torch_available
 from transformers.utils.import_utils import is_datasets_available
@@ -429,14 +436,14 @@ class WhisperModelTest(ModelTesterMixin, GenerationTesterMixin, PipelineTesterMi
     def test_generate_with_head_masking(self):
         pass
 
+    @require_torch_fp16
    def test_generate_fp16(self):
         config, input_dict = self.model_tester.prepare_config_and_inputs()
         config.max_target_positions = 400
         input_features = input_dict["input_features"]
         model = WhisperForConditionalGeneration(config).eval().to(torch_device)
-        if torch_device == "cuda":
-            input_features = input_features.half()
-            model.half()
+        input_features = input_features.half()
+        model.half()
         model.generate(input_features)
         model.generate(input_features, num_beams=4, do_sample=True, early_stopping=False, num_return_sequences=3)
...
@@ -19,7 +19,13 @@ import math
 import unittest
 
 from transformers import XGLMConfig, is_torch_available
-from transformers.testing_utils import require_torch, require_torch_gpu, slow, torch_device
+from transformers.testing_utils import (
+    require_torch,
+    require_torch_accelerator,
+    require_torch_fp16,
+    slow,
+    torch_device,
+)
 
 from ...generation.test_utils import GenerationTesterMixin
 from ...test_configuration_common import ConfigTester
@@ -492,7 +498,8 @@ class XGLMModelLanguageGenerationTest(unittest.TestCase):
         duration = datetime.datetime.now() - start
         self.assertGreater(duration, datetime.timedelta(seconds=1.25 * MAX_TIME))
 
-    @require_torch_gpu
+    @require_torch_accelerator
+    @require_torch_fp16
     def test_batched_nan_fp16(self):
         model_name = "facebook/xglm-564M"
         tokenizer = XGLMTokenizer.from_pretrained(model_name, use_fast=False, padding_side="left")
...
@@ -44,8 +44,8 @@ from transformers.testing_utils import (
     require_accelerate,
     require_safetensors,
     require_torch,
-    require_torch_gpu,
-    require_torch_multi_gpu,
+    require_torch_accelerator,
+    require_torch_multi_accelerator,
     require_usr_bin_time,
     slow,
     torch_device,
@@ -681,7 +681,7 @@ class ModelUtilsTest(TestCasePlus):
     @require_accelerate
     @mark.accelerate_tests
-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     @slow
     def test_model_parallelism_gpt2(self):
         device_map = {"transformer.wte": 0, "transformer.wpe": 0, "lm_head": 0, "transformer.ln_f": 1}
@@ -699,7 +699,7 @@ class ModelUtilsTest(TestCasePlus):
     @require_accelerate
     @mark.accelerate_tests
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_from_pretrained_disk_offload_task_model(self):
         model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-gpt2")
         device_map = {
@@ -1036,7 +1036,7 @@ class ModelUtilsTest(TestCasePlus):
         opt_fn(input_ids)
         self.assertEqual(compile_counter.frame_count, 0)
 
-    @require_torch_gpu
+    @require_torch_accelerator
     @slow
     def test_pretrained_low_mem_new_config(self):
         # Checking for 1 model(the same one which was described in the issue) .
...
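Finally, the model-utils tests swap the multi-GPU gate for a multi-accelerator one, so model-parallelism and offload tests can run on any backend that exposes more than one device. A sketch of what such a gate can look like (illustrative only; see `transformers.testing_utils` for the real `require_torch_multi_accelerator`):

```python
# Illustrative multi-accelerator gate; the real decorator may query other backends too.
import unittest

import torch


def require_torch_multi_accelerator(test_case):
    device_count = torch.cuda.device_count() if torch.cuda.is_available() else 0
    return unittest.skipUnless(device_count > 1, "test requires multiple accelerator devices")(test_case)
```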