Unverified commit 15ad97f7, authored by Fanli Lin, committed by GitHub

[tests] make cuda only tests device-agnostic (#11058)

* enable bnb on xpu

* add 2 more cases

* add missing change

* add missing change

* add one more

* enable cuda only tests on xpu

* enable big gpu cases
parent 9f2d5c9e
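
The hunks below replace hard-coded `torch.cuda` calls and CUDA-only skip decorators with backend-dispatching equivalents, so the same tests run on CUDA and XPU. The idea behind a helper such as `backend_empty_cache` is a small dispatch on the active device string; a minimal sketch of the pattern (not the exact diffusers implementation) might look like:

```python
import torch

def backend_empty_cache(device: str) -> None:
    # Sketch only: route to whichever allocator-cache API matches the device.
    # The real helper in diffusers.utils.testing_utils dispatches similarly.
    if device.startswith("cuda"):
        torch.cuda.empty_cache()
    elif device.startswith("xpu"):
        torch.xpu.empty_cache()
    elif device.startswith("mps"):
        torch.mps.empty_cache()
    # plain CPU has no device cache to clear
```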
@@ -449,9 +449,9 @@ class TextualInversionLoaderMixin:
         # 7.5 Offload the model again
         if is_model_cpu_offload:
-            self.enable_model_cpu_offload()
+            self.enable_model_cpu_offload(device=device)
         elif is_sequential_cpu_offload:
-            self.enable_sequential_cpu_offload()
+            self.enable_sequential_cpu_offload(device=device)
         # / Unsafe Code >
...
@@ -320,6 +320,21 @@ def require_torch_multi_gpu(test_case):
     return unittest.skipUnless(torch.cuda.device_count() > 1, "test requires multiple GPUs")(test_case)


+def require_torch_multi_accelerator(test_case):
+    """
+    Decorator marking a test that requires a multi-accelerator setup (in PyTorch). These tests are skipped on a
+    machine without multiple hardware accelerators.
+    """
+    if not is_torch_available():
+        return unittest.skip("test requires PyTorch")(test_case)
+
+    import torch
+
+    return unittest.skipUnless(
+        torch.cuda.device_count() > 1 or torch.xpu.device_count() > 1, "test requires multiple hardware accelerators"
+    )(test_case)
+
+
 def require_torch_accelerator_with_fp16(test_case):
     """Decorator marking a test that requires an accelerator with support for the FP16 data type."""
     return unittest.skipUnless(_is_torch_fp16_available(torch_device), "test requires accelerator with fp16 support")(
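
A hypothetical test module using the new decorator (class and test names here are illustrative, not from the diff):

```python
import unittest

from diffusers.utils.testing_utils import require_torch_multi_accelerator

class ShardedInferenceTests(unittest.TestCase):
    @require_torch_multi_accelerator
    def test_runs_on_two_devices(self):
        # Skipped automatically unless more than one CUDA or XPU device is visible.
        ...
```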
@@ -354,6 +369,31 @@ def require_big_gpu_with_torch_cuda(test_case):
     )(test_case)


+def require_big_accelerator(test_case):
+    """
+    Decorator marking a test that requires a bigger hardware accelerator (24GB) for execution. Some example
+    pipelines: Flux, SD3, Cog, etc.
+    """
+    if not is_torch_available():
+        return unittest.skip("test requires PyTorch")(test_case)
+
+    import torch
+
+    if not (torch.cuda.is_available() or torch.xpu.is_available()):
+        return unittest.skip("test requires PyTorch CUDA")(test_case)
+
+    if torch.xpu.is_available():
+        device_properties = torch.xpu.get_device_properties(0)
+    else:
+        device_properties = torch.cuda.get_device_properties(0)
+
+    total_memory = device_properties.total_memory / (1024**3)
+    return unittest.skipUnless(
+        total_memory >= BIG_GPU_MEMORY,
+        f"test requires a hardware accelerator with at least {BIG_GPU_MEMORY} GB memory",
+    )(test_case)
+
+
 def require_torch_accelerator_with_training(test_case):
     """Decorator marking a test that requires an accelerator with support for training."""
     return unittest.skipUnless(
...
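
The byte-to-GiB conversion in `require_big_accelerator` is worth a quick sanity check; the values below are illustrative, and the actual `BIG_GPU_MEMORY` constant is defined elsewhere in diffusers' testing utilities:

```python
# Sanity-check the GiB conversion used by the decorator (values illustrative).
total_memory_bytes = 85_899_345_920           # an 80 GiB accelerator
total_memory_gib = total_memory_bytes / (1024**3)
assert total_memory_gib == 80.0
assert total_memory_gib >= 24                 # clears the documented 24 GB bar
```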
@@ -124,7 +124,7 @@ class AsymmetricAutoencoderKLIntegrationTests(unittest.TestCase):
         return model

     def get_generator(self, seed=0):
-        generator_device = "cpu" if not torch_device.startswith("cuda") else "cuda"
+        generator_device = "cpu" if not torch_device.startswith(torch_device) else torch_device
         if torch_device != "mps":
             return torch.Generator(device=generator_device).manual_seed(seed)
         return torch.manual_seed(seed)
...
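
The `get_generator` change routes the generator to whatever accelerator `torch_device` names rather than assuming CUDA. The intended behavior, written as a standalone sketch (hypothetical helper, assuming `torch_device` is a plain device string such as "cuda", "xpu", or "mps"):

```python
import torch

def get_generator(torch_device: str, seed: int = 0) -> torch.Generator:
    # MPS generators are driven through the global RNG; everything else
    # gets a per-device torch.Generator seeded explicitly.
    if torch_device == "mps":
        return torch.manual_seed(seed)
    return torch.Generator(device=torch_device).manual_seed(seed)
```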
@@ -165,7 +165,7 @@ class AutoencoderKLTests(ModelTesterMixin, UNetTesterMixin, unittest.TestCase):
         model.eval()

         # Keep generator on CPU for non-CUDA devices to compare outputs with CPU result tensors
-        generator_device = "cpu" if not torch_device.startswith("cuda") else "cuda"
+        generator_device = "cpu" if not torch_device.startswith(torch_device) else torch_device
         if torch_device != "mps":
             generator = torch.Generator(device=generator_device).manual_seed(0)
         else:
@@ -263,7 +263,7 @@ class AutoencoderKLIntegrationTests(unittest.TestCase):
         return model

     def get_generator(self, seed=0):
-        generator_device = "cpu" if not torch_device.startswith("cuda") else "cuda"
+        generator_device = "cpu" if not torch_device.startswith(torch_device) else torch_device
         if torch_device != "mps":
             return torch.Generator(device=generator_device).manual_seed(seed)
         return torch.manual_seed(seed)
...
@@ -183,7 +183,7 @@ class AutoencoderOobleckIntegrationTests(unittest.TestCase):
         return model

     def get_generator(self, seed=0):
-        generator_device = "cpu" if not torch_device.startswith("cuda") else "cuda"
+        generator_device = "cpu" if not torch_device.startswith(torch_device) else torch_device
         if torch_device != "mps":
             return torch.Generator(device=generator_device).manual_seed(seed)
         return torch.manual_seed(seed)
...
@@ -63,7 +63,7 @@ from diffusers.utils.testing_utils import (
     require_torch_accelerator,
     require_torch_accelerator_with_training,
     require_torch_gpu,
-    require_torch_multi_gpu,
+    require_torch_multi_accelerator,
     run_test_in_subprocess,
     torch_all_close,
     torch_device,
@@ -1227,7 +1227,7 @@ class ModelTesterMixin:
         self.assertTrue(torch.allclose(base_output[0], new_output[0], atol=1e-5))

-    @require_torch_multi_gpu
+    @require_torch_multi_accelerator
     def test_model_parallelism(self):
         config, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         model = self.model_class(**config).eval()
...
@@ -31,9 +31,10 @@ from diffusers import (
 from diffusers.models import SD3ControlNetModel, SD3MultiControlNetModel
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     slow,
     torch_device,
 )
@@ -219,7 +220,7 @@ class StableDiffusion3ControlNetPipelineFastTests(unittest.TestCase, PipelineTes
 @slow
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @pytest.mark.big_gpu_with_torch_cuda
 class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
     pipeline_class = StableDiffusion3ControlNetPipeline
@@ -227,12 +228,12 @@ class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_canny(self):
         controlnet = SD3ControlNetModel.from_pretrained("InstantX/SD3-Controlnet-Canny", torch_dtype=torch.float16)
@@ -272,7 +273,7 @@ class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
         pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
             "stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
@@ -304,7 +305,7 @@ class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
         pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
             "stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
@@ -338,7 +339,7 @@ class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
         pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
             "stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
...
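
Passing `device=torch_device` to `enable_model_cpu_offload` directs the offload hooks at the detected accelerator instead of the previous implicit CUDA default. A hedged sketch of the call site (the model id is a placeholder):

```python
import torch
from diffusers import DiffusionPipeline
from diffusers.utils.testing_utils import torch_device

pipe = DiffusionPipeline.from_pretrained("some-org/some-model", torch_dtype=torch.float16)
# Offload hooks now target the detected accelerator (CUDA, XPU, ...).
pipe.enable_model_cpu_offload(device=torch_device)
```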
@@ -12,7 +12,7 @@ from diffusers.utils.testing_utils import (
     backend_empty_cache,
     nightly,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     slow,
     torch_device,
 )
@@ -204,7 +204,7 @@ class FluxPipelineFastTests(
 @nightly
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @pytest.mark.big_gpu_with_torch_cuda
 class FluxPipelineSlowTests(unittest.TestCase):
     pipeline_class = FluxPipeline
@@ -292,7 +292,7 @@ class FluxPipelineSlowTests(unittest.TestCase):
 @slow
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @pytest.mark.big_gpu_with_torch_cuda
 class FluxIPAdapterPipelineSlowTests(unittest.TestCase):
     pipeline_class = FluxPipeline
@@ -304,12 +304,12 @@ class FluxIPAdapterPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, seed=0):
         if str(device).startswith("mps"):
...
@@ -8,15 +8,16 @@ import torch
 from diffusers import FluxPipeline, FluxPriorReduxPipeline
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     slow,
     torch_device,
 )


 @slow
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @pytest.mark.big_gpu_with_torch_cuda
 class FluxReduxSlowTests(unittest.TestCase):
     pipeline_class = FluxPriorReduxPipeline
@@ -27,12 +28,12 @@ class FluxReduxSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, seed=0):
         init_image = load_image(
@@ -59,7 +60,7 @@ class FluxReduxSlowTests(unittest.TestCase):
             self.base_repo_id, torch_dtype=torch.bfloat16, text_encoder=None, text_encoder_2=None
         )
         pipe_redux.to(torch_device)
-        pipe_base.enable_model_cpu_offload()
+        pipe_base.enable_model_cpu_offload(device=torch_device)

         inputs = self.get_inputs(torch_device)
         base_pipeline_inputs = self.get_base_pipeline_inputs(torch_device)
...
@@ -262,7 +262,7 @@ class StableDiffusion3PAGImg2ImgPipelineIntegrationTests(unittest.TestCase):
         pipeline = AutoPipelineForImage2Image.from_pretrained(
             self.repo_id, enable_pag=True, torch_dtype=torch.float16, pag_applied_layers=["blocks.(4|17)"]
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.set_progress_bar_config(disable=None)

         inputs = self.get_inputs(torch_device, guidance_scale=0.0, pag_scale=1.8)
...
@@ -57,7 +57,7 @@ from diffusers.utils.testing_utils import (
     require_accelerate_version_greater,
     require_torch_2,
     require_torch_accelerator,
-    require_torch_multi_gpu,
+    require_torch_multi_accelerator,
     run_test_in_subprocess,
     skip_mps,
     slow,
@@ -1409,7 +1409,7 @@ class StableDiffusionPipelineNightlyTests(unittest.TestCase):
 # (sayakpaul): This test suite was run in the DGX with two GPUs (1, 2).
 @slow
-@require_torch_multi_gpu
+@require_torch_multi_accelerator
 @require_accelerate_version_greater("0.27.0")
 class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
     def tearDown(self):
@@ -1497,7 +1497,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
         assert sd_pipe_with_device_map.hf_device_map is None

         # Make sure `to()` can be used and the pipeline can be called.
-        pipe = sd_pipe_with_device_map.to("cuda")
+        pipe = sd_pipe_with_device_map.to(torch_device)
         _ = pipe("hello", num_inference_steps=2)

     def test_reset_device_map_enable_model_cpu_offload(self):
@@ -1509,7 +1509,7 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
         assert sd_pipe_with_device_map.hf_device_map is None

         # Make sure `enable_model_cpu_offload()` can be used and the pipeline can be called.
-        sd_pipe_with_device_map.enable_model_cpu_offload()
+        sd_pipe_with_device_map.enable_model_cpu_offload(device=torch_device)
         _ = sd_pipe_with_device_map("hello", num_inference_steps=2)

     def test_reset_device_map_enable_sequential_cpu_offload(self):
@@ -1521,5 +1521,5 @@ class StableDiffusionPipelineDeviceMapTests(unittest.TestCase):
         assert sd_pipe_with_device_map.hf_device_map is None

         # Make sure `enable_sequential_cpu_offload()` can be used and the pipeline can be called.
-        sd_pipe_with_device_map.enable_sequential_cpu_offload()
+        sd_pipe_with_device_map.enable_sequential_cpu_offload(device=torch_device)
         _ = sd_pipe_with_device_map("hello", num_inference_steps=2)
...
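
The three `test_reset_device_map_*` cases exercise the same flow; a condensed sketch under stated assumptions (the `device_map` value and model id are placeholders, not from the diff):

```python
from diffusers import DiffusionPipeline
from diffusers.utils.testing_utils import torch_device

pipe = DiffusionPipeline.from_pretrained("some-org/some-model", device_map="balanced")
pipe.reset_device_map()          # clears hf_device_map back to None
assert pipe.hf_device_map is None
# ...after which plain .to(), model offload, or sequential offload work again:
pipe.enable_model_cpu_offload(device=torch_device)
```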
@@ -10,7 +10,7 @@ from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, SD3Transfo
 from diffusers.utils.testing_utils import (
     backend_empty_cache,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     slow,
     torch_device,
 )
@@ -232,7 +232,7 @@ class StableDiffusion3PipelineFastTests(unittest.TestCase, PipelineTesterMixin):
 @slow
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @pytest.mark.big_gpu_with_torch_cuda
 class StableDiffusion3PipelineSlowTests(unittest.TestCase):
     pipeline_class = StableDiffusion3Pipeline
...
@@ -18,7 +18,7 @@ from diffusers.utils.testing_utils import (
     backend_empty_cache,
     floats_tensor,
     numpy_cosine_similarity_distance,
-    require_big_gpu_with_torch_cuda,
+    require_big_accelerator,
     slow,
     torch_device,
 )
@@ -166,7 +166,7 @@ class StableDiffusion3Img2ImgPipelineFastTests(PipelineLatentTesterMixin, unitte
 @slow
-@require_big_gpu_with_torch_cuda
+@require_big_accelerator
 @pytest.mark.big_gpu_with_torch_cuda
 class StableDiffusion3Img2ImgPipelineSlowTests(unittest.TestCase):
     pipeline_class = StableDiffusion3Img2ImgPipeline
@@ -202,11 +202,10 @@ class StableDiffusion3Img2ImgPipelineSlowTests(unittest.TestCase):
         }

     def test_sd3_img2img_inference(self):
-        torch.manual_seed(0)
         pipe = self.pipeline_class.from_pretrained(self.repo_id, torch_dtype=torch.float16)
         pipe.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_inputs(torch_device)
         image = pipe(**inputs).images[0]
         image_slice = image[0, :10, :10]
         expected_slice = np.array(
...
@@ -45,6 +45,7 @@ from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.source_code_parsing_utils import ReturnNameVisitor
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    backend_empty_cache,
     require_accelerate_version_greater,
     require_accelerator,
     require_hf_hub_version_greater,
@@ -1108,13 +1109,13 @@ class PipelineTesterMixin:
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test in case of CUDA runtime errors
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_save_load_local(self, expected_max_difference=5e-4):
         components = self.get_dummy_components()
@@ -1423,7 +1424,6 @@ class PipelineTesterMixin:
     def test_save_load_optional_components(self, expected_max_difference=1e-4):
         if not hasattr(self.pipeline_class, "_optional_components"):
             return
-
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         for component in pipe.components.values():
@@ -1438,6 +1438,7 @@ class PipelineTesterMixin:
         generator_device = "cpu"
         inputs = self.get_dummy_inputs(generator_device)
+        torch.manual_seed(0)
         output = pipe(**inputs)[0]

         with tempfile.TemporaryDirectory() as tmpdir:
@@ -1456,6 +1457,7 @@ class PipelineTesterMixin:
         )
         inputs = self.get_dummy_inputs(generator_device)
+        torch.manual_seed(0)
         output_loaded = pipe_loaded(**inputs)[0]

         max_diff = np.abs(to_np(output) - to_np(output_loaded)).max()
@@ -1550,12 +1552,14 @@ class PipelineTesterMixin:
         generator_device = "cpu"
         inputs = self.get_dummy_inputs(generator_device)
+        torch.manual_seed(0)
         output_without_offload = pipe(**inputs)[0]

         pipe.enable_sequential_cpu_offload(device=torch_device)
         assert pipe._execution_device.type == torch_device

         inputs = self.get_dummy_inputs(generator_device)
+        torch.manual_seed(0)
         output_with_offload = pipe(**inputs)[0]

         max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
@@ -1613,12 +1617,14 @@ class PipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)

         inputs = self.get_dummy_inputs(generator_device)
+        torch.manual_seed(0)
         output_without_offload = pipe(**inputs)[0]

         pipe.enable_model_cpu_offload(device=torch_device)
         assert pipe._execution_device.type == torch_device

         inputs = self.get_dummy_inputs(generator_device)
+        torch.manual_seed(0)
         output_with_offload = pipe(**inputs)[0]

         max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
...
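
The repeated `torch.manual_seed(0)` insertions make each compared forward pass start from an identical global RNG state, so any difference between the two outputs reflects the offload path rather than random draws. The principle, as a self-contained example:

```python
import torch

def noisy_op(x: torch.Tensor) -> torch.Tensor:
    # Stand-in for a pipeline call that consumes the global RNG.
    return x + torch.randn_like(x)

x = torch.zeros(4)
torch.manual_seed(0)
a = noisy_op(x)
torch.manual_seed(0)
b = noisy_op(x)   # same seed right before the call -> identical draws
assert torch.equal(a, b)
```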
@@ -303,6 +303,7 @@ class UnCLIPPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             shape, dtype=dtype, device=device, generator=generator, latents=None, scheduler=DummyScheduler()
         )
         shape = (batch_size, decoder.config.in_channels, decoder.config.sample_size, decoder.config.sample_size)
+        generator = torch.Generator(device=device).manual_seed(0)
         decoder_latents = pipe.prepare_latents(
             shape, dtype=dtype, device=device, generator=generator, latents=None, scheduler=DummyScheduler()
         )
...
@@ -407,6 +407,7 @@ class UnCLIPImageVariationPipelineFastTests(PipelineTesterMixin, unittest.TestCa
             pipe.super_res_first.config.sample_size,
             pipe.super_res_first.config.sample_size,
         )
+        generator = torch.Generator(device=device).manual_seed(0)
         super_res_latents = pipe.prepare_latents(
             shape, dtype=dtype, device=device, generator=generator, latents=None, scheduler=DummyScheduler()
         )
...
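
Both UnCLIP fixes above re-seed a fresh generator before the next `prepare_latents` call. A `torch.Generator` is stateful: each draw advances it, so reusing one generator makes later latents depend on how many tensors were drawn earlier. Re-seeding decouples them:

```python
import torch

g = torch.Generator(device="cpu").manual_seed(0)
first = torch.randn(2, generator=g)
second = torch.randn(2, generator=g)   # depends on `first` having been drawn

g = torch.Generator(device="cpu").manual_seed(0)  # fresh seed, as in the diff
again = torch.randn(2, generator=g)    # reproduces `first` exactly
assert torch.equal(first, again)
```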
@@ -64,7 +64,7 @@ class DPMSolverSDESchedulerTest(SchedulerCommonTest):
         if torch_device in ["mps"]:
             assert abs(result_sum.item() - 167.47821044921875) < 1e-2
             assert abs(result_mean.item() - 0.2178705964565277) < 1e-3
-        elif torch_device in ["cuda"]:
+        elif torch_device in ["cuda", "xpu"]:
             assert abs(result_sum.item() - 171.59352111816406) < 1e-2
             assert abs(result_mean.item() - 0.22342906892299652) < 1e-3
         else:
@@ -96,7 +96,7 @@ class DPMSolverSDESchedulerTest(SchedulerCommonTest):
         if torch_device in ["mps"]:
             assert abs(result_sum.item() - 124.77149200439453) < 1e-2
             assert abs(result_mean.item() - 0.16226289014816284) < 1e-3
-        elif torch_device in ["cuda"]:
+        elif torch_device in ["cuda", "xpu"]:
             assert abs(result_sum.item() - 128.1663360595703) < 1e-2
             assert abs(result_mean.item() - 0.16688326001167297) < 1e-3
         else:
@@ -127,7 +127,7 @@ class DPMSolverSDESchedulerTest(SchedulerCommonTest):
         if torch_device in ["mps"]:
             assert abs(result_sum.item() - 167.46957397460938) < 1e-2
             assert abs(result_mean.item() - 0.21805934607982635) < 1e-3
-        elif torch_device in ["cuda"]:
+        elif torch_device in ["cuda", "xpu"]:
             assert abs(result_sum.item() - 171.59353637695312) < 1e-2
             assert abs(result_mean.item() - 0.22342908382415771) < 1e-3
         else:
@@ -159,7 +159,7 @@ class DPMSolverSDESchedulerTest(SchedulerCommonTest):
         if torch_device in ["mps"]:
             assert abs(result_sum.item() - 176.66974135742188) < 1e-2
             assert abs(result_mean.item() - 0.23003872730981811) < 1e-2
-        elif torch_device in ["cuda"]:
+        elif torch_device in ["cuda", "xpu"]:
             assert abs(result_sum.item() - 177.63653564453125) < 1e-2
             assert abs(result_mean.item() - 0.23003872730981811) < 1e-2
         else: