Unverified Commit 64b3e0f5 authored by Fanli Lin, committed by GitHub

make `pipelines` tests device-agnostic (part1) (#9399)



* enable on xpu

* add 1 more

* add one more

* enable more

* add 1 more

* add more

* enable 1

* enable more cases

* enable

* enable

* update comment

* one more

* enable 1

* add more cases

* enable xpu

* add one more case

* add more cases

* add 1

* add more

* add more cases

* add case

* enable

* add more

* add more

* add more

* enable more

* add more

* update code

* update test marker

* add skip back

* update comment

* remove single files

* remove

* style

* add

* revert

* reformat

* update decorator

* update

* update

* update

* Update tests/pipelines/deepfloyd_if/test_if.py
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>

* Update tests/pipelines/animatediff/test_animatediff_controlnet.py
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>

* Update tests/pipelines/animatediff/test_animatediff.py
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>

* Update tests/pipelines/animatediff/test_animatediff_controlnet.py
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>

* update float16

* no unittest.skip

* update

* apply style check

* reapply format

---------
Co-authored-by: Sayak Paul <spsayakpaul@gmail.com>
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
parent 2e86a3f0
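
The diff below applies one pattern throughout the pipeline tests: hard-coded `"cuda"` strings and CUDA-only `unittest.skipIf` guards are replaced with the device-agnostic helpers from `diffusers.utils.testing_utils` (`require_accelerator`, `require_accelerate_version_greater`, `torch_device`), so the same tests run on CUDA, XPU, or any other supported accelerator. A minimal sketch of that pattern is shown here; the test class and its `get_dummy_pipeline` fixture are hypothetical stand-ins, while the decorators and `torch_device` are the ones used in the diff.

# Sketch of the device-agnostic test pattern used throughout this diff.
# `require_accelerator`, `require_accelerate_version_greater`, and `torch_device`
# come from diffusers.utils.testing_utils; `get_dummy_pipeline` is a hypothetical
# stand-in for the real fixtures in the test suite.
import unittest

from diffusers.utils.testing_utils import (
    require_accelerate_version_greater,
    require_accelerator,
    torch_device,
)


class DummyPipelineTests(unittest.TestCase):
    # Before: @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
    @require_accelerator
    def test_to_device(self):
        pipe = self.get_dummy_pipeline()  # hypothetical fixture
        pipe.to(torch_device)  # instead of pipe.to("cuda")
        model_devices = [
            component.device.type
            for component in pipe.components.values()
            if hasattr(component, "device")
        ]
        self.assertTrue(all(device == torch_device for device in model_devices))

    # Offload tests pass the target device explicitly instead of assuming CUDA.
    @require_accelerator
    @require_accelerate_version_greater("0.14.0")
    def test_cpu_offload_forward_pass(self):
        pipe = self.get_dummy_pipeline()  # hypothetical fixture
        pipe.enable_sequential_cpu_offload(device=torch_device)
        assert pipe._execution_device.type == torch_device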
@@ -18,7 +18,7 @@ from diffusers import (
     UNetMotionModel,
 )
 from diffusers.utils import is_xformers_available, logging
-from diffusers.utils.testing_utils import floats_tensor, torch_device
+from diffusers.utils.testing_utils import floats_tensor, require_accelerator, torch_device

 from ..test_pipelines_common import IPAdapterTesterMixin, PipelineFromPipeTesterMixin, PipelineTesterMixin
@@ -278,7 +278,7 @@ class PIAPipelineFastTests(IPAdapterTesterMixin, PipelineTesterMixin, PipelineFr
         max_diff = np.abs(to_np(output_batch[0][0]) - to_np(output[0][0])).max()
         assert max_diff < expected_max_diff

-    @unittest.skipIf(torch_device != "cuda", reason="CUDA and CPU are required to switch devices")
+    @require_accelerator
     def test_to_device(self):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -294,14 +294,14 @@ class PIAPipelineFastTests(IPAdapterTesterMixin, PipelineTesterMixin, PipelineFr
         output_cpu = pipe(**self.get_dummy_inputs("cpu"))[0]
         self.assertTrue(np.isnan(output_cpu).sum() == 0)

-        pipe.to("cuda")
+        pipe.to(torch_device)
         model_devices = [
             component.device.type for component in pipe.components.values() if hasattr(component, "device")
         ]
-        self.assertTrue(all(device == "cuda" for device in model_devices))
+        self.assertTrue(all(device == torch_device for device in model_devices))

-        output_cuda = pipe(**self.get_dummy_inputs("cuda"))[0]
-        self.assertTrue(np.isnan(to_np(output_cuda)).sum() == 0)
+        output_device = pipe(**self.get_dummy_inputs(torch_device))[0]
+        self.assertTrue(np.isnan(to_np(output_device)).sum() == 0)

     def test_to_dtype(self):
         components = self.get_dummy_components()
...
@@ -28,6 +28,7 @@ from diffusers.utils.testing_utils import (
     enable_full_determinism,
     floats_tensor,
     nightly,
+    require_accelerator,
     require_torch_gpu,
     torch_device,
 )
@@ -237,7 +238,7 @@ class SafeDiffusionPipelineFastTests(unittest.TestCase):
         image = pipe("example prompt", num_inference_steps=2).images[0]
         assert image is not None

-    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
+    @require_accelerator
     def test_semantic_diffusion_fp16(self):
         """Test that stable diffusion works with fp16"""
         unet = self.dummy_cond_unet
...
@@ -30,7 +30,7 @@ from diffusers.utils.testing_utils import (
     load_numpy,
     nightly,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     torch_device,
 )
@@ -205,7 +205,7 @@ class StableDiffusionAttendAndExcitePipelineFastTests(
         super().test_from_pipe_consistent_forward_pass_cpu_offload(expected_max_diff=5e-3)

-@require_torch_gpu
+@require_torch_accelerator
 @nightly
 class StableDiffusionAttendAndExcitePipelineIntegrationTests(unittest.TestCase):
     # Attend and excite requires being able to run a backward pass at
@@ -237,7 +237,7 @@ class StableDiffusionAttendAndExcitePipelineIntegrationTests(unittest.TestCase):
         pipe = StableDiffusionAttendAndExcitePipeline.from_pretrained(
             "CompVis/stable-diffusion-v1-4", safety_checker=None, torch_dtype=torch.float16
         )
-        pipe.to("cuda")
+        pipe.to(torch_device)

         prompt = "a painting of an elephant with glasses"
         token_indices = [5, 7]
...
@@ -36,13 +36,14 @@ from diffusers import (
     StableDiffusionDepth2ImgPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils import is_accelerate_available, is_accelerate_version
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
+    require_accelerate_version_greater,
+    require_accelerator,
     require_torch_gpu,
     skip_mps,
     slow,
@@ -194,7 +195,8 @@ class StableDiffusionDepth2ImgPipelineFastTests(
         max_diff = np.abs(output - output_loaded).max()
         self.assertLess(max_diff, 1e-4)

-    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
+    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
+    @require_accelerator
     def test_save_load_float16(self):
         components = self.get_dummy_components()
         for name, module in components.items():
@@ -226,7 +228,8 @@ class StableDiffusionDepth2ImgPipelineFastTests(
         max_diff = np.abs(output - output_loaded).max()
         self.assertLess(max_diff, 2e-2, "The output of the fp16 pipeline changed after saving and loading.")

-    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
+    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
+    @require_accelerator
     def test_float16_inference(self):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -246,10 +249,8 @@ class StableDiffusionDepth2ImgPipelineFastTests(
         max_diff = np.abs(output - output_fp16).max()
         self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.")

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.14.0")
     def test_cpu_offload_forward_pass(self):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -259,7 +260,7 @@ class StableDiffusionDepth2ImgPipelineFastTests(
         inputs = self.get_dummy_inputs(torch_device)
         output_without_offload = pipe(**inputs)[0]

-        pipe.enable_sequential_cpu_offload()
+        pipe.enable_sequential_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(torch_device)
         output_with_offload = pipe(**inputs)[0]
...
@@ -29,6 +29,7 @@ from diffusers.utils.testing_utils import (
     floats_tensor,
     load_image,
     load_numpy,
+    require_accelerator,
     require_torch_gpu,
     slow,
     torch_device,
@@ -289,7 +290,7 @@ class StableDiffusionUpscalePipelineFastTests(unittest.TestCase):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
         assert np.abs(image_from_prompt_embeds_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
+    @require_accelerator
     def test_stable_diffusion_upscale_fp16(self):
         """Test that stable diffusion upscale works with fp16"""
         unet = self.dummy_cond_unet_upscale
...
@@ -34,6 +34,7 @@ from diffusers.utils.testing_utils import (
     enable_full_determinism,
     load_numpy,
     numpy_cosine_similarity_distance,
+    require_accelerator,
     require_torch_gpu,
     slow,
     torch_device,
@@ -213,7 +214,7 @@ class StableDiffusion2VPredictionPipelineFastTests(unittest.TestCase):
         assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
         assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2

-    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
+    @require_accelerator
     def test_stable_diffusion_v_pred_fp16(self):
         """Test that stable diffusion v-prediction works with fp16"""
         unet = self.dummy_cond_unet
...
@@ -24,7 +24,7 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

 from diffusers import AutoencoderKL, DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel
 from diffusers.pipelines.stable_diffusion_safe import StableDiffusionPipelineSafe as StableDiffusionPipeline
-from diffusers.utils.testing_utils import floats_tensor, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import floats_tensor, nightly, require_accelerator, require_torch_gpu, torch_device


 class SafeDiffusionPipelineFastTests(unittest.TestCase):
@@ -228,7 +228,7 @@ class SafeDiffusionPipelineFastTests(unittest.TestCase):
         image = pipe("example prompt", num_inference_steps=2).images[0]
         assert image is not None

-    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
+    @require_accelerator
     def test_stable_diffusion_fp16(self):
         """Test that stable diffusion works with fp16"""
         unet = self.dummy_cond_unet
...
@@ -18,13 +18,15 @@ from diffusers import (
     StableVideoDiffusionPipeline,
     UNetSpatioTemporalConditionModel,
 )
-from diffusers.utils import is_accelerate_available, is_accelerate_version, load_image, logging
+from diffusers.utils import load_image, logging
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     CaptureLogger,
     enable_full_determinism,
     floats_tensor,
     numpy_cosine_similarity_distance,
+    require_accelerate_version_greater,
+    require_accelerator,
     require_torch_gpu,
     slow,
     torch_device,
@@ -250,7 +252,8 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
         max_diff = np.abs(to_np(output) - to_np(output_fp16)).max()
         self.assertLess(max_diff, expected_max_diff, "The outputs of the fp16 and fp32 pipelines are too different.")

-    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
+    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
+    @require_accelerator
     def test_save_load_float16(self, expected_max_diff=1e-2):
         components = self.get_dummy_components()
         for name, module in components.items():
@@ -366,7 +369,7 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
         max_diff = np.abs(to_np(output) - to_np(output_loaded)).max()
         self.assertLess(max_diff, expected_max_difference)

-    @unittest.skipIf(torch_device != "cuda", reason="CUDA and CPU are required to switch devices")
+    @require_accelerator
     def test_to_device(self):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -381,14 +384,14 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
         output_cpu = pipe(**self.get_dummy_inputs("cpu")).frames[0]
         self.assertTrue(np.isnan(output_cpu).sum() == 0)

-        pipe.to("cuda")
+        pipe.to(torch_device)
         model_devices = [
             component.device.type for component in pipe.components.values() if hasattr(component, "device")
         ]
-        self.assertTrue(all(device == "cuda" for device in model_devices))
+        self.assertTrue(all(device == torch_device for device in model_devices))

-        output_cuda = pipe(**self.get_dummy_inputs("cuda")).frames[0]
-        self.assertTrue(np.isnan(to_np(output_cuda)).sum() == 0)
+        output_device = pipe(**self.get_dummy_inputs(torch_device)).frames[0]
+        self.assertTrue(np.isnan(to_np(output_device)).sum() == 0)

     def test_to_dtype(self):
         components = self.get_dummy_components()
@@ -402,10 +405,8 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
         model_dtypes = [component.dtype for component in pipe.components.values() if hasattr(component, "dtype")]
         self.assertTrue(all(dtype == torch.float16 for dtype in model_dtypes))

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.14.0")
     def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -419,7 +420,7 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
         inputs = self.get_dummy_inputs(generator_device)
         output_without_offload = pipe(**inputs).frames[0]

-        pipe.enable_sequential_cpu_offload()
+        pipe.enable_sequential_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs).frames[0]
@@ -427,10 +428,8 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
         max_diff = np.abs(to_np(output_with_offload) - to_np(output_without_offload)).max()
         self.assertLess(max_diff, expected_max_diff, "CPU offloading should not affect the inference results")

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.17.0")
     def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
         generator_device = "cpu"
         components = self.get_dummy_components()
@@ -446,7 +445,7 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
         inputs = self.get_dummy_inputs(generator_device)
         output_without_offload = pipe(**inputs).frames[0]

-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs).frames[0]
...
@@ -38,9 +38,11 @@ from diffusers.models.unets.unet_motion_model import UNetMotionModel
 from diffusers.pipelines.pipeline_utils import StableDiffusionMixin
 from diffusers.schedulers import KarrasDiffusionSchedulers
 from diffusers.utils import logging
-from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
+from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    require_accelerate_version_greater,
+    require_accelerator,
     require_torch,
     skip_mps,
     torch_device,
@@ -770,17 +772,15 @@ class PipelineFromPipeTesterMixin:
             type(proc) == AttnProcessor for proc in component.attn_processors.values()
         ), "`from_pipe` changed the attention processor in original pipeline."

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.14.0")
     def test_from_pipe_consistent_forward_pass_cpu_offload(self, expected_max_diff=1e-3):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
         for component in pipe.components.values():
             if hasattr(component, "set_default_attn_processor"):
                 component.set_default_attn_processor()
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)
         inputs = self.get_dummy_inputs_pipe(torch_device)
         output = pipe(**inputs)[0]
@@ -815,7 +815,7 @@ class PipelineFromPipeTesterMixin:
             if hasattr(component, "set_default_attn_processor"):
                 component.set_default_attn_processor()

-        pipe_from_original.enable_model_cpu_offload()
+        pipe_from_original.enable_model_cpu_offload(device=torch_device)
         pipe_from_original.set_progress_bar_config(disable=None)
         inputs = self.get_dummy_inputs_pipe(torch_device)
         output_from_original = pipe_from_original(**inputs)[0]
@@ -1202,7 +1202,8 @@ class PipelineTesterMixin:
         self.assertTrue(hasattr(pipe, "components"))
         self.assertTrue(set(pipe.components.keys()) == set(init_components.keys()))

-    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
+    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
+    @require_accelerator
     def test_float16_inference(self, expected_max_diff=5e-2):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -1239,7 +1240,8 @@ class PipelineTesterMixin:
         max_diff = np.abs(to_np(output) - to_np(output_fp16)).max()
         self.assertLess(max_diff, expected_max_diff, "The outputs of the fp16 and fp32 pipelines are too different.")

-    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
+    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
+    @require_accelerator
     def test_save_load_float16(self, expected_max_diff=1e-2):
         components = self.get_dummy_components()
         for name, module in components.items():
@@ -1320,7 +1322,7 @@ class PipelineTesterMixin:
         max_diff = np.abs(to_np(output) - to_np(output_loaded)).max()
         self.assertLess(max_diff, expected_max_difference)

-    @unittest.skipIf(torch_device != "cuda", reason="CUDA and CPU are required to switch devices")
+    @require_accelerator
     def test_to_device(self):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -1333,11 +1335,11 @@ class PipelineTesterMixin:
         output_cpu = pipe(**self.get_dummy_inputs("cpu"))[0]
         self.assertTrue(np.isnan(output_cpu).sum() == 0)

-        pipe.to("cuda")
+        pipe.to(torch_device)
         model_devices = [component.device.type for component in components.values() if hasattr(component, "device")]
-        self.assertTrue(all(device == "cuda" for device in model_devices))
+        self.assertTrue(all(device == torch_device for device in model_devices))

-        output_cuda = pipe(**self.get_dummy_inputs("cuda"))[0]
+        output_cuda = pipe(**self.get_dummy_inputs(torch_device))[0]
         self.assertTrue(np.isnan(to_np(output_cuda)).sum() == 0)

     def test_to_dtype(self):
@@ -1394,10 +1396,8 @@ class PipelineTesterMixin:
         assert_mean_pixel_difference(to_np(output_with_slicing1[0]), to_np(output_without_slicing[0]))
         assert_mean_pixel_difference(to_np(output_with_slicing2[0]), to_np(output_without_slicing[0]))

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.14.0")
     def test_sequential_cpu_offload_forward_pass(self, expected_max_diff=1e-4):
         import accelerate
@@ -1413,8 +1413,8 @@ class PipelineTesterMixin:
         inputs = self.get_dummy_inputs(generator_device)
         output_without_offload = pipe(**inputs)[0]

-        pipe.enable_sequential_cpu_offload()
-        assert pipe._execution_device.type == "cuda"
+        pipe.enable_sequential_cpu_offload(device=torch_device)
+        assert pipe._execution_device.type == torch_device
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs)[0]
@@ -1457,10 +1457,8 @@ class PipelineTesterMixin:
             f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
         )

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.17.0")
     def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
         import accelerate
@@ -1478,8 +1476,8 @@ class PipelineTesterMixin:
         inputs = self.get_dummy_inputs(generator_device)
         output_without_offload = pipe(**inputs)[0]

-        pipe.enable_model_cpu_offload()
-        assert pipe._execution_device.type == "cuda"
+        pipe.enable_model_cpu_offload(device=torch_device)
+        assert pipe._execution_device.type == torch_device
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs)[0]
@@ -1514,10 +1512,8 @@ class PipelineTesterMixin:
             f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
         )

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.17.0")
     def test_cpu_offload_forward_pass_twice(self, expected_max_diff=2e-4):
         import accelerate
@@ -1531,11 +1527,11 @@ class PipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)

-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs)[0]

-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload_twice = pipe(**inputs)[0]
@@ -1571,10 +1567,8 @@ class PipelineTesterMixin:
             f"Not installed correct hook: {offloaded_modules_with_incorrect_hooks}",
         )

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.14.0")
     def test_sequential_offload_forward_pass_twice(self, expected_max_diff=2e-4):
         import accelerate
@@ -1588,11 +1582,11 @@ class PipelineTesterMixin:
         pipe.set_progress_bar_config(disable=None)

-        pipe.enable_sequential_cpu_offload()
+        pipe.enable_sequential_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload = pipe(**inputs)[0]

-        pipe.enable_sequential_cpu_offload()
+        pipe.enable_sequential_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(generator_device)
         output_with_offload_twice = pipe(**inputs)[0]
...
@@ -23,8 +23,14 @@ import torch
 from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer

 from diffusers import AutoencoderKL, DDIMScheduler, TextToVideoZeroSDXLPipeline, UNet2DConditionModel
-from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    enable_full_determinism,
+    nightly,
+    require_accelerate_version_greater,
+    require_accelerator,
+    require_torch_gpu,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import PipelineFromPipeTesterMixin, PipelineTesterMixin
@@ -213,7 +219,8 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
         max_diff = np.abs(to_np(output) - to_np(output_tuple)).max()
         self.assertLess(max_diff, expected_max_difference)

-    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
+    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
+    @require_accelerator
     def test_float16_inference(self, expected_max_diff=5e-2):
         components = self.get_dummy_components()
         for name, module in components.items():
@@ -255,10 +262,8 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
     def test_inference_batch_single_identical(self):
         pass

-    @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.17.0"),
-        reason="CPU offload is only available with CUDA and `accelerate v0.17.0` or higher",
-    )
+    @require_accelerator
+    @require_accelerate_version_greater("0.17.0")
     def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -268,7 +273,7 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
         inputs = self.get_dummy_inputs(self.generator_device)
         output_without_offload = pipe(**inputs)[0]

-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs(self.generator_device)
         output_with_offload = pipe(**inputs)[0]
@@ -279,7 +284,8 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
     def test_pipeline_call_signature(self):
         pass

-    @unittest.skipIf(torch_device != "cuda", reason="float16 requires CUDA")
+    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
+    @require_accelerator
     def test_save_load_float16(self, expected_max_diff=1e-2):
         components = self.get_dummy_components()
         for name, module in components.items():
@@ -331,7 +337,7 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
     def test_sequential_cpu_offload_forward_pass(self):
         pass

-    @unittest.skipIf(torch_device != "cuda", reason="CUDA and CPU are required to switch devices")
+    @require_accelerator
     def test_to_device(self):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -344,12 +350,12 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
         output_cpu = pipe(**self.get_dummy_inputs("cpu"))[0]  # generator set to cpu
         self.assertTrue(np.isnan(output_cpu).sum() == 0)

-        pipe.to("cuda")
+        pipe.to(torch_device)
         model_devices = [component.device.type for component in components.values() if hasattr(component, "device")]
-        self.assertTrue(all(device == "cuda" for device in model_devices))
+        self.assertTrue(all(device == torch_device for device in model_devices))

-        output_cuda = pipe(**self.get_dummy_inputs("cpu"))[0]  # generator set to cpu
-        self.assertTrue(np.isnan(to_np(output_cuda)).sum() == 0)
+        output_device = pipe(**self.get_dummy_inputs("cpu"))[0]  # generator set to cpu
+        self.assertTrue(np.isnan(to_np(output_device)).sum() == 0)

     @unittest.skip(
         reason="Cannot call `set_default_attn_processor` as this pipeline uses a specific attention processor."
...
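
For reference, the gating decorators this PR relies on (added to src/diffusers/utils/testing_utils.py) behave like ordinary unittest skip helpers keyed on the detected device and the installed `accelerate` version. The sketch below is an assumption about their general shape, not the actual implementation, and the simplified `torch_device` detection stands in for the module-level value defined in testing_utils.

# Hypothetical sketch of the gating decorators; the real definitions live in
# src/diffusers/utils/testing_utils.py and cover more device types.
import unittest

import torch
from packaging import version

# Simplified stand-in for testing_utils.torch_device ("cuda", "xpu", "mps", or "cpu").
torch_device = "cuda" if torch.cuda.is_available() else "cpu"


def require_accelerator(test_case):
    """Skip the test unless a hardware accelerator (CUDA, XPU, MPS, ...) was selected."""
    return unittest.skipUnless(
        torch_device is not None and torch_device != "cpu",
        "test requires a hardware accelerator",
    )(test_case)


def require_accelerate_version_greater(accelerate_version):
    """Skip the test unless `accelerate` is installed and newer than the given version."""

    def decorator(test_case):
        try:
            import accelerate

            correct_version = version.parse(accelerate.__version__) > version.parse(accelerate_version)
        except ImportError:
            correct_version = False
        return unittest.skipUnless(
            correct_version, f"test requires accelerate > {accelerate_version}"
        )(test_case)

    return decorator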