Unverified Commit 7855ac59 authored by Fanli Lin, committed by GitHub

[tests] make tests device-agnostic (part 4) (#10508)



* initial commit

* fix empty cache

* fix one more

* fix style

* update device functions

* update

* update

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py
Co-authored-by: hlky <hlky@hlky.ac>

* with gc.collect

* update

* make style

* check_torch_dependencies

* add mps empty cache

* add changes

* bug fix

* enable on xpu

* update more cases

* revert

* revert back

* Update test_stable_diffusion_xl.py

* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Apply suggestions from code review
Co-authored-by: hlky <hlky@hlky.ac>

* add test marker

---------
Co-authored-by: hlky <hlky@hlky.ac>
parent 30cef6bf
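The thrust of the change: CUDA-only calls (`torch.cuda.empty_cache()`, hardcoded `"cuda"` strings, `require_torch_gpu`) are swapped for device-agnostic helpers from `diffusers.utils.testing_utils`. A minimal sketch of how such a cache-clearing dispatcher can look — the branch logic below is illustrative, not the exact implementation shipped in `testing_utils`:

```python
# Illustrative sketch only: the real backend_empty_cache lives in
# diffusers.utils.testing_utils; this dispatch table is an assumption.
import gc

import torch


def backend_empty_cache(device: str) -> None:
    """Release cached allocator blocks on whichever backend `device` names."""
    if device.startswith("cuda") and torch.cuda.is_available():
        torch.cuda.empty_cache()
    elif device.startswith("xpu") and hasattr(torch, "xpu") and torch.xpu.is_available():
        torch.xpu.empty_cache()
    elif device.startswith("mps") and torch.backends.mps.is_available():
        torch.mps.empty_cache()
    # CPU has no device cache, so fall through silently.


def flush(device: str) -> None:
    # Tests call gc.collect() first so Python references are dropped
    # before the backend allocator is asked to release its cache.
    gc.collect()
    backend_empty_cache(device)
```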
@@ -20,14 +20,20 @@ import numpy as np
 import torch
 
 from diffusers import StableDiffusionXLKDiffusionPipeline
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, slow, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    require_torch_accelerator,
+    slow,
+    torch_device,
+)
 
 enable_full_determinism()
 
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionXLKPipelineIntegrationTests(unittest.TestCase):
     dtype = torch.float16
@@ -35,13 +41,13 @@ class StableDiffusionXLKPipelineIntegrationTests(unittest.TestCase):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_stable_diffusion_xl(self):
         sd_pipe = StableDiffusionXLKDiffusionPipeline.from_pretrained(
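`require_torch_gpu` skips a test unless CUDA is available; `require_torch_accelerator` relaxes that to any supported accelerator, which is why the decorator swap recurs in every hunk below. A hypothetical sketch of such a decorator; the device-detection order is an assumption, not the exact logic in `testing_utils`:

```python
# Hypothetical sketch of an accelerator-gated skip decorator; the real
# require_torch_accelerator in diffusers.utils.testing_utils may differ.
import unittest

import torch

# Stand-in for the torch_device that testing_utils resolves at import time.
if torch.cuda.is_available():
    torch_device = "cuda"
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    torch_device = "xpu"
elif torch.backends.mps.is_available():
    torch_device = "mps"
else:
    torch_device = "cpu"


def require_torch_accelerator(test_case):
    # Skip unless the resolved device is an accelerator of any vendor,
    # instead of probing specifically for CUDA.
    return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)
```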
@@ -22,12 +22,13 @@ from diffusers.utils import load_image, logging
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     CaptureLogger,
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     numpy_cosine_similarity_distance,
     require_accelerate_version_greater,
     require_accelerator,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -515,19 +516,19 @@ class StableVideoDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCa
 
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_sd_video(self):
         pipe = StableVideoDiffusionPipeline.from_pretrained(
@@ -535,7 +536,7 @@ class StableVideoDiffusionPipelineSlowTests(unittest.TestCase):
             variant="fp16",
             torch_dtype=torch.float16,
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)
 
         image = load_image(
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/pix2pix/cat_6.png?download=true"
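`enable_model_cpu_offload()` assumes CUDA as the execution device when none is given, so the tests now pass `device=torch_device` explicitly and the offload hooks target whatever accelerator the runner has. Usage sketch; the checkpoint name is an assumption for illustration:

```python
# Sketch: explicit offload device. Checkpoint name is assumed for the example.
import torch

from diffusers import StableVideoDiffusionPipeline

device = "cuda"  # in the test suite this is the resolved torch_device

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid", variant="fp16", torch_dtype=torch.float16
)
# Hooks move each component onto `device` only while it executes, keeping
# peak accelerator memory low; without the argument the hooks assume CUDA.
pipe.enable_model_cpu_offload(device=device)
```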
@@ -1383,11 +1383,11 @@ class PipelineFastTests(unittest.TestCase):
             feature_extractor=self.dummy_extractor,
         )
-        sd.enable_model_cpu_offload()
+        sd.enable_model_cpu_offload(device=torch_device)
 
         logger = logging.get_logger("diffusers.pipelines.pipeline_utils")
         with CaptureLogger(logger) as cap_logger:
-            sd.to("cuda")
+            sd.to(torch_device)
 
         assert "It is strongly recommended against doing so" in str(cap_logger)
@@ -23,10 +23,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
 from diffusers import AutoencoderKL, DDIMScheduler, TextToVideoSDPipeline, UNet3DConditionModel
 from diffusers.utils import is_xformers_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     load_numpy,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     slow,
     torch_device,
@@ -184,19 +185,19 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, SDFunctionTesterMixin,
 
 @slow
 @skip_mps
-@require_torch_gpu
+@require_torch_accelerator
 class TextToVideoSDPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)
 
     def test_two_step_model(self):
         expected_video = load_numpy(
@@ -27,6 +27,7 @@ from diffusers.utils.testing_utils import (
     load_image,
     nightly,
     require_torch_2,
+    require_torch_accelerator,
     require_torch_gpu,
     run_test_in_subprocess,
     torch_device,
@@ -501,20 +502,19 @@ class UniDiffuserPipelineFastTests(
     def test_inference_batch_single_identical(self):
         super().test_inference_batch_single_identical(expected_max_diff=2e-4)
 
-    @require_torch_gpu
-    def test_unidiffuser_default_joint_v1_cuda_fp16(self):
-        device = "cuda"
+    @require_torch_accelerator
+    def test_unidiffuser_default_joint_v1_fp16(self):
         unidiffuser_pipe = UniDiffuserPipeline.from_pretrained(
             "hf-internal-testing/unidiffuser-test-v1", torch_dtype=torch.float16
         )
-        unidiffuser_pipe = unidiffuser_pipe.to(device)
+        unidiffuser_pipe = unidiffuser_pipe.to(torch_device)
         unidiffuser_pipe.set_progress_bar_config(disable=None)
 
         # Set mode to 'joint'
         unidiffuser_pipe.set_joint_mode()
         assert unidiffuser_pipe.mode == "joint"
 
-        inputs = self.get_dummy_inputs_with_latents(device)
+        inputs = self.get_dummy_inputs_with_latents(torch_device)
         # Delete prompt and image for joint inference.
         del inputs["prompt"]
         del inputs["image"]
@@ -531,20 +531,19 @@ class UniDiffuserPipelineFastTests(
         expected_text_prefix = '" This This'
         assert text[0][: len(expected_text_prefix)] == expected_text_prefix
 
-    @require_torch_gpu
-    def test_unidiffuser_default_text2img_v1_cuda_fp16(self):
-        device = "cuda"
+    @require_torch_accelerator
+    def test_unidiffuser_default_text2img_v1_fp16(self):
         unidiffuser_pipe = UniDiffuserPipeline.from_pretrained(
             "hf-internal-testing/unidiffuser-test-v1", torch_dtype=torch.float16
         )
-        unidiffuser_pipe = unidiffuser_pipe.to(device)
+        unidiffuser_pipe = unidiffuser_pipe.to(torch_device)
         unidiffuser_pipe.set_progress_bar_config(disable=None)
 
         # Set mode to 'text2img'
         unidiffuser_pipe.set_text_to_image_mode()
         assert unidiffuser_pipe.mode == "text2img"
 
-        inputs = self.get_dummy_inputs_with_latents(device)
+        inputs = self.get_dummy_inputs_with_latents(torch_device)
         # Delete prompt and image for joint inference.
         del inputs["image"]
         inputs["data_type"] = 1
@@ -556,20 +555,19 @@ class UniDiffuserPipelineFastTests(
         expected_img_slice = np.array([0.5054, 0.5498, 0.5854, 0.3052, 0.4458, 0.6489, 0.5122, 0.4810, 0.6138])
         assert np.abs(image_slice.flatten() - expected_img_slice).max() < 1e-3
 
-    @require_torch_gpu
-    def test_unidiffuser_default_img2text_v1_cuda_fp16(self):
-        device = "cuda"
+    @require_torch_accelerator
+    def test_unidiffuser_default_img2text_v1_fp16(self):
         unidiffuser_pipe = UniDiffuserPipeline.from_pretrained(
             "hf-internal-testing/unidiffuser-test-v1", torch_dtype=torch.float16
         )
-        unidiffuser_pipe = unidiffuser_pipe.to(device)
+        unidiffuser_pipe = unidiffuser_pipe.to(torch_device)
         unidiffuser_pipe.set_progress_bar_config(disable=None)
 
         # Set mode to 'img2text'
         unidiffuser_pipe.set_image_to_text_mode()
         assert unidiffuser_pipe.mode == "img2text"
 
-        inputs = self.get_dummy_inputs_with_latents(device)
+        inputs = self.get_dummy_inputs_with_latents(torch_device)
         # Delete prompt and image for joint inference.
         del inputs["prompt"]
         inputs["data_type"] = 1
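Besides the decorator swap, the UniDiffuser tests drop the hardcoded `device = "cuda"` local and the `_cuda_` infix in test names; everything flows through the shared `torch_device`. `require_torch_gpu` stays imported, presumably because other tests in the file still pin CUDA. A tiny sketch of the pattern:

```python
# Minimal illustration of the device-agnostic pattern the hunks apply.
import torch

from diffusers.utils.testing_utils import torch_device

# Instead of device = "cuda", tensors and pipelines target the resolved
# torch_device, so the same fp16 path runs on CUDA, XPU, or MPS hosts.
latents = torch.randn(1, 4, 8, 8, device=torch_device, dtype=torch.float16)
print(latents.device)
```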
@@ -21,7 +21,7 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
 from diffusers import DDPMWuerstchenScheduler, WuerstchenCombinedPipeline
 from diffusers.pipelines.wuerstchen import PaellaVQModel, WuerstchenDiffNeXt, WuerstchenPrior
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device
 
 from ..test_pipelines_common import PipelineTesterMixin
@@ -198,7 +198,7 @@ class WuerstchenCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestCase
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
 
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -207,12 +207,12 @@ class WuerstchenCombinedPipelineFastTests(PipelineTesterMixin, unittest.TestCase
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
 
         pipes.append(sd_pipe)
 
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
 
         pipes.append(sd_pipe)
 
         image_slices = []
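`test_offloads` builds the same pipeline under different offload strategies and asserts the outputs match; both `enable_sequential_cpu_offload` (leaf-module granularity, lowest memory, slowest) and `enable_model_cpu_offload` (whole-component granularity) now receive the device. A hedged sketch of that comparison shape; the helper below and its argument names are stand-ins, not the test's actual code:

```python
# Sketch of the offload-strategy comparison; assumes the inputs request
# numpy output (output_type="np") so slices can be compared directly.
import numpy as np


def compare_offload_strategies(pipeline_class, get_components, inputs, device):
    images = []
    for strategy in ("none", "sequential", "model"):
        pipe = pipeline_class(**get_components())
        if strategy == "sequential":
            pipe.enable_sequential_cpu_offload(device=device)  # leaf-level offload
        elif strategy == "model":
            pipe.enable_model_cpu_offload(device=device)  # component-level offload
        else:
            pipe = pipe.to(device)
        images.append(pipe(**inputs).images[0])
    # All three strategies must agree to within slice tolerance.
    assert np.abs(images[0] - images[1]).max() < 1e-3
    assert np.abs(images[0] - images[2]).max() < 1e-3
```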