Unverified Commit eef3d659 authored by Yao Matrix, committed by GitHub

enable 2 test cases on XPU (#11332)



* enable 2 test cases on XPU
Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* Apply style fixes

---------
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
parent ee6ad51d
@@ -523,13 +523,15 @@ class SlowBnb8bitTests(Base8bitTests):
             torch_dtype=torch.float16,
             device_map=torch_device,
         )
         # CUDA device placement works.
+        device = torch_device if torch_device != "rocm" else "cuda"
         pipeline_8bit = DiffusionPipeline.from_pretrained(
             self.model_name,
             transformer=transformer_8bit,
             text_encoder_3=text_encoder_3_8bit,
             torch_dtype=torch.float16,
-        ).to("cuda")
+        ).to(device)
         # Check if inference works.
         _ = pipeline_8bit("table", max_sequence_length=20, num_inference_steps=2)
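
The `device` mapping above reflects that the test suite's `torch_device` can be set to "rocm" on AMD runners, while PyTorch's ROCm (HIP) build still addresses the GPU through the "cuda" device type. A minimal standalone sketch of the same placement logic; the checkpoint name here is only an illustrative stand-in for `self.model_name` in the test:

import torch

from diffusers import DiffusionPipeline
from diffusers.utils.testing_utils import torch_device

# PyTorch's ROCm build reuses the "cuda" device string, so a "rocm"
# test-device override must be mapped back before calling .to().
device = torch_device if torch_device != "rocm" else "cuda"

# Illustrative checkpoint; any text-to-image pipeline checkpoint would do.
pipeline = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3-medium-diffusers",
    torch_dtype=torch.float16,
).to(device)
_ = pipeline("table", max_sequence_length=20, num_inference_steps=2)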
......
 from diffusers.utils import is_torch_available
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_peak_memory_stats,
+    torch_device,
+)
 if is_torch_available():
@@ -30,9 +36,9 @@ if is_torch_available():
     @torch.no_grad()
     @torch.inference_mode()
     def get_memory_consumption_stat(model, inputs):
-        torch.cuda.reset_peak_memory_stats()
-        torch.cuda.empty_cache()
+        backend_reset_peak_memory_stats(torch_device)
+        backend_empty_cache(torch_device)
         model(**inputs)
-        max_memory_mem_allocated = torch.cuda.max_memory_allocated()
-        return max_memory_mem_allocated
+        max_mem_allocated = backend_max_memory_allocated(torch_device)
+        return max_mem_allocated
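
For context, a self-contained sketch of the rewritten helper: the `backend_*` functions dispatch to the matching accelerator API for whatever device `torch_device` names (e.g. "cuda" or "xpu"), which is what lets these tests run on XPU. The tiny `nn.Linear` model and its inputs are hypothetical stand-ins, not part of the patch:

import torch

from diffusers.utils.testing_utils import (
    backend_empty_cache,
    backend_max_memory_allocated,
    backend_reset_peak_memory_stats,
    torch_device,
)


@torch.no_grad()
@torch.inference_mode()
def get_memory_consumption_stat(model, inputs):
    # Reset the peak-memory counter and release cached blocks on
    # whichever accelerator torch_device names.
    backend_reset_peak_memory_stats(torch_device)
    backend_empty_cache(torch_device)
    model(**inputs)
    # Peak bytes allocated during the forward pass.
    return backend_max_memory_allocated(torch_device)


# Hypothetical usage: any nn.Module works; a Linear layer keeps it small.
model = torch.nn.Linear(1024, 1024).to(torch_device)
inputs = {"input": torch.randn(64, 1024, device=torch_device)}
print(f"peak forward memory: {get_memory_consumption_stat(model, inputs) / 2**20:.1f} MiB")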