Unverified Commit 6ba2231d authored by Patrick von Platen, committed by GitHub

Reproducibility 3/3 (#1924)



* make tests deterministic

* run slow tests

* prepare for testing

* finish

* refactor

* add print statements

* finish more

* correct some test failures

* more fixes

* set up to correct tests

* more corrections

* up

* fix more

* more prints

* add

* up

* up

* up

* uP

* uP

* more fixes

* uP

* up

* up

* up

* up

* fix more

* up

* up

* clean tests

* up

* up

* up

* more fixes

* Apply suggestions from code review
Co-authored-by: Suraj Patil <surajp815@gmail.com>

* make

* correct

* finish

* finish
Co-authored-by: Suraj Patil <surajp815@gmail.com>
parent 008c22d3
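
The recurring change throughout the diff below is that test generators are now created on the CPU (torch.manual_seed(seed) or torch.Generator("cpu").manual_seed(seed)) instead of on torch_device, so the initial noise no longer depends on the device the test happens to run on. A minimal sketch of the intended usage pattern (the checkpoint id and prompt are taken from the tests below; the exact call arguments are illustrative):

import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
pipe = pipe.to("cuda")

# Old pattern: noise is drawn on the GPU, so results differ across devices and driver versions.
gpu_generator = torch.Generator(device="cuda").manual_seed(0)

# New pattern: seed a CPU generator; the idea is that the initial latents can then be drawn
# on the CPU and moved to the execution device, making them identical across devices.
cpu_generator = torch.manual_seed(0)  # returns the default CPU generator

image = pipe(
    "A painting of a squirrel eating a burger",
    generator=cpu_generator,
    num_inference_steps=2,
).images[0]
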
......@@ -61,6 +61,7 @@ class ScoreSdeVeipelineFastTests(unittest.TestCase):
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
......@@ -86,4 +87,5 @@ class ScoreSdeVePipelineIntegrationTests(unittest.TestCase):
assert image.shape == (1, 256, 256, 3)
expected_slice = np.array([0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
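
All of the assertions touched in this diff follow the same slice-comparison pattern: take a small corner (or center) patch of the output, flatten it, and bound the maximum absolute deviation from a recorded reference. A standalone sketch of that check, using the ScoreSdeVe fast-test reference values above:

import numpy as np

def assert_slice_close(image, expected_slice, tol):
    # image has shape (batch, height, width, channels), i.e. output_type="np"
    image_slice = image[0, -3:, -3:, -1]  # 3x3 patch of the last channel
    max_diff = np.abs(image_slice.flatten() - expected_slice).max()
    assert max_diff < tol, f"max abs diff {max_diff} exceeds tolerance {tol}"

expected = np.array([0.0, 1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0])
# assert_slice_close(image, expected, tol=1e-2)
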
......@@ -182,7 +182,7 @@ class CycleDiffusionPipelineIntegrationTests(unittest.TestCase):
source_prompt = "A black colored car"
prompt = "A blue colored car"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
source_prompt=source_prompt,
......@@ -221,7 +221,7 @@ class CycleDiffusionPipelineIntegrationTests(unittest.TestCase):
source_prompt = "A black colored car"
prompt = "A blue colored car"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
source_prompt=source_prompt,
......
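
The CycleDiffusion change above swaps torch.Generator(device=torch_device).manual_seed(0) for torch.manual_seed(0). For test purposes the two are interchangeable on CPU: torch.manual_seed(seed) seeds and returns the default CPU generator, which yields the same sequence as a freshly constructed CPU generator with the same seed. A quick sanity check (my own sketch, not part of the diff):

import torch

a = torch.randn(4, generator=torch.manual_seed(0))
b = torch.randn(4, generator=torch.Generator(device="cpu").manual_seed(0))
assert torch.equal(a, b)  # same engine, same seed, same device -> identical draws
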
......@@ -60,6 +60,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.65072, 0.58492, 0.48219, 0.55521, 0.53180, 0.55939, 0.50697, 0.39800, 0.46455])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_pipeline_pndm(self):
......@@ -73,6 +74,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.65863, 0.59425, 0.49326, 0.56313, 0.53875, 0.56627, 0.51065, 0.39777, 0.46330])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_pipeline_lms(self):
......@@ -86,6 +88,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.53755, 0.60786, 0.47402, 0.49488, 0.51869, 0.49819, 0.47985, 0.38957, 0.44279])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_pipeline_euler(self):
......@@ -99,6 +102,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.53755, 0.60786, 0.47402, 0.49488, 0.51869, 0.49819, 0.47985, 0.38957, 0.44279])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_pipeline_euler_ancestral(self):
......@@ -112,6 +116,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.53817, 0.60812, 0.47384, 0.49530, 0.51894, 0.49814, 0.47984, 0.38958, 0.44271])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_pipeline_dpm_multistep(self):
......@@ -125,6 +130,7 @@ class OnnxStableDiffusionPipelineFastTests(OnnxPipelineTesterMixin, unittest.Tes
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.53895, 0.60808, 0.47933, 0.49608, 0.51886, 0.49950, 0.48053, 0.38957, 0.44200])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
......@@ -169,6 +175,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.0452, 0.0390, 0.0087, 0.0350, 0.0617, 0.0364, 0.0544, 0.0523, 0.0720])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_inference_ddim(self):
......@@ -194,6 +201,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.2867, 0.1974, 0.1481, 0.7294, 0.7251, 0.6667, 0.4194, 0.5642, 0.6486])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_inference_k_lms(self):
......@@ -219,6 +227,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.2306, 0.1959, 0.1593, 0.6549, 0.6394, 0.5408, 0.5065, 0.6010, 0.6161])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_intermediate_state(self):
......@@ -234,6 +243,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
expected_slice = np.array(
[-0.6772, -0.3835, -1.2456, 0.1905, -1.0974, 0.6967, -1.9353, 0.0178, 1.0167]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
elif step == 5:
assert latents.shape == (1, 4, 64, 64)
......@@ -241,6 +251,7 @@ class OnnxStableDiffusionPipelineIntegrationTests(unittest.TestCase):
expected_slice = np.array(
[-0.3351, 0.2241, -0.1837, -0.2325, -0.6577, 0.3393, -0.0241, 0.5899, 1.3875]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
test_callback_fn.has_been_called = False
......
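
The test_intermediate_state hunks above (and the similar callback tests later in the diff) verify latents mid-denoising through the pipeline's callback hook. A generic sketch of the pattern, with placeholder expected values standing in for the recorded references:

import numpy as np

def test_callback_fn(step: int, timestep: int, latents) -> None:
    test_callback_fn.has_been_called = True
    if step == 0:
        assert latents.shape == (1, 4, 64, 64)
        latents_slice = latents[0, -3:, -3:, -1]
        expected_slice = np.zeros(9)  # placeholder; real tests pin recorded reference values
        assert np.abs(np.asarray(latents_slice).flatten() - expected_slice).max() < 1e-3

test_callback_fn.has_been_called = False
# pipe(prompt, ..., callback=test_callback_fn, callback_steps=1) invokes the hook once per step;
# afterwards the test asserts test_callback_fn.has_been_called.
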
......@@ -82,6 +82,7 @@ class OnnxStableDiffusionImg2ImgPipelineFastTests(OnnxPipelineTesterMixin, unitt
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.61710, 0.53390, 0.49310, 0.55622, 0.50982, 0.58240, 0.50716, 0.38629, 0.46856])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
def test_pipeline_lms(self):
......@@ -98,6 +99,7 @@ class OnnxStableDiffusionImg2ImgPipelineFastTests(OnnxPipelineTesterMixin, unitt
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.52761, 0.59977, 0.49033, 0.49619, 0.54282, 0.50311, 0.47600, 0.40918, 0.45203])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
def test_pipeline_euler(self):
......@@ -111,6 +113,7 @@ class OnnxStableDiffusionImg2ImgPipelineFastTests(OnnxPipelineTesterMixin, unitt
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.52911, 0.60004, 0.49229, 0.49805, 0.54502, 0.50680, 0.47777, 0.41028, 0.45304])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
def test_pipeline_euler_ancestral(self):
......@@ -124,6 +127,7 @@ class OnnxStableDiffusionImg2ImgPipelineFastTests(OnnxPipelineTesterMixin, unitt
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.52911, 0.60004, 0.49229, 0.49805, 0.54502, 0.50680, 0.47777, 0.41028, 0.45304])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
def test_pipeline_dpm_multistep(self):
......@@ -137,6 +141,7 @@ class OnnxStableDiffusionImg2ImgPipelineFastTests(OnnxPipelineTesterMixin, unitt
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array([0.65331, 0.58277, 0.48204, 0.56059, 0.53665, 0.56235, 0.50969, 0.40009, 0.46552])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
......@@ -195,6 +200,7 @@ class OnnxStableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase):
assert images.shape == (1, 512, 768, 3)
expected_slice = np.array([0.4909, 0.5059, 0.5372, 0.4623, 0.4876, 0.5049, 0.4820, 0.4956, 0.5019])
# TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2
def test_inference_k_lms(self):
......@@ -235,4 +241,5 @@ class OnnxStableDiffusionImg2ImgPipelineIntegrationTests(unittest.TestCase):
assert images.shape == (1, 512, 768, 3)
expected_slice = np.array([0.8043, 0.926, 0.9581, 0.8119, 0.8954, 0.913, 0.7209, 0.7463, 0.7431])
# TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2
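
The two TODOs above keep a 2e-2 tolerance because onnxruntime runs are not yet bit-reproducible. When relaxing a bound like this, it helps to surface the observed difference in the failure message so the tolerance can be tightened later once the cause is found; a small hedged helper along those lines:

import numpy as np

def assert_within(image_slice, expected_slice, tol):
    diff = np.abs(np.asarray(image_slice).flatten() - np.asarray(expected_slice)).max()
    assert diff < tol, f"output drifted by {diff:.4f} (tolerance {tol})"
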
......@@ -94,6 +94,7 @@ class OnnxStableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
assert images.shape == (1, 512, 512, 3)
expected_slice = np.array([0.2514, 0.3007, 0.3517, 0.1790, 0.2382, 0.3167, 0.1944, 0.2273, 0.2464])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_inference_k_lms(self):
......@@ -136,4 +137,5 @@ class OnnxStableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
assert images.shape == (1, 512, 512, 3)
expected_slice = np.array([0.0086, 0.0077, 0.0083, 0.0093, 0.0107, 0.0139, 0.0094, 0.0097, 0.0125])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
......@@ -244,6 +244,7 @@ class StableDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5094, 0.5674, 0.4667, 0.5125, 0.5696, 0.4674, 0.5277, 0.4964, 0.4945])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_no_safety_checker(self):
......@@ -295,6 +296,7 @@ class StableDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
0.5042197108268738,
]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_k_euler_ancestral(self):
......@@ -325,6 +327,7 @@ class StableDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
0.504422664642334,
]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_k_euler(self):
......@@ -355,6 +358,7 @@ class StableDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
0.5042197108268738,
]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_vae_slicing(self):
......@@ -409,6 +413,7 @@ class StableDiffusionPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
0.4899061322212219,
]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_num_images_per_prompt(self):
......@@ -519,8 +524,8 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
inputs = {
......@@ -657,9 +662,11 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
mem_bytes = torch.cuda.max_memory_allocated()
assert mem_bytes > 4e9
# There is a small discrepancy at the image borders vs. a fully batched version.
assert np.abs(image_sliced - image).max() < 4e-3
assert np.abs(image_sliced - image).max() < 1e-2
def test_stable_diffusion_fp16_vs_autocast(self):
# this test makes sure that the original model with autocast
# and the new model with fp16 yield the same result
pipe = StableDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4", torch_dtype=torch.float16)
pipe = pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
......@@ -688,14 +695,20 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-0.5713, -0.3018, -0.9814, 0.04663, -0.879, 0.76, -1.734, 0.1044, 1.161])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-3
expected_slice = np.array(
[-0.5693, -0.3018, -0.9746, 0.0518, -0.8770, 0.7559, -1.7402, 0.1022, 1.1582]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-0.1885, -0.3022, -1.012, -0.514, -0.477, 0.6143, -0.9336, 0.6553, 1.453])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-2
expected_slice = np.array(
[-0.1958, -0.2993, -1.0166, -0.5005, -0.4810, 0.6162, -0.9492, 0.6621, 1.4492]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
callback_fn.has_been_called = False
......@@ -750,8 +763,8 @@ class StableDiffusionPipelineNightlyTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
inputs = {
......
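
The get_inputs refactor above separates the device the latents live on from the device the generator is created on, defaulting the latter to "cpu". Pieced together from the hunk (the signature and first three lines are from the diff; the dict contents are assumptions, since the hunk cuts off at "inputs = {"):

import numpy as np
import torch

# In the tests this is a method of the slow/nightly test classes; shown here as a free function.
def get_inputs(device, generator_device="cpu", dtype=torch.float32, seed=0):
    generator = torch.Generator(device=generator_device).manual_seed(seed)
    latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
    latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
    return {
        "prompt": "a photograph of an astronaut riding a horse",  # assumed; not shown in the hunk
        "latents": latents,
        "generator": generator,
        "num_inference_steps": 3,   # assumed
        "guidance_scale": 7.5,      # assumed
        "output_type": "numpy",     # assumed
    }

# inputs = get_inputs(device="cuda")  # latents end up on the GPU, the generator stays on the CPU
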
......@@ -117,6 +117,7 @@ class StableDiffusionImageVariationPipelineFastTests(PipelineTesterMixin, unitte
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5167, 0.5746, 0.4835, 0.4914, 0.5605, 0.4691, 0.5201, 0.4898, 0.4958])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_img_variation_multiple_images(self):
......@@ -136,6 +137,7 @@ class StableDiffusionImageVariationPipelineFastTests(PipelineTesterMixin, unitte
assert image.shape == (2, 64, 64, 3)
expected_slice = np.array([0.6568, 0.5470, 0.5684, 0.5444, 0.5945, 0.6221, 0.5508, 0.5531, 0.5263])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_img_variation_num_images_per_prompt(self):
......@@ -183,8 +185,8 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_imgvar/input_image_vermeer.png"
......@@ -227,13 +229,17 @@ class StableDiffusionImageVariationPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-0.1572, 0.2837, -0.798, -0.1201, -1.304, 0.7754, -2.12, 0.0443, 1.627])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-3
expected_slice = np.array(
[-0.1621, 0.2837, -0.7979, -0.1221, -1.3057, 0.7681, -2.1191, 0.0464, 1.6309]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.6143, 1.734, 1.158, -2.145, -1.926, 0.748, -0.7246, 0.994, 1.539])
expected_slice = np.array([0.6299, 1.7500, 1.1992, -2.1582, -1.8994, 0.7334, -0.7090, 1.0137, 1.5273])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
callback_fn.has_been_called = False
......@@ -282,8 +288,8 @@ class StableDiffusionImageVariationPipelineNightlyTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_imgvar/input_image_vermeer.png"
......
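
The refreshed expected_slice values in the callback tests above (and in several files below) appear to have been re-recorded against the deterministic CPU-generator runs, with tolerances widened to 5e-2 where the callbacks observe half-precision latents. One hedged way such reference slices get captured is simply to print them from a known-good run and paste the result into the test:

import numpy as np

def format_reference_slice(latents) -> str:
    """Format a 3x3 latent corner the way expected_slice arrays are written in the tests."""
    latents_slice = np.asarray(latents)[0, -3:, -3:, -1].flatten()
    return "np.array([" + ", ".join(f"{v:.4f}" for v in latents_slice) + "])"

# Paste the printed line into the test as the new expected_slice, then pick a tolerance that
# comfortably covers cross-GPU variation (e.g. 5e-2 for fp16 runs).
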
......@@ -119,6 +119,7 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.4492, 0.3865, 0.4222, 0.5854, 0.5139, 0.4379, 0.4193, 0.48, 0.4218])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_img2img_negative_prompt(self):
......@@ -136,6 +137,7 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.4065, 0.3783, 0.4050, 0.5266, 0.4781, 0.4252, 0.4203, 0.4692, 0.4365])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_img2img_multiple_init_images(self):
......@@ -153,6 +155,7 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test
assert image.shape == (2, 32, 32, 3)
expected_slice = np.array([0.5144, 0.4447, 0.4735, 0.6676, 0.5526, 0.5454, 0.645, 0.5149, 0.4689])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_img2img_k_lms(self):
......@@ -171,6 +174,7 @@ class StableDiffusionImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.Test
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.4367, 0.4986, 0.4372, 0.6706, 0.5665, 0.444, 0.5864, 0.6019, 0.5203])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_img2img_num_images_per_prompt(self):
......@@ -218,8 +222,8 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_img2img/sketch-mountains-input.png"
......@@ -246,7 +250,8 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1].flatten()
assert image.shape == (1, 512, 768, 3)
expected_slice = np.array([0.27150, 0.14849, 0.15605, 0.26740, 0.16954, 0.18204, 0.31470, 0.26311, 0.24525])
expected_slice = np.array([0.4300, 0.4662, 0.4930, 0.3990, 0.4307, 0.4525, 0.3719, 0.4064, 0.3923])
assert np.abs(expected_slice - image_slice).max() < 1e-3
def test_stable_diffusion_img2img_k_lms(self):
......@@ -261,7 +266,8 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1].flatten()
assert image.shape == (1, 512, 768, 3)
expected_slice = np.array([0.04890, 0.04862, 0.06422, 0.04655, 0.05108, 0.05307, 0.05926, 0.08759, 0.06852])
expected_slice = np.array([0.0389, 0.0346, 0.0415, 0.0290, 0.0218, 0.0210, 0.0408, 0.0567, 0.0271])
assert np.abs(expected_slice - image_slice).max() < 1e-3
def test_stable_diffusion_img2img_ddim(self):
......@@ -276,7 +282,8 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1].flatten()
assert image.shape == (1, 512, 768, 3)
expected_slice = np.array([0.06069, 0.05703, 0.08054, 0.05797, 0.06286, 0.06234, 0.08438, 0.11151, 0.08068])
expected_slice = np.array([0.0593, 0.0607, 0.0851, 0.0582, 0.0636, 0.0721, 0.0751, 0.0981, 0.0781])
assert np.abs(expected_slice - image_slice).max() < 1e-3
def test_stable_diffusion_img2img_intermediate_state(self):
......@@ -290,14 +297,16 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 96)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.7705, 0.1045, 0.5, 3.393, 3.723, 4.273, 2.467, 3.486, 1.758])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
expected_slice = np.array([-0.4958, 0.5107, 1.1045, 2.7539, 4.6680, 3.8320, 1.5049, 1.8633, 2.6523])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 96)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.765, 0.1047, 0.4973, 3.375, 3.709, 4.258, 2.451, 3.46, 1.755])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
expected_slice = np.array([-0.4956, 0.5078, 1.0918, 2.7520, 4.6484, 3.8125, 1.5146, 1.8633, 2.6367])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
callback_fn.has_been_called = False
......@@ -352,7 +361,7 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
prompt = "A fantasy landscape, trending on artstation"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
image=init_image,
......@@ -366,8 +375,9 @@ class StableDiffusionImg2ImgPipelineSlowTests(unittest.TestCase):
image_slice = image[255:258, 383:386, -1]
assert image.shape == (504, 760, 3)
expected_slice = np.array([0.7124, 0.7105, 0.6993, 0.7140, 0.7106, 0.6945, 0.7198, 0.7172, 0.7031])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
expected_slice = np.array([0.9393, 0.9500, 0.9399, 0.9438, 0.9458, 0.9400, 0.9455, 0.9414, 0.9423])
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3
@nightly
......@@ -378,8 +388,8 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_img2img/sketch-mountains-input.png"
......
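
The img2img slow tests above exercise several schedulers (the default, LMS, DDIM) against the same CPU-seeded inputs, which is what makes the per-scheduler reference slices comparable. Scheduler swapping itself follows the usual diffusers pattern; a sketch (the checkpoint id is the one used elsewhere in this diff):

import torch
from diffusers import DDIMScheduler, LMSDiscreteScheduler, StableDiffusionImg2ImgPipeline

pipe = StableDiffusionImg2ImgPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")

# Reuse the pipeline's own scheduler config so only the sampling algorithm changes.
pipe.scheduler = LMSDiscreteScheduler.from_config(pipe.scheduler.config)
# ...or:
pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

generator = torch.manual_seed(0)  # CPU generator, as standardized in this PR
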
......@@ -125,6 +125,7 @@ class StableDiffusionInpaintPipelineFastTests(PipelineTesterMixin, unittest.Test
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4723, 0.5731, 0.3939, 0.5441, 0.5922, 0.4392, 0.5059, 0.4651, 0.4474])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_inpaint_image_tensor(self):
......@@ -172,8 +173,8 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_inpaint/input_bench_image.png"
......@@ -206,7 +207,8 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.05978, 0.10983, 0.10514, 0.07922, 0.08483, 0.08587, 0.05302, 0.03218, 0.01636])
expected_slice = np.array([0.0427, 0.0460, 0.0483, 0.0460, 0.0584, 0.0521, 0.1549, 0.1695, 0.1794])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_inpaint_fp16(self):
......@@ -222,8 +224,9 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.06152, 0.11060, 0.10449, 0.07959, 0.08643, 0.08496, 0.05420, 0.03247, 0.01831])
assert np.abs(expected_slice - image_slice).max() < 1e-2
expected_slice = np.array([0.1443, 0.1218, 0.1587, 0.1594, 0.1411, 0.1284, 0.1370, 0.1506, 0.2339])
assert np.abs(expected_slice - image_slice).max() < 5e-2
def test_stable_diffusion_inpaint_pndm(self):
pipe = StableDiffusionInpaintPipeline.from_pretrained(
......@@ -239,7 +242,8 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.06892, 0.06994, 0.07905, 0.05366, 0.04709, 0.04890, 0.04107, 0.05083, 0.04180])
expected_slice = np.array([0.0425, 0.0273, 0.0344, 0.1694, 0.1727, 0.1812, 0.3256, 0.3311, 0.3272])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_inpaint_k_lms(self):
......@@ -256,7 +260,8 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.23513, 0.22413, 0.29442, 0.24243, 0.26214, 0.30329, 0.26431, 0.25025, 0.25197])
expected_slice = np.array([0.9314, 0.7575, 0.9432, 0.8885, 0.9028, 0.7298, 0.9811, 0.9667, 0.7633])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_inpaint_with_sequential_cpu_offloading(self):
......@@ -288,8 +293,8 @@ class StableDiffusionInpaintPipelineNightlyTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_inpaint/input_bench_image.png"
......
......@@ -213,6 +213,7 @@ class StableDiffusionInpaintLegacyPipelineFastTests(unittest.TestCase):
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.4731, 0.5346, 0.4531, 0.6251, 0.5446, 0.4057, 0.5527, 0.5896, 0.5153])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
......@@ -260,6 +261,7 @@ class StableDiffusionInpaintLegacyPipelineFastTests(unittest.TestCase):
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.4765, 0.5339, 0.4541, 0.6240, 0.5439, 0.4055, 0.5503, 0.5891, 0.5150])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_inpaint_legacy_num_images_per_prompt(self):
......@@ -347,8 +349,8 @@ class StableDiffusionInpaintLegacyPipelineSlowTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_inpaint/input_bench_image.png"
......@@ -382,7 +384,8 @@ class StableDiffusionInpaintLegacyPipelineSlowTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.27200, 0.29103, 0.34405, 0.21418, 0.26317, 0.34281, 0.18033, 0.24911, 0.32028])
expected_slice = np.array([0.5669, 0.6124, 0.6431, 0.4073, 0.4614, 0.5670, 0.1609, 0.3128, 0.4330])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_inpaint_legacy_k_lms(self):
......@@ -399,7 +402,8 @@ class StableDiffusionInpaintLegacyPipelineSlowTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.29014, 0.28882, 0.32835, 0.26502, 0.28182, 0.31162, 0.29297, 0.29534, 0.28214])
expected_slice = np.array([0.4533, 0.4465, 0.4327, 0.4329, 0.4339, 0.4219, 0.4243, 0.4332, 0.4426])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_inpaint_legacy_intermediate_state(self):
......@@ -413,13 +417,15 @@ class StableDiffusionInpaintLegacyPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-0.103, 1.415, -0.02197, -0.5107, -0.5903, 0.1953, 0.75, 0.3477, -1.356])
expected_slice = np.array([0.5977, 1.5449, 1.0586, -0.3250, 0.7383, -0.0862, 0.4631, -0.2571, -1.1289])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.4802, 1.154, 0.628, 0.2319, 0.2593, -0.1455, 0.7075, -0.1617, -0.5615])
expected_slice = np.array([0.5190, 1.1621, 0.6885, 0.2424, 0.3337, -0.1617, 0.6914, -0.1957, -0.5474])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
callback_fn.has_been_called = False
......@@ -445,8 +451,8 @@ class StableDiffusionInpaintLegacyPipelineNightlyTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main"
"/stable_diffusion_inpaint/input_bench_image.png"
......
......@@ -122,6 +122,7 @@ class StableDiffusionInstructPix2PixPipelineFastTests(PipelineTesterMixin, unitt
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.7318, 0.3723, 0.4662, 0.623, 0.5770, 0.5014, 0.4281, 0.5550, 0.4813])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_pix2pix_negative_prompt(self):
......@@ -139,6 +140,7 @@ class StableDiffusionInstructPix2PixPipelineFastTests(PipelineTesterMixin, unitt
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.7323, 0.3688, 0.4611, 0.6255, 0.5746, 0.5017, 0.433, 0.5553, 0.4827])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_pix2pix_multiple_init_images(self):
......@@ -161,6 +163,7 @@ class StableDiffusionInstructPix2PixPipelineFastTests(PipelineTesterMixin, unitt
assert image.shape == (2, 32, 32, 3)
expected_slice = np.array([0.606, 0.5712, 0.5099, 0.598, 0.5805, 0.7205, 0.6793, 0.554, 0.5607])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_pix2pix_euler(self):
......@@ -182,6 +185,7 @@ class StableDiffusionInstructPix2PixPipelineFastTests(PipelineTesterMixin, unitt
assert image.shape == (1, 32, 32, 3)
expected_slice = np.array([0.726, 0.3902, 0.4868, 0.585, 0.5672, 0.511, 0.3906, 0.551, 0.4846])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_pix2pix_num_images_per_prompt(self):
......@@ -259,6 +263,7 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.5902, 0.6015, 0.6027, 0.5983, 0.6092, 0.6061, 0.5765, 0.5785, 0.5555])
assert np.abs(expected_slice - image_slice).max() < 1e-3
def test_stable_diffusion_pix2pix_k_lms(self):
......@@ -276,6 +281,7 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.6578, 0.6817, 0.6972, 0.6761, 0.6856, 0.6916, 0.6428, 0.6516, 0.6301])
assert np.abs(expected_slice - image_slice).max() < 1e-3
def test_stable_diffusion_pix2pix_ddim(self):
......@@ -293,6 +299,7 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.3828, 0.3834, 0.3818, 0.3792, 0.3865, 0.3752, 0.3792, 0.3847, 0.3753])
assert np.abs(expected_slice - image_slice).max() < 1e-3
def test_stable_diffusion_pix2pix_intermediate_state(self):
......@@ -306,14 +313,16 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-0.2388, -0.4673, -0.9775, 1.5127, 1.4414, 0.7778, 0.9907, 0.8472, 0.7788])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-2
expected_slice = np.array([-0.2463, -0.4644, -0.9756, 1.5176, 1.4414, 0.7866, 0.9897, 0.8521, 0.7983])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-0.2568, -0.4648, -0.9639, 1.5137, 1.4609, 0.7603, 0.9795, 0.8403, 0.7949])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-2
expected_slice = np.array([-0.2644, -0.4626, -0.9653, 1.5176, 1.4551, 0.7686, 0.9805, 0.8452, 0.8115])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
callback_fn.has_been_called = False
......@@ -369,5 +378,6 @@ class StableDiffusionInstructPix2PixPipelineSlowTests(unittest.TestCase):
image_slice = image[255:258, 383:386, -1]
assert image.shape == (504, 504, 3)
expected_slice = np.array([0.2726, 0.2529, 0.2664, 0.2655, 0.2641, 0.2642, 0.2591, 0.2649, 0.259])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
expected_slice = np.array([0.2726, 0.2529, 0.2664, 0.2655, 0.2641, 0.2642, 0.2591, 0.2649, 0.2590])
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3
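
For reference, the pix2pix tests above call the pipeline with both a text instruction and an input image; image_guidance_scale controls how closely the edit sticks to the original image. A hedged sketch of the call (the checkpoint id, prompt, and blank input image are placeholders, not taken from the tests):

import torch
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline

pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained("timbrooks/instruct-pix2pix")
pipe = pipe.to("cuda")

init_image = Image.new("RGB", (512, 512))  # stand-in for a real input photo
generator = torch.manual_seed(0)

edited = pipe(
    "turn the sky into a sunset",   # edit instruction (placeholder)
    image=init_image,
    generator=generator,
    num_inference_steps=10,
    guidance_scale=7.5,
    image_guidance_scale=1.5,
).images[0]
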
......@@ -44,7 +44,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
sd_pipe.set_scheduler("sample_euler")
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = sd_pipe([prompt], generator=generator, guidance_scale=9.0, num_inference_steps=20, output_type="np")
image = output.images
......@@ -52,7 +52,8 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.8887, 0.915, 0.91, 0.894, 0.909, 0.912, 0.919, 0.925, 0.883])
expected_slice = np.array([0.0447, 0.0492, 0.0468, 0.0408, 0.0383, 0.0408, 0.0354, 0.0380, 0.0339])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_2(self):
......@@ -63,7 +64,7 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
sd_pipe.set_scheduler("sample_euler")
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = sd_pipe([prompt], generator=generator, guidance_scale=9.0, num_inference_steps=20, output_type="np")
image = output.images
......@@ -71,7 +72,6 @@ class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.826810, 0.81958747, 0.8510199, 0.8376758, 0.83958465, 0.8682068, 0.84370345, 0.85251087, 0.85884345]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
expected_slice = np.array([0.1237, 0.1320, 0.1438, 0.1359, 0.1390, 0.1132, 0.1277, 0.1175, 0.1112])
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-1
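
The set_scheduler("sample_euler") calls above come from the k-diffusion wrapper pipeline (an assumption based on the sampler naming), which selects samplers by their k-diffusion names rather than by swapping scheduler objects. A minimal hedged sketch mirroring the test call above:

import torch
from diffusers import StableDiffusionKDiffusionPipeline

sd_pipe = StableDiffusionKDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
sd_pipe = sd_pipe.to("cuda")
sd_pipe.set_scheduler("sample_euler")  # k-diffusion sampler name

generator = torch.manual_seed(0)
output = sd_pipe(
    ["A painting of a squirrel eating a burger"],
    generator=generator,
    guidance_scale=9.0,
    num_inference_steps=20,
    output_type="np",
)
image = output.images
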
......@@ -149,6 +149,7 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5099, 0.5677, 0.4671, 0.5128, 0.5697, 0.4676, 0.5277, 0.4964, 0.4946])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_k_lms(self):
......@@ -165,6 +166,7 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4717, 0.5376, 0.4568, 0.5225, 0.5734, 0.4797, 0.5467, 0.5074, 0.5043])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_k_euler_ancestral(self):
......@@ -181,6 +183,7 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4715, 0.5376, 0.4569, 0.5224, 0.5734, 0.4797, 0.5465, 0.5074, 0.5046])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_k_euler(self):
......@@ -197,6 +200,7 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4717, 0.5376, 0.4568, 0.5225, 0.5734, 0.4797, 0.5467, 0.5074, 0.5043])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_long_prompt(self):
......@@ -246,8 +250,8 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
inputs = {
......@@ -340,14 +344,20 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-0.3857, -0.4507, -1.167, 0.074, -1.108, 0.7183, -1.822, 0.1915, 1.283])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-3
expected_slice = np.array(
[-0.3862, -0.4507, -1.1729, 0.0686, -1.1045, 0.7124, -1.8301, 0.1903, 1.2773]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 64, 64)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([0.268, -0.2095, -0.7744, -0.541, -0.79, 0.3926, -0.7754, 0.465, 1.291])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-2
expected_slice = np.array(
[0.2720, -0.1863, -0.7383, -0.5029, -0.7534, 0.3970, -0.7646, 0.4468, 1.2686]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
callback_fn.has_been_called = False
......@@ -392,8 +402,8 @@ class StableDiffusion2PipelineNightlyTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=generator_device).manual_seed(seed)
latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
inputs = {
......
......@@ -289,6 +289,7 @@ class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.Te
expected_slice = np.array([0.6071, 0.5035, 0.4378, 0.5776, 0.5753, 0.4316, 0.4513, 0.5263, 0.4546])
else:
expected_slice = np.array([0.6854, 0.3740, 0.4857, 0.7130, 0.7403, 0.5536, 0.4829, 0.6182, 0.5053])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_depth2img_negative_prompt(self):
......@@ -309,6 +310,7 @@ class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.Te
expected_slice = np.array([0.5825, 0.5135, 0.4095, 0.5452, 0.6059, 0.4211, 0.3994, 0.5177, 0.4335])
else:
expected_slice = np.array([0.6074, 0.3096, 0.4802, 0.7463, 0.7388, 0.5393, 0.4531, 0.5928, 0.4972])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_depth2img_multiple_init_images(self):
......@@ -330,6 +332,7 @@ class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.Te
expected_slice = np.array([0.6501, 0.5150, 0.4939, 0.6688, 0.5437, 0.5758, 0.5115, 0.4406, 0.4551])
else:
expected_slice = np.array([0.6681, 0.5023, 0.6611, 0.7605, 0.5724, 0.7959, 0.7240, 0.5871, 0.5383])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
def test_stable_diffusion_depth2img_num_images_per_prompt(self):
......@@ -384,6 +387,7 @@ class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.Te
expected_slice = np.array([0.53232, 0.47015, 0.40868, 0.45651, 0.4891, 0.4668, 0.4287, 0.48822, 0.47439])
else:
expected_slice = np.array([0.6853, 0.3740, 0.4856, 0.7130, 0.7402, 0.5535, 0.4828, 0.6182, 0.5053])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-3
......@@ -395,7 +399,7 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png"
......@@ -419,12 +423,13 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
inputs = self.get_inputs(torch_device)
inputs = self.get_inputs()
image = pipe(**inputs).images
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 480, 640, 3)
expected_slice = np.array([0.75446, 0.74692, 0.75951, 0.81611, 0.80593, 0.79992, 0.90529, 0.87921, 0.86903])
expected_slice = np.array([0.9057, 0.9365, 0.9258, 0.8937, 0.8555, 0.8541, 0.8260, 0.7747, 0.7421])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_depth2img_pipeline_k_lms(self):
......@@ -436,12 +441,13 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
inputs = self.get_inputs(torch_device)
inputs = self.get_inputs()
image = pipe(**inputs).images
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 480, 640, 3)
expected_slice = np.array([0.63957, 0.64879, 0.65668, 0.64385, 0.67078, 0.63588, 0.66577, 0.62180, 0.66286])
expected_slice = np.array([0.6363, 0.6274, 0.6309, 0.6370, 0.6226, 0.6286, 0.6213, 0.6453, 0.6306])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_depth2img_pipeline_ddim(self):
......@@ -453,12 +459,13 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
inputs = self.get_inputs(torch_device)
inputs = self.get_inputs()
image = pipe(**inputs).images
image_slice = image[0, 253:256, 253:256, -1].flatten()
assert image.shape == (1, 480, 640, 3)
expected_slice = np.array([0.62840, 0.64191, 0.62953, 0.63653, 0.64205, 0.61574, 0.62252, 0.65827, 0.64809])
expected_slice = np.array([0.6424, 0.6524, 0.6249, 0.6041, 0.6634, 0.6420, 0.6522, 0.6555, 0.6436])
assert np.abs(expected_slice - image_slice).max() < 1e-4
def test_stable_diffusion_depth2img_intermediate_state(self):
......@@ -472,14 +479,20 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 60, 80)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-1.148, -0.2079, -0.622, -2.477, -2.348, 0.3828, -2.055, -1.569, -1.526])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
expected_slice = np.array(
[-0.7168, -1.5137, -0.1418, -2.9219, -2.7266, -2.4414, -2.1035, -3.0078, -1.7051]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 2:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 60, 80)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array([-1.145, -0.2063, -0.6216, -2.469, -2.344, 0.3794, -2.05, -1.57, -1.521])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-3
expected_slice = np.array(
[-0.7109, -1.5068, -0.1403, -2.9160, -2.7207, -2.4414, -2.1035, -3.0059, -1.7090]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
callback_fn.has_been_called = False
......@@ -490,7 +503,7 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
pipe.set_progress_bar_config(disable=None)
pipe.enable_attention_slicing()
inputs = self.get_inputs(torch_device, dtype=torch.float16)
inputs = self.get_inputs(dtype=torch.float16)
pipe(**inputs, callback=callback_fn, callback_steps=1)
assert callback_fn.has_been_called
assert number_of_steps == 2
......@@ -508,7 +521,7 @@ class StableDiffusionDepth2ImgPipelineSlowTests(unittest.TestCase):
pipe.enable_attention_slicing(1)
pipe.enable_sequential_cpu_offload()
inputs = self.get_inputs(torch_device, dtype=torch.float16)
inputs = self.get_inputs(dtype=torch.float16)
_ = pipe(**inputs)
mem_bytes = torch.cuda.max_memory_allocated()
......@@ -524,7 +537,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
gc.collect()
torch.cuda.empty_cache()
def get_inputs(self, device, dtype=torch.float32, seed=0):
def get_inputs(self, device="cpu", dtype=torch.float32, seed=0):
generator = torch.Generator(device=device).manual_seed(seed)
init_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png"
......@@ -545,7 +558,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device)
inputs = self.get_inputs()
image = pipe(**inputs).images[0]
expected_image = load_numpy(
......@@ -561,7 +574,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device)
inputs = self.get_inputs()
image = pipe(**inputs).images[0]
expected_image = load_numpy(
......@@ -577,7 +590,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device)
inputs = self.get_inputs()
image = pipe(**inputs).images[0]
expected_image = load_numpy(
......@@ -593,7 +606,7 @@ class StableDiffusionImg2ImgPipelineNightlyTests(unittest.TestCase):
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_inputs(torch_device)
inputs = self.get_inputs()
inputs["num_inference_steps"] = 30
image = pipe(**inputs).images[0]
......
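
The depth2img section also keeps the memory-budget test that combines attention slicing with sequential CPU offload (the enable_sequential_cpu_offload hunk above). The general shape of such a memory assertion, sketched with the input image URL from the tests (checkpoint id and prompt are assumptions):

import torch
from diffusers import StableDiffusionDepth2ImgPipeline
from diffusers.utils import load_image

pipe = StableDiffusionDepth2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2-depth", torch_dtype=torch.float16
)
pipe.enable_attention_slicing(1)      # finest-grained attention slicing
pipe.enable_sequential_cpu_offload()  # modules visit the GPU only while they run

init_image = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/depth2img/two_cats.png"
)
generator = torch.Generator(device="cpu").manual_seed(0)

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
_ = pipe("two tigers", image=init_image, generator=generator, num_inference_steps=3)
mem_bytes = torch.cuda.max_memory_allocated()
# The test asserts a ceiling on peak memory, i.e. that mem_bytes stays below a few GB.
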
......@@ -158,7 +158,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
image=init_image,
......@@ -196,7 +196,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
image=init_image,
......@@ -237,7 +237,7 @@ class StableDiffusionInpaintPipelineIntegrationTests(unittest.TestCase):
prompt = "Face of a yellow cat, high resolution, sitting on a park bench"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
_ = pipe(
prompt=prompt,
image=init_image,
......
......@@ -241,7 +241,7 @@ class StableDiffusionUpscalePipelineFastTests(unittest.TestCase):
sd_pipe.set_progress_bar_config(disable=None)
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
image = sd_pipe(
[prompt],
image=low_res_image,
......@@ -281,7 +281,7 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
prompt = "a cat sitting on a park bench"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
image=image,
......@@ -314,7 +314,7 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
prompt = "a cat sitting on a park bench"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
image=image,
......@@ -348,7 +348,7 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
prompt = "a cat sitting on a park bench"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
_ = pipe(
prompt=prompt,
image=image,
......
......@@ -194,6 +194,7 @@ class StableDiffusion2VPredictionPipelineFastTests(unittest.TestCase):
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4616, 0.5184, 0.4887, 0.5111, 0.4839, 0.48, 0.5119, 0.5263, 0.4776])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
......@@ -233,7 +234,7 @@ class StableDiffusion2VPredictionPipelineFastTests(unittest.TestCase):
sd_pipe.set_progress_bar_config(disable=None)
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
image = sd_pipe([prompt], generator=generator, num_inference_steps=2, output_type="np").images
assert image.shape == (1, 64, 64, 3)
......@@ -255,14 +256,15 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
sd_pipe.set_progress_bar_config(disable=None)
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = sd_pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=20, output_type="np")
image = output.images
image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 768, 768, 3)
expected_slice = np.array([0.0567, 0.057, 0.0416, 0.0463, 0.0433, 0.06, 0.0517, 0.0526, 0.0866])
expected_slice = np.array([0.1868, 0.1922, 0.1527, 0.1921, 0.1908, 0.1624, 0.1779, 0.1652, 0.1734])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_v_pred_upcast_attention(self):
......@@ -274,15 +276,16 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
sd_pipe.set_progress_bar_config(disable=None)
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = sd_pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=20, output_type="np")
image = output.images
image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 768, 768, 3)
expected_slice = np.array([0.0461, 0.0483, 0.0566, 0.0512, 0.0446, 0.0751, 0.0664, 0.0551, 0.0488])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
expected_slice = np.array([0.4209, 0.4087, 0.4097, 0.4209, 0.3860, 0.4329, 0.4280, 0.4324, 0.4187])
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-2
def test_stable_diffusion_v_pred_euler(self):
scheduler = EulerDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-2", subfolder="scheduler")
......@@ -292,7 +295,7 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
sd_pipe.set_progress_bar_config(disable=None)
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = sd_pipe([prompt], generator=generator, num_inference_steps=5, output_type="numpy")
image = output.images
......@@ -300,7 +303,8 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 768, 768, 3)
expected_slice = np.array([0.0351, 0.0376, 0.0505, 0.0424, 0.0551, 0.0656, 0.0471, 0.0276, 0.0596])
expected_slice = np.array([0.1781, 0.1695, 0.1661, 0.1705, 0.1588, 0.1699, 0.2005, 0.1589, 0.1677])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_v_pred_dpm(self):
......@@ -316,14 +320,15 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
sd_pipe.set_progress_bar_config(disable=None)
prompt = "a photograph of an astronaut riding a horse"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
image = sd_pipe(
[prompt], generator=generator, guidance_scale=7.5, num_inference_steps=5, output_type="numpy"
).images
image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 768, 768, 3)
expected_slice = np.array([0.2049, 0.2115, 0.2323, 0.2416, 0.256, 0.2484, 0.2517, 0.2358, 0.236])
expected_slice = np.array([0.3303, 0.3184, 0.3291, 0.3300, 0.3256, 0.3113, 0.2965, 0.3134, 0.3192])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_stable_diffusion_attention_slicing_v_pred(self):
......@@ -337,12 +342,11 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
# make attention efficient
pipe.enable_attention_slicing()
generator = torch.Generator(device=torch_device).manual_seed(0)
with torch.autocast(torch_device):
output_chunked = pipe(
[prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
)
image_chunked = output_chunked.images
generator = torch.manual_seed(0)
output_chunked = pipe(
[prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
)
image_chunked = output_chunked.images
mem_bytes = torch.cuda.max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
......@@ -351,12 +355,9 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
# disable slicing
pipe.disable_attention_slicing()
generator = torch.Generator(device=torch_device).manual_seed(0)
with torch.autocast(torch_device):
output = pipe(
[prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
)
image = output.images
generator = torch.manual_seed(0)
output = pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy")
image = output.images
# make sure that more than 5.5 GB is allocated
mem_bytes = torch.cuda.max_memory_allocated()
......@@ -376,12 +377,12 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
prompt = "astronaut riding a horse"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np")
image = output.images[0]
assert image.shape == (768, 768, 3)
assert np.abs(expected_image - image).max() < 5e-3
assert np.abs(expected_image - image).max() < 7.5e-2
def test_stable_diffusion_text2img_pipeline_v_pred_fp16(self):
expected_image = load_numpy(
......@@ -395,12 +396,12 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
prompt = "astronaut riding a horse"
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np")
image = output.images[0]
assert image.shape == (768, 768, 3)
assert np.abs(expected_image - image).max() < 5e-1
assert np.abs(expected_image - image).max() < 7.5e-1
def test_stable_diffusion_text2img_intermediate_state_v_pred(self):
number_of_steps = 0
......@@ -413,18 +414,16 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 96, 96)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array(
[-0.2543, -1.2755, 0.4261, -0.9555, -1.173, -0.5892, 2.4159, 0.1554, -1.2098]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-3
expected_slice = np.array([0.7749, 0.0325, 0.5088, 0.1619, 0.3372, 0.3667, -0.5186, 0.6860, 1.4326])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
elif step == 19:
latents = latents.detach().cpu().numpy()
assert latents.shape == (1, 4, 96, 96)
latents_slice = latents[0, -3:, -3:, -1]
expected_slice = np.array(
[-0.9572, -0.967, -0.6152, 0.0894, -0.699, -0.2344, 1.5465, -0.0357, -0.1141]
)
assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-2
expected_slice = np.array([1.3887, 1.0273, 1.7266, 0.0726, 0.6611, 0.1598, -1.0547, 0.1522, 0.0227])
assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-2
test_callback_fn.has_been_called = False
......@@ -435,16 +434,15 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
prompt = "Andromeda galaxy in a bottle"
generator = torch.Generator(device=torch_device).manual_seed(0)
with torch.autocast(torch_device):
pipe(
prompt=prompt,
num_inference_steps=20,
guidance_scale=7.5,
generator=generator,
callback=test_callback_fn,
callback_steps=1,
)
generator = torch.manual_seed(0)
pipe(
prompt=prompt,
num_inference_steps=20,
guidance_scale=7.5,
generator=generator,
callback=test_callback_fn,
callback_steps=1,
)
assert test_callback_fn.has_been_called
assert number_of_steps == 20
......@@ -475,7 +473,7 @@ class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
pipeline.enable_attention_slicing(1)
pipeline.enable_sequential_cpu_offload()
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
_ = pipeline(prompt, generator=generator, num_inference_steps=5)
mem_bytes = torch.cuda.max_memory_allocated()
......
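The final hunk in that file measures peak GPU memory under sequential CPU offload plus maximally fine attention slicing. A sketch of that low-memory path; the checkpoint, prompt, and the memory bound are assumptions:

import torch
from diffusers import StableDiffusionPipeline

torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()

# Note: with sequential CPU offload the pipeline is not moved to CUDA up front;
# submodules are shuttled to the GPU only while they execute.
pipeline = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-2", torch_dtype=torch.float16
)
pipeline.enable_attention_slicing(1)   # slice attention one head at a time
pipeline.enable_sequential_cpu_offload()

generator = torch.manual_seed(0)
_ = pipeline("Andromeda galaxy in a bottle", generator=generator, num_inference_steps=5)

mem_bytes = torch.cuda.max_memory_allocated()
assert mem_bytes < 2.8 * 10**9  # assumed bound: peak usage stays under ~2.8 GB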
......@@ -23,7 +23,7 @@ import torch
from diffusers import AutoencoderKL, DDIMScheduler, LMSDiscreteScheduler, PNDMScheduler, UNet2DConditionModel
from diffusers.pipelines.stable_diffusion_safe import StableDiffusionPipelineSafe as StableDiffusionPipeline
from diffusers.utils import floats_tensor, slow, torch_device
from diffusers.utils import floats_tensor, nightly, torch_device
from diffusers.utils.testing_utils import require_torch_gpu
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
......@@ -201,6 +201,7 @@ class SafeDiffusionPipelineFastTests(unittest.TestCase):
assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.5095, 0.5674, 0.4668, 0.5126, 0.5697, 0.4675, 0.5278, 0.4964, 0.4945])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
assert np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
......@@ -253,13 +254,12 @@ class SafeDiffusionPipelineFastTests(unittest.TestCase):
sd_pipe.set_progress_bar_config(disable=None)
prompt = "A painting of a squirrel eating a burger"
generator = torch.Generator(device=torch_device).manual_seed(0)
image = sd_pipe([prompt], generator=generator, num_inference_steps=2, output_type="np").images
image = sd_pipe([prompt], num_inference_steps=2, output_type="np").images
assert image.shape == (1, 64, 64, 3)
@slow
@nightly
@require_torch_gpu
class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
def tearDown(self):
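The safe-diffusion integration suite moves from the slow marker to the nightly marker imported above. Both are assumed to gate execution on an opt-in environment variable; the stand-in below is only an illustration of that behavior, not the real implementation in diffusers.utils.testing_utils:

import os
import unittest

def nightly_like(test_case):
    # Roughly how such markers behave: skip unless the opt-in env var is set.
    return unittest.skipUnless(os.getenv("RUN_NIGHTLY", "0") == "1", "nightly test")(test_case)

@nightly_like
class SomeHeavyIntegrationTests(unittest.TestCase):
    def test_placeholder(self):
        self.assertTrue(True)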
......@@ -284,7 +284,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
guidance_scale = 7
# without safety guidance (sld_guidance_scale = 0)
generator = torch.Generator(device=torch_device).manual_seed(seed)
generator = torch.manual_seed(seed)
output = sd_pipe(
[prompt],
generator=generator,
......@@ -301,10 +301,11 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
expected_slice = [0.2278, 0.2231, 0.2249, 0.2333, 0.2303, 0.1885, 0.2273, 0.2144, 0.2176]
assert image.shape == (1, 512, 512, 3)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
# with safety guidance (strong configuration)
generator = torch.Generator(device=torch_device).manual_seed(seed)
generator = torch.manual_seed(seed)
output = sd_pipe(
[prompt],
generator=generator,
......@@ -325,6 +326,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
expected_slice = [0.2383, 0.2276, 0.236, 0.2192, 0.2186, 0.2053, 0.1971, 0.1901, 0.1719]
assert image.shape == (1, 512, 512, 3)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_nudity_safe_stable_diffusion(self):
......@@ -337,7 +339,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
seed = 2734971755
guidance_scale = 7
generator = torch.Generator(device=torch_device).manual_seed(seed)
generator = torch.manual_seed(seed)
output = sd_pipe(
[prompt],
generator=generator,
......@@ -354,9 +356,10 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
expected_slice = [0.3502, 0.3622, 0.3396, 0.3642, 0.3478, 0.3318, 0.35, 0.3348, 0.3297]
assert image.shape == (1, 512, 512, 3)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
generator = torch.Generator(device=torch_device).manual_seed(seed)
generator = torch.manual_seed(seed)
output = sd_pipe(
[prompt],
generator=generator,
......@@ -377,6 +380,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
expected_slice = [0.5531, 0.5206, 0.4895, 0.5156, 0.5182, 0.4751, 0.4802, 0.4803, 0.4443]
assert image.shape == (1, 512, 512, 3)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
def test_nudity_safetychecker_safe_stable_diffusion(self):
......@@ -391,7 +395,7 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
seed = 1044355234
guidance_scale = 12
generator = torch.Generator(device=torch_device).manual_seed(seed)
generator = torch.manual_seed(seed)
output = sd_pipe(
[prompt],
generator=generator,
......@@ -408,9 +412,10 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
expected_slice = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
assert image.shape == (1, 512, 512, 3)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-7
generator = torch.Generator(device=torch_device).manual_seed(seed)
generator = torch.manual_seed(seed)
output = sd_pipe(
[prompt],
generator=generator,
......@@ -430,4 +435,5 @@ class SafeDiffusionPipelineIntegrationTests(unittest.TestCase):
image_slice = image[0, -3:, -3:, -1]
expected_slice = np.array([0.5818, 0.6285, 0.6835, 0.6019, 0.625, 0.6754, 0.6096, 0.6334, 0.6561])
assert image.shape == (1, 512, 512, 3)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
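These safe-diffusion tests generate the same seeded prompt twice, first with Safe Latent Diffusion disabled and then with a strong safety configuration, and compare pixel slices of the results. A sketch of that comparison; the checkpoint and the sld_guidance_scale value are assumptions, while the seed is taken from the test above:

import torch
from diffusers.pipelines.stable_diffusion_safe import StableDiffusionPipelineSafe

# Assumed checkpoint; the tests load their own model.
sd_pipe = StableDiffusionPipelineSafe.from_pretrained("runwayml/stable-diffusion-v1-5").to("cuda")

prompt = "a potentially unsafe prompt"
seed = 2734971755

# Safe Latent Diffusion off: behaves like the plain Stable Diffusion pipeline.
generator = torch.manual_seed(seed)
image_off = sd_pipe([prompt], generator=generator, guidance_scale=7, sld_guidance_scale=0, output_type="np").images

# Safe Latent Diffusion on: a large sld_guidance_scale (illustrative value) steers sampling
# away from unsafe content, so the pixel slices differ from the run above.
generator = torch.manual_seed(seed)
image_on = sd_pipe([prompt], generator=generator, guidance_scale=7, sld_guidance_scale=2000, output_type="np").images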
......@@ -460,11 +460,9 @@ class UnCLIPPipelineIntegrationTests(unittest.TestCase):
pipe.enable_attention_slicing()
pipe.enable_sequential_cpu_offload()
generator = torch.Generator(device=torch_device).manual_seed(0)
_ = pipe(
"horse",
num_images_per_prompt=1,
generator=generator,
prior_num_inference_steps=2,
decoder_num_inference_steps=2,
super_res_num_inference_steps=2,
......
......@@ -51,7 +51,7 @@ class VersatileDiffusionDualGuidedPipelineIntegrationTests(unittest.TestCase):
"https://raw.githubusercontent.com/SHI-Labs/Versatile-Diffusion/master/assets/benz.jpg"
)
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
image = pipe(
prompt="first prompt",
image=second_prompt,
......@@ -92,7 +92,7 @@ class VersatileDiffusionDualGuidedPipelineIntegrationTests(unittest.TestCase):
second_prompt = load_image(
"https://raw.githubusercontent.com/SHI-Labs/Versatile-Diffusion/master/assets/benz.jpg"
)
generator = torch.Generator(device=torch_device).manual_seed(0)
generator = torch.manual_seed(0)
image = pipe(
prompt=first_prompt,
image=second_prompt,
......@@ -106,5 +106,6 @@ class VersatileDiffusionDualGuidedPipelineIntegrationTests(unittest.TestCase):
image_slice = image[0, 253:256, 253:256, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array([0.014, 0.0112, 0.0136, 0.0145, 0.0107, 0.0113, 0.0272, 0.0215, 0.0216])
expected_slice = np.array([0.0787, 0.0849, 0.0826, 0.0812, 0.0807, 0.0795, 0.0818, 0.0798, 0.0779])
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2
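Throughout these files the assertion pattern is the same: take a 3x3 patch of one channel, flatten it, and bound the worst deviation from nine stored reference values. A self-contained illustration with dummy data (the reference is derived here so the check passes; in the real tests it is hard-coded):

import numpy as np

# Dummy batch standing in for pipeline output of shape (1, H, W, 3).
rng = np.random.default_rng(0)
image = rng.random((1, 512, 512, 3), dtype=np.float32)

# Nine pixels of the last channel, e.g. rows/cols 253..255 as in the test above.
image_slice = image[0, 253:256, 253:256, -1]

expected_slice = image_slice.flatten().round(4)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2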