"vscode:/vscode.git/clone" did not exist on "ef757da2c90ad52f35c95688095dfd84655cceb7"
Unverified Commit 7855ac59 authored by Fanli Lin, committed by GitHub

[tests] make tests device-agnostic (part 4) (#10508)



* initial commit

* fix empty cache

* fix one more

* fix style

* update device functions

* update

* update

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py
Co-authored-by: hlky <hlky@hlky.ac>

* with gc.collect

* update

* make style

* check_torch_dependencies

* add mps empty cache

* add changes

* bug fix

* enable on xpu

* update more cases

* revert

* revert back

* Update test_stable_diffusion_xl.py

* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Apply suggestions from code review
Co-authored-by: hlky <hlky@hlky.ac>

* add test marker

---------
Co-authored-by: hlky <hlky@hlky.ac>
parent 30cef6bf
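The diff below is mechanical: every hard-coded `torch.cuda.*` call is routed through a device-agnostic helper from `diffusers.utils.testing_utils`, and `require_torch_gpu` becomes `require_torch_accelerator`. As a rough illustration of the pattern (a minimal sketch, not the verbatim `testing_utils` implementation), such a helper dispatches on the device type of `torch_device`:

```python
import torch

# Minimal sketch of a backend-dispatch helper, assuming torch.xpu / torch.mps
# expose empty_cache() on builds with those backends (true for recent PyTorch).
def backend_empty_cache(device: str) -> None:
    device_type = torch.device(device).type
    if device_type == "cuda":
        torch.cuda.empty_cache()
    elif device_type == "xpu":
        torch.xpu.empty_cache()
    elif device_type == "mps":
        torch.mps.empty_cache()
    # "cpu" and unknown backends have no cache to clear, so fall through
```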
@@ -33,11 +33,12 @@ from diffusers import (
 )
 from diffusers.utils.import_utils import is_accelerate_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     load_image,
     nightly,
     numpy_cosine_similarity_distance,
     require_peft_backend,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -101,7 +102,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
     # Keeping this test here makes sense because it doesn't look any integration
     # (value assertions on logits).
     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_integration_move_lora_cpu(self):
         path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
         lora_id = "takuma104/lora-test-text-encoder-lora-target"
@@ -158,7 +159,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
         self.assertTrue(m.weight.device != torch.device("cpu"))

     @slow
-    @require_torch_gpu
+    @require_torch_accelerator
     def test_integration_move_lora_dora_cpu(self):
         from peft import LoraConfig

@@ -209,18 +210,18 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
 @slow
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
 class LoraIntegrationTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_integration_logits_with_scale(self):
         path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
@@ -378,7 +379,7 @@ class LoraIntegrationTests(unittest.TestCase):
         generator = torch.Generator().manual_seed(0)

         pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
         lora_filename = "light_and_shadow.safetensors"
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -400,7 +401,7 @@ class LoraIntegrationTests(unittest.TestCase):
         generator = torch.Generator().manual_seed(0)

         pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
-        pipe.enable_sequential_cpu_offload()
+        pipe.enable_sequential_cpu_offload(device=torch_device)
         lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
         lora_filename = "light_and_shadow.safetensors"
         pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -656,7 +657,7 @@ class LoraIntegrationTests(unittest.TestCase):
         See: https://github.com/huggingface/diffusers/issues/5606
         """
         pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
-        pipeline.enable_sequential_cpu_offload()
+        pipeline.enable_sequential_cpu_offload(device=torch_device)
         civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
         pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
...
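Assembled, the setUp/tearDown pattern this file (and the ones below) now uses looks like the following; `ExampleIntegrationTests` is a hypothetical class name for illustration:

```python
import gc
import unittest

from diffusers.utils.testing_utils import (
    backend_empty_cache,
    require_torch_accelerator,
    torch_device,
)


@require_torch_accelerator  # skips unless some torch accelerator is available
class ExampleIntegrationTests(unittest.TestCase):  # hypothetical class name
    def setUp(self):
        super().setUp()
        gc.collect()
        backend_empty_cache(torch_device)  # was: torch.cuda.empty_cache()

    def tearDown(self):
        super().tearDown()
        gc.collect()
        backend_empty_cache(torch_device)
```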
@@ -30,12 +30,13 @@ from diffusers import (
 from diffusers.utils import load_image
 from diffusers.utils.import_utils import is_accelerate_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     is_flaky,
     nightly,
     numpy_cosine_similarity_distance,
     require_big_gpu_with_torch_cuda,
     require_peft_backend,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )
@@ -93,7 +94,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
     def output_shape(self):
         return (1, 32, 32, 3)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_sd3_lora(self):
         """
         Test loading the loras that are saved with the diffusers and peft formats.
@@ -135,7 +136,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 @require_peft_backend
 @require_big_gpu_with_torch_cuda
 @pytest.mark.big_gpu_with_torch_cuda
@@ -146,12 +147,12 @@ class SD3LoraIntegrationTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, seed=0):
         init_image = load_image(
...
@@ -36,6 +36,9 @@ from diffusers.utils import logging
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
     enable_full_determinism,
     floats_tensor,
     is_peft_available,
@@ -1002,7 +1005,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_from_hub_local(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1013,7 +1016,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_from_hub_local_subfolder(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1024,7 +1027,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     @parameterized.expand(
         [
             ("hf-internal-testing/unet2d-sharded-dummy", None),
@@ -1039,7 +1042,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     @parameterized.expand(
         [
             ("hf-internal-testing/unet2d-sharded-dummy-subfolder", None),
@@ -1054,7 +1057,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_device_map_from_hub_local(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1064,7 +1067,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
         assert loaded_model
         assert new_output.sample.shape == (4, 4, 16, 16)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_load_sharded_checkpoint_device_map_from_hub_local_subfolder(self):
         _, inputs_dict = self.prepare_init_args_and_inputs_for_common()
         ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1164,11 +1167,11 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
         return model

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_auto(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         unet = self.get_unet_model()
         unet.set_attention_slice("auto")
@@ -1180,15 +1183,15 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         assert mem_bytes < 5 * 10**9

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_max(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         unet = self.get_unet_model()
         unet.set_attention_slice("max")
@@ -1200,15 +1203,15 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         assert mem_bytes < 5 * 10**9

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_int(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         unet = self.get_unet_model()
         unet.set_attention_slice(2)
@@ -1220,15 +1223,15 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         assert mem_bytes < 5 * 10**9

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_set_attention_slice_list(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         # there are 32 sliceable layers
         slice_list = 16 * [2, 3]
@@ -1242,7 +1245,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
         with torch.no_grad():
             _ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         assert mem_bytes < 5 * 10**9
...
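The memory-bound attention-slicing tests above also need the peak-allocation counters, which the new `backend_max_memory_allocated` and `backend_reset_*` helpers wrap the same way. A sketch under the assumption that `torch.xpu` mirrors the `torch.cuda` memory-stats API on recent PyTorch builds; not the verbatim `testing_utils` implementation:

```python
import torch

def backend_max_memory_allocated(device: str) -> int:
    # Peak bytes allocated on the accelerator since the last stats reset.
    device_type = torch.device(device).type
    if device_type == "cuda":
        return torch.cuda.max_memory_allocated(device)
    if device_type == "xpu":
        return torch.xpu.max_memory_allocated(device)  # assumed mirror of the CUDA API
    return 0  # backends without allocator statistics

def backend_reset_peak_memory_stats(device: str) -> None:
    device_type = torch.device(device).type
    if device_type == "cuda":
        torch.cuda.reset_peak_memory_stats(device)
    elif device_type == "xpu":
        torch.xpu.reset_peak_memory_stats(device)  # assumed mirror of the CUDA API
```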
@@ -79,7 +79,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
     pipe = StableDiffusionControlNetPipeline.from_pretrained(
         "stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
     )
-    pipe.to("cuda")
+    pipe.to(torch_device)
     pipe.set_progress_bar_config(disable=None)
     pipe.unet.to(memory_format=torch.channels_last)
...
@@ -40,7 +40,7 @@ from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
     floats_tensor,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )
@@ -245,7 +245,7 @@ class ControlNetPipelineSDXLFastTests(
     def test_inference_batch_single_identical(self):
         self._test_inference_batch_single_identical(expected_max_diff=2e-3)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_stable_diffusion_xl_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -254,12 +254,12 @@ class ControlNetPipelineSDXLFastTests(
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
...
@@ -223,12 +223,12 @@ class StableDiffusionXLControlNetPipelineFastTests(
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
...
@@ -31,6 +31,7 @@ from diffusers import (
 from diffusers.models import FluxControlNetModel
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     nightly,
     numpy_cosine_similarity_distance,
@@ -217,12 +218,12 @@ class FluxControlNetPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_canny(self):
         controlnet = FluxControlNetModel.from_pretrained(
...
@@ -239,7 +239,7 @@ class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
         pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
             "stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
         )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
         pipe.set_progress_bar_config(disable=None)

         generator = torch.Generator(device="cpu").manual_seed(0)
...
@@ -9,6 +9,7 @@ from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPToken
 from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     nightly,
     numpy_cosine_similarity_distance,
     require_big_gpu_with_torch_cuda,
@@ -212,12 +213,12 @@ class FluxPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, seed=0):
         generator = torch.Generator(device="cpu").manual_seed(seed)
...
@@ -34,11 +34,12 @@ from diffusers import (
 from diffusers.image_processor import IPAdapterMaskProcessor
 from diffusers.utils import load_image
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     is_flaky,
     load_pt,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -54,13 +55,13 @@ class IPAdapterNightlyTestsMixin(unittest.TestCase):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_image_encoder(self, repo_id, subfolder):
         image_encoder = CLIPVisionModelWithProjection.from_pretrained(
@@ -165,7 +166,7 @@ class IPAdapterNightlyTestsMixin(unittest.TestCase):
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
     def test_text_to_image(self):
         image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -280,7 +281,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
         inputs = self.get_dummy_inputs()
         output_without_offload = pipeline(**inputs).images

-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         inputs = self.get_dummy_inputs()
         output_with_offload = pipeline(**inputs).images
         max_diff = np.abs(output_with_offload - output_without_offload).max()
@@ -391,7 +392,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
     def test_text_to_image_sdxl(self):
         image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="sdxl_models/image_encoder")
@@ -403,7 +404,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs()
@@ -461,7 +462,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_image_to_image=True)
@@ -530,7 +531,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             feature_extractor=feature_extractor,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")

         inputs = self.get_dummy_inputs(for_inpainting=True)
@@ -578,7 +579,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors"
         )
@@ -606,7 +607,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"] * 2
         )
@@ -633,7 +634,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
         pipeline = StableDiffusionXLPipeline.from_pretrained(
             "RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.float16, image_encoder=image_encoder, variant="fp16"
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter(
             ["ostris/ip-composition-adapter", "h94/IP-Adapter"],
@@ -674,7 +675,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
             image_encoder=image_encoder,
             torch_dtype=self.dtype,
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.load_ip_adapter(
             "h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"]
         )
...
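`enable_model_cpu_offload` and `enable_sequential_cpu_offload` default their execution device to CUDA, which is why every call in this diff gains an explicit `device=torch_device`. Typical usage on a non-CUDA host:

```python
from diffusers import StableDiffusionPipeline
from diffusers.utils.testing_utils import torch_device

pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
# Offload hooks move submodules between CPU and the accelerator on demand;
# passing device=torch_device keeps this working on XPU/MPS as well as CUDA.
pipe.enable_model_cpu_offload(device=torch_device)
```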
@@ -24,10 +24,11 @@ from transformers import XLMRobertaTokenizerFast
 from diffusers import DDIMScheduler, KandinskyPipeline, KandinskyPriorPipeline, UNet2DConditionModel, VQModel
 from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_numpy,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -246,7 +247,7 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -255,12 +256,12 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -275,19 +276,19 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_text2img(self):
         expected_image = load_numpy(
@@ -306,7 +307,7 @@ class KandinskyPipelineIntegrationTests(unittest.TestCase):
         prompt = "red cat, 4k photo"

-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device=torch_device).manual_seed(0)
         image_emb, zero_image_emb = pipe_prior(
             prompt,
             generator=generator,
@@ -314,7 +315,7 @@ class KandinskyPipelineIntegrationTests(unittest.TestCase):
             negative_prompt="",
         ).to_tuple()

-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device=torch_device).manual_seed(0)
         output = pipeline(
             prompt,
             image_embeds=image_emb,
...
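`torch.Generator` objects are bound to a single device, so a `device="cuda"` generator cannot seed sampling on an XPU or MPS host; building it on `torch_device`, as the hunk above does, is the device-agnostic form:

```python
import torch
from diffusers.utils.testing_utils import torch_device

# Bound to whatever accelerator the suite runs on, not hard-coded to CUDA.
generator = torch.Generator(device=torch_device).manual_seed(0)
```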
@@ -18,7 +18,7 @@ import unittest
 import numpy as np

 from diffusers import KandinskyCombinedPipeline, KandinskyImg2ImgCombinedPipeline, KandinskyInpaintCombinedPipeline
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device

 from ..test_pipelines_common import PipelineTesterMixin
 from .test_kandinsky import Dummies
@@ -105,7 +105,7 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -114,12 +114,12 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -213,7 +213,7 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.Te
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -222,12 +222,12 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.Te
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -325,7 +325,7 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.Te
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -334,12 +334,12 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.Te
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
...
@@ -32,12 +32,13 @@ from diffusers import (
 )
 from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
     torch_device,
 )
@@ -267,7 +268,7 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
             np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
         ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -299,19 +300,19 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_img2img(self):
         expected_image = load_numpy(
@@ -365,19 +366,19 @@ class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyImg2ImgPipelineNightlyTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_img2img_ddpm(self):
         expected_image = load_numpy(
...
@@ -25,12 +25,13 @@ from transformers import XLMRobertaTokenizerFast
 from diffusers import DDIMScheduler, KandinskyInpaintPipeline, KandinskyPriorPipeline, UNet2DConditionModel, VQModel
 from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )
@@ -265,7 +266,7 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
     def test_inference_batch_single_identical(self):
         super().test_inference_batch_single_identical(expected_max_diff=3e-3)

-    @require_torch_gpu
+    @require_torch_accelerator
     def test_offloads(self):
         pipes = []
         components = self.get_dummy_components()
@@ -274,12 +275,12 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         components = self.get_dummy_components()
         sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
         pipes.append(sd_pipe)

         image_slices = []
@@ -297,19 +298,19 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyInpaintPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_inpaint(self):
         expected_image = load_numpy(
...
@@ -22,12 +22,14 @@ import torch
 from diffusers import DDIMScheduler, KandinskyV22Pipeline, KandinskyV22PriorPipeline, UNet2DConditionModel, VQModel
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_numpy,
     numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
     slow,
+    torch_device,
 )

 from ..test_pipelines_common import PipelineTesterMixin
@@ -221,19 +223,19 @@ class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
 @slow
-@require_torch_gpu
+@require_torch_accelerator
 class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_kandinsky_text2img(self):
         expected_image = load_numpy(
@@ -244,12 +246,12 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
         pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
         )
-        pipe_prior.enable_model_cpu_offload()
+        pipe_prior.enable_model_cpu_offload(device=torch_device)

         pipeline = KandinskyV22Pipeline.from_pretrained(
             "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
         )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
         pipeline.set_progress_bar_config(disable=None)

         prompt = "red cat, 4k photo"
...
...@@ -22,7 +22,7 @@ from diffusers import ( ...@@ -22,7 +22,7 @@ from diffusers import (
KandinskyV22Img2ImgCombinedPipeline, KandinskyV22Img2ImgCombinedPipeline,
KandinskyV22InpaintCombinedPipeline, KandinskyV22InpaintCombinedPipeline,
) )
-from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device
from ..test_pipelines_common import PipelineTesterMixin
from .test_kandinsky import Dummies

@@ -110,7 +110,7 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()

@@ -119,12 +119,12 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
        pipes.append(sd_pipe)

        image_slices = []

@@ -234,7 +234,7 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()

@@ -243,12 +243,12 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
        pipes.append(sd_pipe)

        image_slices = []

@@ -357,7 +357,7 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest
            np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
        ), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"

-    @require_torch_gpu
+    @require_torch_accelerator
    def test_offloads(self):
        pipes = []
        components = self.get_dummy_components()

@@ -366,12 +366,12 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest
        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_model_cpu_offload()
+        sd_pipe.enable_model_cpu_offload(device=torch_device)
        pipes.append(sd_pipe)

        components = self.get_dummy_components()
        sd_pipe = self.pipeline_class(**components)
-        sd_pipe.enable_sequential_cpu_offload()
+        sd_pipe.enable_sequential_cpu_offload(device=torch_device)
        pipes.append(sd_pipe)

        image_slices = []
...
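The `device=torch_device` argument is the substance of these hunks: both offload helpers default to CUDA, so passing the detected device keeps the same tests running on XPU or MPS runners. A minimal sketch of the pattern, assuming a dummy pipeline class with the standard DiffusionPipeline offload API (`build_offloaded_pipes` is a hypothetical helper for illustration, not part of the test suite):

from diffusers.utils.testing_utils import torch_device

def build_offloaded_pipes(pipeline_class, get_dummy_components):
    pipes = []

    # Model-level offload: whole sub-models move between CPU and the accelerator.
    pipe = pipeline_class(**get_dummy_components())
    pipe.enable_model_cpu_offload(device=torch_device)  # explicit device instead of the CUDA default
    pipes.append(pipe)

    # Sequential offload: finer-grained and lower-memory, at the cost of speed.
    pipe = pipeline_class(**get_dummy_components())
    pipe.enable_sequential_cpu_offload(device=torch_device)
    pipes.append(pipe)

    return pipes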
@@ -29,13 +29,15 @@ from diffusers import (
    VQModel,
)
from diffusers.utils.testing_utils import (
+    backend_empty_cache,
    enable_full_determinism,
    floats_tensor,
    load_image,
    load_numpy,
    numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
    slow,
+    torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin

@@ -238,19 +240,19 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas
@slow
-@require_torch_gpu
+@require_torch_accelerator
class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
    def setUp(self):
        # clean up the VRAM before each test
        super().setUp()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def test_kandinsky_img2img(self):
        expected_image = load_numpy(

@@ -266,12 +268,12 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
        pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
            "kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
        )
-        pipe_prior.enable_model_cpu_offload()
+        pipe_prior.enable_model_cpu_offload(device=torch_device)

        pipeline = KandinskyV22Img2ImgPipeline.from_pretrained(
            "kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
        )
-        pipeline.enable_model_cpu_offload()
+        pipeline.enable_model_cpu_offload(device=torch_device)
        pipeline.set_progress_bar_config(disable=None)

        generator = torch.Generator(device="cpu").manual_seed(0)
...
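`backend_empty_cache` replaces the hard-coded `torch.cuda.empty_cache()` in the setUp/tearDown pairs above. A plausible sketch of the dispatch such a helper performs, assuming it routes purely on the device string (the actual implementation in `diffusers.utils.testing_utils` may differ):

import torch

def backend_empty_cache(device: str) -> None:
    # Release the allocator cache for whichever backend the test device uses;
    # plain CPU has no cache to clear, so it falls through as a no-op.
    if device.startswith("cuda"):
        torch.cuda.empty_cache()
    elif device.startswith("xpu"):
        torch.xpu.empty_cache()
    elif device.startswith("mps"):
        torch.mps.empty_cache()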
@@ -29,13 +29,14 @@ from diffusers import (
    VQModel,
)
from diffusers.utils.testing_utils import (
+    backend_empty_cache,
    enable_full_determinism,
    floats_tensor,
    is_flaky,
    load_image,
    load_numpy,
    numpy_cosine_similarity_distance,
-    require_torch_gpu,
+    require_torch_accelerator,
    slow,
    torch_device,
)

@@ -292,19 +293,19 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCas
@slow
-@require_torch_gpu
+@require_torch_accelerator
class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
    def setUp(self):
        # clean up the VRAM before each test
        super().setUp()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def test_kandinsky_inpaint(self):
        expected_image = load_numpy(
...
@@ -31,10 +31,12 @@ from diffusers import (
from diffusers.image_processor import VaeImageProcessor
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusers.utils.testing_utils import (
+    backend_empty_cache,
    enable_full_determinism,
    load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
    slow,
+    torch_device,
)
from ..pipeline_params import (

@@ -167,25 +169,25 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
-@require_torch_gpu
+@require_torch_accelerator
class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
    def setUp(self):
        # clean up the VRAM before each test
        super().setUp()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def test_kandinskyV3(self):
        pipe = AutoPipelineForText2Image.from_pretrained(
            "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
        )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
        pipe.set_progress_bar_config(disable=None)

        prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."

@@ -211,7 +213,7 @@ class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
        pipe = AutoPipelineForImage2Image.from_pretrained(
            "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
        )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
        pipe.set_progress_bar_config(disable=None)

        generator = torch.Generator(device="cpu").manual_seed(0)
...
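`require_torch_accelerator` is the device-agnostic counterpart of `require_torch_gpu`: rather than demanding CUDA specifically, it skips the test only when no accelerator backend is available. A rough sketch of such a decorator (hypothetical; the real helper in `diffusers.utils.testing_utils` may check backends differently):

import unittest

from diffusers.utils.testing_utils import torch_device

def require_torch_accelerator(test_case):
    # Run the test whenever torch_device resolved to any non-CPU backend
    # (cuda, xpu, mps, ...); otherwise mark it skipped.
    return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)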
@@ -31,10 +31,11 @@ from diffusers import (
from diffusers.image_processor import VaeImageProcessor
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusers.utils.testing_utils import (
+    backend_empty_cache,
    enable_full_determinism,
    floats_tensor,
    load_image,
-    require_torch_gpu,
+    require_torch_accelerator,
    slow,
    torch_device,
)

@@ -192,25 +193,25 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
@slow
-@require_torch_gpu
+@require_torch_accelerator
class Kandinsky3Img2ImgPipelineIntegrationTests(unittest.TestCase):
    def setUp(self):
        # clean up the VRAM before each test
        super().setUp()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

    def test_kandinskyV3_img2img(self):
        pipe = AutoPipelineForImage2Image.from_pretrained(
            "kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
        )
-        pipe.enable_model_cpu_offload()
+        pipe.enable_model_cpu_offload(device=torch_device)
        pipe.set_progress_bar_config(disable=None)

        generator = torch.Generator(device="cpu").manual_seed(0)
...