Unverified commit 7855ac59, authored by Fanli Lin, committed by GitHub

[tests] make tests device-agnostic (part 4) (#10508)



* initial commit

* fix empty cache

* fix one more

* fix style

* update device functions

* update

* update

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update src/diffusers/utils/testing_utils.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/controlnet/test_controlnet.py
Co-authored-by: hlky <hlky@hlky.ac>

* with gc.collect

* update

* make style

* check_torch_dependencies

* add mps empty cache

* add changes

* bug fix

* enable on xpu

* update more cases

* revert

* revert back

* Update test_stable_diffusion_xl.py

* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Update tests/pipelines/stable_diffusion/test_stable_diffusion_img2img.py
Co-authored-by: hlky <hlky@hlky.ac>

* Apply suggestions from code review
Co-authored-by: hlky <hlky@hlky.ac>

* add test marker

---------
Co-authored-by: hlky <hlky@hlky.ac>
parent 30cef6bf
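The hunks below repeatedly replace direct `torch.cuda.*` calls with `backend_*` helpers imported from `diffusers.utils.testing_utils`, so the same test can run on CUDA, XPU, or MPS hosts. A minimal sketch of how such a helper can dispatch, assuming `torch_device` resolves to a device-type string such as "cuda", "xpu", or "mps" (the body is illustrative; the real implementation in src/diffusers/utils/testing_utils.py may differ in detail):

```python
# Illustrative sketch, not a copy of the real helper.
import torch

def backend_empty_cache(device: str) -> None:
    """Free cached allocator memory on whichever accelerator backs `device`."""
    if device.startswith("cuda"):
        torch.cuda.empty_cache()
    elif device.startswith("xpu"):
        torch.xpu.empty_cache()
    elif device.startswith("mps"):
        torch.mps.empty_cache()
    # "cpu" has no allocator cache to clear, so it falls through as a no-op
```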
@@ -33,11 +33,12 @@ from diffusers import (
)
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
load_image,
nightly,
numpy_cosine_similarity_distance,
require_peft_backend,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -101,7 +102,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
# Keeping this test here makes sense because it doesn't look like an integration test
# (value assertions on logits).
@slow
@require_torch_gpu
@require_torch_accelerator
def test_integration_move_lora_cpu(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
lora_id = "takuma104/lora-test-text-encoder-lora-target"
@@ -158,7 +159,7 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
self.assertTrue(m.weight.device != torch.device("cpu"))
@slow
@require_torch_gpu
@require_torch_accelerator
def test_integration_move_lora_dora_cpu(self):
from peft import LoraConfig
@@ -209,18 +210,18 @@ class StableDiffusionLoRATests(PeftLoraLoaderMixinTests, unittest.TestCase):
@slow
@nightly
@require_torch_gpu
@require_torch_accelerator
@require_peft_backend
class LoraIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_integration_logits_with_scale(self):
path = "stable-diffusion-v1-5/stable-diffusion-v1-5"
@@ -378,7 +379,7 @@ class LoraIntegrationTests(unittest.TestCase):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -400,7 +401,7 @@ class LoraIntegrationTests(unittest.TestCase):
generator = torch.Generator().manual_seed(0)
pipe = StableDiffusionPipeline.from_pretrained("hf-internal-testing/Counterfeit-V2.5", safety_checker=None)
pipe.enable_sequential_cpu_offload()
pipe.enable_sequential_cpu_offload(device=torch_device)
lora_model_id = "hf-internal-testing/civitai-light-shadow-lora"
lora_filename = "light_and_shadow.safetensors"
pipe.load_lora_weights(lora_model_id, weight_name=lora_filename)
@@ -656,7 +657,7 @@ class LoraIntegrationTests(unittest.TestCase):
See: https://github.com/huggingface/diffusers/issues/5606
"""
pipeline = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
pipeline.enable_sequential_cpu_offload()
pipeline.enable_sequential_cpu_offload(device=torch_device)
civitai_path = hf_hub_download("ybelkada/test-ahi-civitai", "ahi_lora_weights.safetensors")
pipeline.load_lora_weights(civitai_path, adapter_name="ahri")
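The offload changes in the file above follow the same principle: `enable_model_cpu_offload` and `enable_sequential_cpu_offload` default to CUDA, so the tests now name the accelerator explicitly. Usage, as assembled from the hunks above:

```python
from diffusers import StableDiffusionPipeline
from diffusers.utils.testing_utils import torch_device

pipe = StableDiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
# previously: pipe.enable_model_cpu_offload()  -- implicitly targeted CUDA
pipe.enable_model_cpu_offload(device=torch_device)
```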
@@ -30,12 +30,13 @@ from diffusers import (
from diffusers.utils import load_image
from diffusers.utils.import_utils import is_accelerate_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
is_flaky,
nightly,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
require_peft_backend,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)
@@ -93,7 +94,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
def output_shape(self):
return (1, 32, 32, 3)
@require_torch_gpu
@require_torch_accelerator
def test_sd3_lora(self):
"""
Test loading the loras that are saved with the diffusers and peft formats.
@@ -135,7 +136,7 @@ class SD3LoRATests(unittest.TestCase, PeftLoraLoaderMixinTests):
@nightly
@require_torch_gpu
@require_torch_accelerator
@require_peft_backend
@require_big_gpu_with_torch_cuda
@pytest.mark.big_gpu_with_torch_cuda
@@ -146,12 +147,12 @@ class SD3LoraIntegrationTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def get_inputs(self, device, seed=0):
init_image = load_image(
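The decorator swap above (`require_torch_gpu` to `require_torch_accelerator`) widens the gate from CUDA-only to any supported torch accelerator. A hedged sketch of the distinction, assuming the decorator simply checks the resolved `torch_device` (the actual check in `testing_utils` may be more involved):

```python
import unittest

from diffusers.utils.testing_utils import torch_device

def require_torch_accelerator(test_case):
    # Skip unless some accelerator (CUDA, XPU, MPS, ...) backs torch_device.
    return unittest.skipUnless(torch_device != "cpu", "test requires an accelerator")(test_case)
```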
@@ -36,6 +36,9 @@ from diffusers.utils import logging
from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
backend_empty_cache,
backend_max_memory_allocated,
backend_reset_max_memory_allocated,
backend_reset_peak_memory_stats,
enable_full_determinism,
floats_tensor,
is_peft_available,
@@ -1002,7 +1005,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)
@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_from_hub_local(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1013,7 +1016,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)
@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_from_hub_local_subfolder(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1024,7 +1027,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)
@require_torch_gpu
@require_torch_accelerator
@parameterized.expand(
[
("hf-internal-testing/unet2d-sharded-dummy", None),
@@ -1039,7 +1042,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)
@require_torch_gpu
@require_torch_accelerator
@parameterized.expand(
[
("hf-internal-testing/unet2d-sharded-dummy-subfolder", None),
@@ -1054,7 +1057,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)
@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_device_map_from_hub_local(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy")
@@ -1064,7 +1067,7 @@ class UNet2DConditionModelTests(ModelTesterMixin, UNetTesterMixin, unittest.Test
assert loaded_model
assert new_output.sample.shape == (4, 4, 16, 16)
@require_torch_gpu
@require_torch_accelerator
def test_load_sharded_checkpoint_device_map_from_hub_local_subfolder(self):
_, inputs_dict = self.prepare_init_args_and_inputs_for_common()
ckpt_path = snapshot_download("hf-internal-testing/unet2d-sharded-dummy-subfolder")
@@ -1164,11 +1167,11 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
return model
@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_auto(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
unet = self.get_unet_model()
unet.set_attention_slice("auto")
@@ -1180,15 +1183,15 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 5 * 10**9
@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_max(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
unet = self.get_unet_model()
unet.set_attention_slice("max")
@@ -1200,15 +1203,15 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 5 * 10**9
@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_int(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
unet = self.get_unet_model()
unet.set_attention_slice(2)
@@ -1220,15 +1223,15 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 5 * 10**9
@require_torch_gpu
@require_torch_accelerator
def test_set_attention_slice_list(self):
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
backend_empty_cache(torch_device)
backend_reset_max_memory_allocated(torch_device)
backend_reset_peak_memory_stats(torch_device)
# there are 32 sliceable layers
slice_list = 16 * [2, 3]
@@ -1242,7 +1245,7 @@ class UNet2DConditionModelIntegrationTests(unittest.TestCase):
with torch.no_grad():
_ = unet(latents, timestep=timestep, encoder_hidden_states=encoder_hidden_states).sample
mem_bytes = torch.cuda.max_memory_allocated()
mem_bytes = backend_max_memory_allocated(torch_device)
assert mem_bytes < 5 * 10**9
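The memory assertions above get the same treatment: peak-memory bookkeeping goes through backend helpers rather than `torch.cuda` directly. A sketch of what two of the counterparts imported above might look like (the names match the imports; the bodies are assumptions, and the real helpers in `testing_utils` may differ):

```python
import torch

def backend_reset_peak_memory_stats(device: str) -> None:
    # Reset the peak-memory counters on the accelerator backing `device`.
    if device.startswith("cuda"):
        torch.cuda.reset_peak_memory_stats()
    elif device.startswith("xpu"):
        torch.xpu.reset_peak_memory_stats()

def backend_max_memory_allocated(device: str) -> int:
    # Report the peak allocated bytes since the last reset.
    if device.startswith("cuda"):
        return torch.cuda.max_memory_allocated()
    if device.startswith("xpu"):
        return torch.xpu.max_memory_allocated()
    return 0  # assumption: backends without this counter report zero
```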
@@ -79,7 +79,7 @@ def _test_stable_diffusion_compile(in_queue, out_queue, timeout):
pipe = StableDiffusionControlNetPipeline.from_pretrained(
"stable-diffusion-v1-5/stable-diffusion-v1-5", safety_checker=None, controlnet=controlnet
)
pipe.to("cuda")
pipe.to(torch_device)
pipe.set_progress_bar_config(disable=None)
pipe.unet.to(memory_format=torch.channels_last)
@@ -40,7 +40,7 @@ from diffusers.utils.import_utils import is_xformers_available
from diffusers.utils.testing_utils import (
enable_full_determinism,
floats_tensor,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)
@@ -245,7 +245,7 @@ class ControlNetPipelineSDXLFastTests(
def test_inference_batch_single_identical(self):
self._test_inference_batch_single_identical(expected_max_diff=2e-3)
@require_torch_gpu
@require_torch_accelerator
def test_stable_diffusion_xl_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -254,12 +254,12 @@ class ControlNetPipelineSDXLFastTests(
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -223,12 +223,12 @@ class StableDiffusionXLControlNetPipelineFastTests(
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -31,6 +31,7 @@ from diffusers import (
from diffusers.models import FluxControlNetModel
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
nightly,
numpy_cosine_similarity_distance,
@@ -217,12 +218,12 @@ class FluxControlNetPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_canny(self):
controlnet = FluxControlNetModel.from_pretrained(
@@ -239,7 +239,7 @@ class StableDiffusion3ControlNetPipelineSlowTests(unittest.TestCase):
pipe = StableDiffusion3ControlNetPipeline.from_pretrained(
"stabilityai/stable-diffusion-3-medium-diffusers", controlnet=controlnet, torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -9,6 +9,7 @@ from transformers import AutoTokenizer, CLIPTextConfig, CLIPTextModel, CLIPToken
from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler, FluxPipeline, FluxTransformer2DModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
nightly,
numpy_cosine_similarity_distance,
require_big_gpu_with_torch_cuda,
@@ -212,12 +213,12 @@ class FluxPipelineSlowTests(unittest.TestCase):
def setUp(self):
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def get_inputs(self, device, seed=0):
generator = torch.Generator(device="cpu").manual_seed(seed)
@@ -34,11 +34,12 @@ from diffusers import (
from diffusers.image_processor import IPAdapterMaskProcessor
from diffusers.utils import load_image
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
is_flaky,
load_pt,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -54,13 +55,13 @@ class IPAdapterNightlyTestsMixin(unittest.TestCase):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def get_image_encoder(self, repo_id, subfolder):
image_encoder = CLIPVisionModelWithProjection.from_pretrained(
@@ -165,7 +166,7 @@ class IPAdapterNightlyTestsMixin(unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
def test_text_to_image(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="models/image_encoder")
@@ -280,7 +281,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
inputs = self.get_dummy_inputs()
output_without_offload = pipeline(**inputs).images
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
inputs = self.get_dummy_inputs()
output_with_offload = pipeline(**inputs).images
max_diff = np.abs(output_with_offload - output_without_offload).max()
@@ -391,7 +392,7 @@ class IPAdapterSDIntegrationTests(IPAdapterNightlyTestsMixin):
@slow
@require_torch_gpu
@require_torch_accelerator
class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
def test_text_to_image_sdxl(self):
image_encoder = self.get_image_encoder(repo_id="h94/IP-Adapter", subfolder="sdxl_models/image_encoder")
@@ -403,7 +404,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
feature_extractor=feature_extractor,
torch_dtype=self.dtype,
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
inputs = self.get_dummy_inputs()
@@ -461,7 +462,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
feature_extractor=feature_extractor,
torch_dtype=self.dtype,
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
inputs = self.get_dummy_inputs(for_image_to_image=True)
@@ -530,7 +531,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
feature_extractor=feature_extractor,
torch_dtype=self.dtype,
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
inputs = self.get_dummy_inputs(for_inpainting=True)
@@ -578,7 +579,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
image_encoder=image_encoder,
torch_dtype=self.dtype,
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.load_ip_adapter(
"h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors"
)
@@ -606,7 +607,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
image_encoder=image_encoder,
torch_dtype=self.dtype,
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.load_ip_adapter(
"h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"] * 2
)
@@ -633,7 +634,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
pipeline = StableDiffusionXLPipeline.from_pretrained(
"RunDiffusion/Juggernaut-XL-v9", torch_dtype=torch.float16, image_encoder=image_encoder, variant="fp16"
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.load_ip_adapter(
["ostris/ip-composition-adapter", "h94/IP-Adapter"],
@@ -674,7 +675,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
image_encoder=image_encoder,
torch_dtype=self.dtype,
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.load_ip_adapter(
"h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"]
)
@@ -24,10 +24,11 @@ from transformers import XLMRobertaTokenizerFast
from diffusers import DDIMScheduler, KandinskyPipeline, KandinskyPriorPipeline, UNet2DConditionModel, VQModel
from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_numpy,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -246,7 +247,7 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -255,12 +256,12 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -275,19 +276,19 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class KandinskyPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinsky_text2img(self):
expected_image = load_numpy(
@@ -306,7 +307,7 @@ class KandinskyPipelineIntegrationTests(unittest.TestCase):
prompt = "red cat, 4k photo"
generator = torch.Generator(device="cuda").manual_seed(0)
generator = torch.Generator(device=torch_device).manual_seed(0)
image_emb, zero_image_emb = pipe_prior(
prompt,
generator=generator,
@@ -314,7 +315,7 @@ class KandinskyPipelineIntegrationTests(unittest.TestCase):
negative_prompt="",
).to_tuple()
generator = torch.Generator(device="cuda").manual_seed(0)
generator = torch.Generator(device=torch_device).manual_seed(0)
output = pipeline(
prompt,
image_embeds=image_emb,
@@ -18,7 +18,7 @@ import unittest
import numpy as np
from diffusers import KandinskyCombinedPipeline, KandinskyImg2ImgCombinedPipeline, KandinskyInpaintCombinedPipeline
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device
from ..test_pipelines_common import PipelineTesterMixin
from .test_kandinsky import Dummies
@@ -105,7 +105,7 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -114,12 +114,12 @@ class KandinskyPipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCase)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -213,7 +213,7 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.Te
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -222,12 +222,12 @@ class KandinskyPipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest.Te
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -325,7 +325,7 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.Te
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -334,12 +334,12 @@ class KandinskyPipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest.Te
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -32,12 +32,13 @@ from diffusers import (
)
from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
nightly,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -267,7 +268,7 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -299,19 +300,19 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinsky_img2img(self):
expected_image = load_numpy(
@@ -365,19 +366,19 @@ class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
@nightly
@require_torch_gpu
@require_torch_accelerator
class KandinskyImg2ImgPipelineNightlyTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinsky_img2img_ddpm(self):
expected_image = load_numpy(
@@ -25,12 +25,13 @@ from transformers import XLMRobertaTokenizerFast
from diffusers import DDIMScheduler, KandinskyInpaintPipeline, KandinskyPriorPipeline, UNet2DConditionModel, VQModel
from diffusers.pipelines.kandinsky.text_encoder import MCLIPConfig, MultilingualCLIP
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
nightly,
require_torch_gpu,
require_torch_accelerator,
torch_device,
)
@@ -265,7 +266,7 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
def test_inference_batch_single_identical(self):
super().test_inference_batch_single_identical(expected_max_diff=3e-3)
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -274,12 +275,12 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -297,19 +298,19 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@nightly
@require_torch_gpu
@require_torch_accelerator
class KandinskyInpaintPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinsky_inpaint(self):
expected_image = load_numpy(
@@ -22,12 +22,14 @@ import torch
from diffusers import DDIMScheduler, KandinskyV22Pipeline, KandinskyV22PriorPipeline, UNet2DConditionModel, VQModel
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_numpy,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin
@@ -221,19 +223,19 @@ class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinsky_text2img(self):
expected_image = load_numpy(
@@ -244,12 +246,12 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.enable_model_cpu_offload()
pipe_prior.enable_model_cpu_offload(device=torch_device)
pipeline = KandinskyV22Pipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)
prompt = "red cat, 4k photo"
@@ -22,7 +22,7 @@ from diffusers import (
KandinskyV22Img2ImgCombinedPipeline,
KandinskyV22InpaintCombinedPipeline,
)
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu, torch_device
from diffusers.utils.testing_utils import enable_full_determinism, require_torch_accelerator, torch_device
from ..test_pipelines_common import PipelineTesterMixin
from .test_kandinsky import Dummies
@@ -110,7 +110,7 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -119,12 +119,12 @@ class KandinskyV22PipelineCombinedFastTests(PipelineTesterMixin, unittest.TestCa
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -234,7 +234,7 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -243,12 +243,12 @@ class KandinskyV22PipelineImg2ImgCombinedFastTests(PipelineTesterMixin, unittest
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -357,7 +357,7 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest
np.abs(image_from_tuple_slice.flatten() - expected_slice).max() < 1e-2
), f" expected_slice {expected_slice}, but got {image_from_tuple_slice.flatten()}"
@require_torch_gpu
@require_torch_accelerator
def test_offloads(self):
pipes = []
components = self.get_dummy_components()
@@ -366,12 +366,12 @@ class KandinskyV22PipelineInpaintCombinedFastTests(PipelineTesterMixin, unittest
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_model_cpu_offload()
sd_pipe.enable_model_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
components = self.get_dummy_components()
sd_pipe = self.pipeline_class(**components)
sd_pipe.enable_sequential_cpu_offload()
sd_pipe.enable_sequential_cpu_offload(device=torch_device)
pipes.append(sd_pipe)
image_slices = []
@@ -29,13 +29,15 @@ from diffusers import (
VQModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
load_numpy,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin
@@ -238,19 +240,19 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas
@slow
@require_torch_gpu
@require_torch_accelerator
class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinsky_img2img(self):
expected_image = load_numpy(
@@ -266,12 +268,12 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.enable_model_cpu_offload()
pipe_prior.enable_model_cpu_offload(device=torch_device)
pipeline = KandinskyV22Img2ImgPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipeline.enable_model_cpu_offload()
pipeline.enable_model_cpu_offload(device=torch_device)
pipeline.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -29,13 +29,14 @@ from diffusers import (
VQModel,
)
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
is_flaky,
load_image,
load_numpy,
numpy_cosine_similarity_distance,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -292,19 +293,19 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCas
@slow
@require_torch_gpu
@require_torch_accelerator
class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinsky_inpaint(self):
expected_image = load_numpy(
@@ -31,10 +31,12 @@ from diffusers import (
from diffusers.image_processor import VaeImageProcessor
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
from ..pipeline_params import (
@@ -167,25 +169,25 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
@require_torch_accelerator
class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinskyV3(self):
pipe = AutoPipelineForText2Image.from_pretrained(
"kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."
@@ -211,7 +213,7 @@ class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
pipe = AutoPipelineForImage2Image.from_pretrained(
"kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
@@ -31,10 +31,11 @@ from diffusers import (
from diffusers.image_processor import VaeImageProcessor
from diffusers.schedulers.scheduling_ddpm import DDPMScheduler
from diffusers.utils.testing_utils import (
backend_empty_cache,
enable_full_determinism,
floats_tensor,
load_image,
require_torch_gpu,
require_torch_accelerator,
slow,
torch_device,
)
@@ -192,25 +193,25 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
@slow
@require_torch_gpu
@require_torch_accelerator
class Kandinsky3Img2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
super().setUp()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
gc.collect()
torch.cuda.empty_cache()
backend_empty_cache(torch_device)
def test_kandinskyV3_img2img(self):
pipe = AutoPipelineForImage2Image.from_pretrained(
"kandinsky-community/kandinsky-3", variant="fp16", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()
pipe.enable_model_cpu_offload(device=torch_device)
pipe.set_progress_bar_config(disable=None)
generator = torch.Generator(device="cpu").manual_seed(0)
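Taken together, the per-file edits converge on a single device-agnostic pattern for integration test classes; this is how it reads when assembled from the hunks above (the class name here is hypothetical):

```python
import gc
import unittest

from diffusers.utils.testing_utils import (
    backend_empty_cache,
    require_torch_accelerator,
    slow,
    torch_device,
)

@slow
@require_torch_accelerator
class ExamplePipelineIntegrationTests(unittest.TestCase):
    def setUp(self):
        # clean up accelerator memory before each test
        super().setUp()
        gc.collect()
        backend_empty_cache(torch_device)  # was: torch.cuda.empty_cache()

    def tearDown(self):
        # clean up accelerator memory after each test
        super().tearDown()
        gc.collect()
        backend_empty_cache(torch_device)
```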