Unverified commit 9ecd9248, authored by Patrick von Platen, committed by GitHub

[Tests] Correct PT2 (#2724)

* [Tests] Correct PT2

* correct more

* move versatile to nightly

* up

* up

* again

* Apply suggestions from code review
parent 116f70cb
@@ -15,7 +15,6 @@
 import gc
 import math
-import tracemalloc
 import unittest

 import torch
@@ -155,33 +154,6 @@ class UNetLDMModelTests(ModelTesterMixin, unittest.TestCase):
         assert torch_all_close(arr_accelerate, arr_normal_load, rtol=1e-3)

-    @unittest.skipIf(torch_device != "cuda", "This test is supposed to run on GPU")
-    def test_memory_footprint_gets_reduced(self):
-        torch.cuda.empty_cache()
-        gc.collect()
-
-        tracemalloc.start()
-        # by default, model loading will use accelerate as `low_cpu_mem_usage=True`
-        model_accelerate, _ = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", output_loading_info=True)
-        model_accelerate.to(torch_device)
-        model_accelerate.eval()
-        _, peak_accelerate = tracemalloc.get_traced_memory()
-
-        del model_accelerate
-        torch.cuda.empty_cache()
-        gc.collect()
-
-        model_normal_load, _ = UNet2DModel.from_pretrained(
-            "fusing/unet-ldm-dummy-update", output_loading_info=True, low_cpu_mem_usage=False
-        )
-        model_normal_load.to(torch_device)
-        model_normal_load.eval()
-        _, peak_normal = tracemalloc.get_traced_memory()
-        tracemalloc.stop()
-
-        assert peak_accelerate < peak_normal
-
     def test_output_pretrained(self):
         model = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update")
         model.eval()
...
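A note on the test removed above: it used `tracemalloc` to assert that the default accelerate-backed loading path (`low_cpu_mem_usage=True`) peaks lower than an eager load. `tracemalloc` only tracks Python-heap allocations, so tensor storage allocated by torch's C++ allocator is largely invisible to it, which made the comparison fragile. A minimal standalone sketch of the same idea, assuming the same dummy checkpoint:

```python
import gc
import tracemalloc

from diffusers import UNet2DModel


def peak_load_memory(**kwargs) -> int:
    """Return the peak Python-heap allocation observed while loading."""
    gc.collect()
    tracemalloc.start()
    model = UNet2DModel.from_pretrained("fusing/unet-ldm-dummy-update", **kwargs)
    _, peak = tracemalloc.get_traced_memory()
    tracemalloc.stop()
    del model
    return peak


peak_accelerate = peak_load_memory()  # low_cpu_mem_usage=True is the default
peak_normal = peak_load_memory(low_cpu_mem_usage=False)  # materializes all weights eagerly
print(peak_accelerate, peak_normal)
```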
@@ -125,8 +125,8 @@ class DiTPipelineIntegrationTests(unittest.TestCase):
             )
             assert np.abs((expected_image - image).max()) < 1e-3

-    def test_dit_512_fp16(self):
-        pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512", torch_dtype=torch.float16)
+    def test_dit_512(self):
+        pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512")
         pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
         pipe.to("cuda")
@@ -139,7 +139,7 @@ class DiTPipelineIntegrationTests(unittest.TestCase):
         for word, image in zip(words, images):
             expected_image = load_numpy(
                 "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
-                f"/dit/{word}_fp16.npy"
+                f"/dit/{word}_512.npy"
             )
-            assert np.abs((expected_image - image).max()) < 7.5e-1
+            assert np.abs((expected_image - image).max()) < 1e-1
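The renamed test drops `torch_dtype=torch.float16` and compares against new `_512.npy` references, which is why the tolerance tightens from 7.5e-1 to 1e-1: full-precision outputs are far more stable than the old fp16 run. A sketch of the path it exercises (the label and step count here are illustrative):

```python
import torch
from diffusers import DiTPipeline, DPMSolverMultistepScheduler

# Full precision now -- no torch_dtype=torch.float16.
pipe = DiTPipeline.from_pretrained("facebook/DiT-XL-2-512")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda")

# DiT is class-conditional: map human-readable ImageNet labels to class ids.
class_ids = pipe.get_label_ids(["white shark"])
generator = torch.manual_seed(0)
images = pipe(class_labels=class_ids, generator=generator, num_inference_steps=25).images
```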
@@ -118,7 +118,6 @@ class LDMSuperResolutionPipelineIntegrationTests(unittest.TestCase):
         init_image = init_image.resize((64, 64), resample=PIL_INTERPOLATION["lanczos"])

         ldm = LDMSuperResolutionPipeline.from_pretrained("duongna/ldm-super-resolution", device_map="auto")
-        ldm.to(torch_device)
         ldm.set_progress_bar_config(disable=None)

         generator = torch.manual_seed(0)
...
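The dropped `ldm.to(torch_device)` was redundant here: with `device_map="auto"`, accelerate already assigns each submodule to a device at load time, and moving the pipeline again afterwards can clash with the dispatch hooks. As a sketch:

```python
from diffusers import LDMSuperResolutionPipeline

# accelerate places submodules at load time; no explicit .to("cuda") afterwards.
ldm = LDMSuperResolutionPipeline.from_pretrained(
    "duongna/ldm-super-resolution", device_map="auto"
)
```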
@@ -35,6 +35,7 @@ from diffusers import (
     UNet2DConditionModel,
     logging,
 )
+from diffusers.models.attention_processor import AttnProcessor
 from diffusers.utils import load_numpy, nightly, slow, torch_device
 from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu
@@ -698,7 +699,6 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
         torch.cuda.reset_peak_memory_stats()
         model_id = "CompVis/stable-diffusion-v1-4"
         pipe = StableDiffusionPipeline.from_pretrained(model_id, revision="fp16", torch_dtype=torch.float16)
-        pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)
         pipe.enable_attention_slicing()
         pipe.unet = pipe.unet.to(memory_format=torch.channels_last)
@@ -708,42 +708,36 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
         # enable vae tiling
         pipe.enable_vae_tiling()
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast(torch_device):
-            output_chunked = pipe(
-                [prompt],
-                width=640,
-                height=640,
-                generator=generator,
-                guidance_scale=7.5,
-                num_inference_steps=2,
-                output_type="numpy",
-            )
+        pipe.enable_model_cpu_offload()
+        generator = torch.Generator(device="cpu").manual_seed(0)
+        output_chunked = pipe(
+            [prompt],
+            width=1024,
+            height=1024,
+            generator=generator,
+            guidance_scale=7.5,
+            num_inference_steps=2,
+            output_type="numpy",
+        )
         image_chunked = output_chunked.images

         mem_bytes = torch.cuda.max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
-        # make sure that less than 4 GB is allocated
-        assert mem_bytes < 4e9

         # disable vae tiling
         pipe.disable_vae_tiling()
-        generator = torch.Generator(device=torch_device).manual_seed(0)
-        with torch.autocast(torch_device):
-            output = pipe(
-                [prompt],
-                width=640,
-                height=640,
-                generator=generator,
-                guidance_scale=7.5,
-                num_inference_steps=2,
-                output_type="numpy",
-            )
-            image = output.images
+        generator = torch.Generator(device="cpu").manual_seed(0)
+        output = pipe(
+            [prompt],
+            width=1024,
+            height=1024,
+            generator=generator,
+            guidance_scale=7.5,
+            num_inference_steps=2,
+            output_type="numpy",
+        )
+        image = output.images

-        # make sure that more than 4 GB is allocated
-        mem_bytes = torch.cuda.max_memory_allocated()
-        assert mem_bytes > 5e9
+        assert mem_bytes < 1e10
         assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-2

     def test_stable_diffusion_fp16_vs_autocast(self):
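The rewritten tiling test replaces autocast at 640x640 with fp16 weights, model CPU offload, and 1024x1024 outputs, seeds the generator on the CPU so results are device-independent, and keeps a single upper bound (`mem_bytes < 1e10`) on the tiled pass. A sketch of the pattern it now exercises, with an illustrative prompt:

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4", revision="fp16", torch_dtype=torch.float16
)
pipe.enable_model_cpu_offload()  # keeps only the active submodule on the GPU (needs accelerate)
pipe.enable_vae_tiling()         # decodes latents in overlapping tiles to cap VRAM

generator = torch.Generator(device="cpu").manual_seed(0)  # CPU seeding is device-independent
image = pipe(
    "a photograph of an astronaut riding a horse",
    width=1024,
    height=1024,
    generator=generator,
).images[0]
```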
@@ -849,6 +843,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
             "CompVis/stable-diffusion-v1-4",
             torch_dtype=torch.float16,
         )
+        pipe.unet.set_attn_processor(AttnProcessor())
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

         outputs = pipe(**inputs)
@@ -861,6 +856,7 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
             "CompVis/stable-diffusion-v1-4",
             torch_dtype=torch.float16,
         )
+        pipe.unet.set_attn_processor(AttnProcessor())

         torch.cuda.empty_cache()
         torch.cuda.reset_max_memory_allocated()
@@ -868,6 +864,8 @@ class StableDiffusionPipelineSlowTests(unittest.TestCase):
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)

+        inputs = self.get_inputs(torch_device, dtype=torch.float16)
+
         outputs_offloaded = pipe(**inputs)
         mem_bytes_offloaded = torch.cuda.max_memory_allocated()
...
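The `set_attn_processor(AttnProcessor())` calls above are the heart of the PT2 fix: with torch 2.0 installed, diffusers selects `AttnProcessor2_0`, which routes attention through `torch.nn.functional.scaled_dot_product_attention` and yields slightly different numerics than the classic processor, so the stored reference slices drift. Pinning the classic path keeps the tests reproducible on both torch 1.x and 2.x:

```python
from diffusers.models.attention_processor import AttnProcessor

# Force the pre-2.0 attention implementation so outputs match the recorded slices.
pipe.unet.set_attn_processor(AttnProcessor())
```

The re-created `inputs = self.get_inputs(...)` before the offloaded run likewise ensures that pass starts from a fresh generator rather than one the first run already consumed.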
@@ -214,7 +214,7 @@ class StableDiffusionInpaintPipelineSlowTests(unittest.TestCase):
         image_slice = image[0, 253:256, 253:256, -1].flatten()

         assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.1443, 0.1218, 0.1587, 0.1594, 0.1411, 0.1284, 0.1370, 0.1506, 0.2339])
+        expected_slice = np.array([0.1350, 0.1123, 0.1350, 0.1641, 0.1328, 0.1230, 0.1289, 0.1531, 0.1687])
         assert np.abs(expected_slice - image_slice).max() < 5e-2
...
@@ -339,4 +339,4 @@ class StableDiffusionPanoramaSlowTests(unittest.TestCase):
         mem_bytes = torch.cuda.max_memory_allocated()
         # make sure that less than 5.2 GB is allocated
-        assert mem_bytes < 5.2 * 10**9
+        assert mem_bytes < 5.5 * 10**9
@@ -361,7 +361,7 @@ class InversionPipelineSlowTests(unittest.TestCase):
         image_slice = inv_latents[0, -3:, -3:, -1].flatten()

         assert inv_latents.shape == (1, 4, 64, 64)
-        expected_slice = np.array([0.8877, 0.0587, 0.7700, -1.6035, -0.5962, 0.4827, -0.6265, 1.0498, -0.8599])
+        expected_slice = np.array([0.8447, -0.0730, 0.7588, -1.2070, -0.4678, 0.1511, -0.8555, 1.1816, -0.7666])
         assert np.abs(expected_slice - image_slice.cpu().numpy()).max() < 5e-2
@@ -383,7 +383,7 @@ class InversionPipelineSlowTests(unittest.TestCase):
         image_slice = inv_latents[0, -3:, -3:, -1].flatten()

         assert inv_latents.shape == (1, 4, 64, 64)
-        expected_slice = np.array([0.7515, -0.2397, 0.4922, -0.9736, -0.7031, 0.4846, -1.0781, 1.1309, -0.6973])
+        expected_slice = np.array([0.8970, -0.1611, 0.4766, -1.1162, -0.5923, 0.1050, -0.9678, 1.0537, -0.6050])
         assert np.abs(expected_slice - image_slice.cpu().numpy()).max() < 5e-2
...
@@ -32,6 +32,7 @@ from diffusers import (
     UNet2DConditionModel,
     logging,
 )
+from diffusers.models.attention_processor import AttnProcessor
 from diffusers.utils import load_numpy, nightly, slow, torch_device
 from diffusers.utils.testing_utils import CaptureLogger, require_torch_gpu
@@ -409,6 +410,7 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
             "stabilityai/stable-diffusion-2-base",
             torch_dtype=torch.float16,
         )
+        pipe.unet.set_attn_processor(AttnProcessor())
         pipe.to(torch_device)
         pipe.set_progress_bar_config(disable=None)

         outputs = pipe(**inputs)
@@ -421,6 +423,7 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
             "stabilityai/stable-diffusion-2-base",
             torch_dtype=torch.float16,
         )
+        pipe.unet.set_attn_processor(AttnProcessor())

         torch.cuda.empty_cache()
         torch.cuda.reset_max_memory_allocated()
@@ -428,6 +431,7 @@ class StableDiffusion2PipelineSlowTests(unittest.TestCase):
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)

+        inputs = self.get_inputs(torch_device, dtype=torch.float16)
         outputs_offloaded = pipe(**inputs)
         mem_bytes_offloaded = torch.cuda.max_memory_allocated()
...
@@ -358,5 +358,5 @@ class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
         )

         mem_bytes = torch.cuda.max_memory_allocated()
-        # make sure that less than 2.65 GB is allocated
-        assert mem_bytes < 2.65 * 10**9
+        # make sure that less than 2.9 GB is allocated
+        assert mem_bytes < 2.9 * 10**9
@@ -21,17 +21,13 @@ import numpy as np
 import torch

 from diffusers import VersatileDiffusionDualGuidedPipeline
-from diffusers.utils.testing_utils import load_image, require_torch_gpu, slow, torch_device
+from diffusers.utils.testing_utils import load_image, nightly, require_torch_gpu, torch_device

 torch.backends.cuda.matmul.allow_tf32 = False


-class VersatileDiffusionDualGuidedPipelineFastTests(unittest.TestCase):
-    pass
-
-
-@slow
+@nightly
 @require_torch_gpu
 class VersatileDiffusionDualGuidedPipelineIntegrationTests(unittest.TestCase):
     def tearDown(self):
...
@@ -21,7 +21,7 @@ import numpy as np
 import torch

 from diffusers import VersatileDiffusionPipeline
-from diffusers.utils.testing_utils import load_image, require_torch_gpu, slow, torch_device
+from diffusers.utils.testing_utils import load_image, nightly, require_torch_gpu, torch_device

 torch.backends.cuda.matmul.allow_tf32 = False
@@ -31,7 +31,7 @@ class VersatileDiffusionMegaPipelineFastTests(unittest.TestCase):
     pass


-@slow
+@nightly
 @require_torch_gpu
 class VersatileDiffusionMegaPipelineIntegrationTests(unittest.TestCase):
     def tearDown(self):
...
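Moving the Versatile Diffusion suites from `@slow` to `@nightly` pulls them out of the per-merge slow job and into the nightly CI run only (`@slow` gates on `RUN_SLOW`, `@nightly` on `RUN_NIGHTLY`); the empty dual-guided `FastTests` placeholder class is dropped along the way. The decorator pattern, sketched with a hypothetical class name:

```python
import unittest

from diffusers.utils.testing_utils import nightly, require_torch_gpu


@nightly  # collected only when RUN_NIGHTLY=1 is set in the environment
@require_torch_gpu
class MyHeavyPipelineIntegrationTests(unittest.TestCase):  # hypothetical name
    def test_something_expensive(self):
        ...
```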
@@ -153,4 +153,4 @@ class EMAModelTests(unittest.TestCase):
             output = unet(noisy_latents, timesteps, encoder_hidden_states).sample
             output_loaded = loaded_unet(noisy_latents, timesteps, encoder_hidden_states).sample

-        assert torch.allclose(output, output_loaded)
+        assert torch.allclose(output, output_loaded, atol=1e-4)
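`torch.allclose(a, b)` checks `|a - b| <= atol + rtol * |b|` with defaults `rtol=1e-5` and `atol=1e-8`, which is too strict for weights round-tripped through an EMA copy and serialization; the added `atol=1e-4` provides absolute slack, which matters most for values near zero:

```python
import torch

a = torch.tensor([0.0, 1.0])
b = a + 5e-5

print(torch.allclose(a, b))             # False: the near-zero element fails atol=1e-8
print(torch.allclose(a, b, atol=1e-4))  # True: absolute tolerance absorbs the difference
```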
@@ -25,6 +25,7 @@ import torch
 from requests.exceptions import HTTPError

 from diffusers.models import ModelMixin, UNet2DConditionModel
+from diffusers.models.attention_processor import AttnProcessor
 from diffusers.training_utils import EMAModel
 from diffusers.utils import torch_device
@@ -105,12 +106,16 @@ class ModelTesterMixin:
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()

         model = self.model_class(**init_dict)
+        if hasattr(model, "set_attn_processor"):
+            model.set_attn_processor(AttnProcessor())
         model.to(torch_device)
         model.eval()

         with tempfile.TemporaryDirectory() as tmpdirname:
             model.save_pretrained(tmpdirname)
             new_model = self.model_class.from_pretrained(tmpdirname)
+            if hasattr(new_model, "set_attn_processor"):
+                new_model.set_attn_processor(AttnProcessor())
             new_model.to(torch_device)

         with torch.no_grad():
@@ -135,12 +140,16 @@ class ModelTesterMixin:
         init_dict, inputs_dict = self.prepare_init_args_and_inputs_for_common()

         model = self.model_class(**init_dict)
+        if hasattr(model, "set_attn_processor"):
+            model.set_attn_processor(AttnProcessor())
         model.to(torch_device)
         model.eval()

         with tempfile.TemporaryDirectory() as tmpdirname:
             model.save_pretrained(tmpdirname, variant="fp16")
             new_model = self.model_class.from_pretrained(tmpdirname, variant="fp16")
+            if hasattr(new_model, "set_attn_processor"):
+                new_model.set_attn_processor(AttnProcessor())

             # non-variant cannot be loaded
             with self.assertRaises(OSError) as error_context:
...
@@ -1123,7 +1123,7 @@ class PipelineSlowTests(unittest.TestCase):
                 f"/compel/forest_{i}.npy"
             )

-            assert np.abs(image - expected_image).max() < 1e-3
+            assert np.abs(image - expected_image).max() < 1e-2

     @nightly
...