Unverified commit 9a349538, authored by Dhruv Nair and committed by GitHub
Browse files

Additional Memory clean up for slow tests (#7436)

* update

* update

* update
parent e29f16cf
......@@ -85,6 +85,12 @@ class IFPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, unittest.T
@slow
@require_torch_gpu
class IFPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......
......@@ -94,6 +94,12 @@ class IFImg2ImgPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin, uni
@slow
@require_torch_gpu
class IFImg2ImgPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -109,6 +115,10 @@ class IFImg2ImgPipelineSlowTests(unittest.TestCase):
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
image = floats_tensor((1, 3, 64, 64), rng=random.Random(0)).to(torch_device)
generator = torch.Generator(device="cpu").manual_seed(0)
output = pipe(
......
......@@ -92,6 +92,12 @@ class IFImg2ImgSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineT
@slow
@require_torch_gpu
class IFImg2ImgSuperResolutionPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -107,6 +113,10 @@ class IFImg2ImgSuperResolutionPipelineSlowTests(unittest.TestCase):
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
torch.cuda.reset_max_memory_allocated()
torch.cuda.empty_cache()
torch.cuda.reset_peak_memory_stats()
generator = torch.Generator(device="cpu").manual_seed(0)
original_image = floats_tensor((1, 3, 256, 256), rng=random.Random(0)).to(torch_device)
......
......@@ -92,6 +92,12 @@ class IFInpaintingPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMixin,
@slow
@require_torch_gpu
class IFInpaintingPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -105,7 +111,6 @@ class IFInpaintingPipelineSlowTests(unittest.TestCase):
pipe.unet.set_attn_processor(AttnAddedKVProcessor())
pipe.enable_model_cpu_offload()
# Super resolution test
torch.cuda.empty_cache()
torch.cuda.reset_max_memory_allocated()
torch.cuda.reset_peak_memory_stats()
......
......@@ -94,6 +94,12 @@ class IFInpaintingSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipeli
@slow
@require_torch_gpu
class IFInpaintingSuperResolutionPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......
......@@ -87,6 +87,12 @@ class IFSuperResolutionPipelineFastTests(PipelineTesterMixin, IFPipelineTesterMi
@slow
@require_torch_gpu
class IFSuperResolutionPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......
......@@ -50,7 +50,14 @@ enable_full_determinism()
class IPAdapterNightlyTestsMixin(unittest.TestCase):
dtype = torch.float16
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
# Per-test fixture hook: chain to the parent tearDown before releasing memory.
super().tearDown()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
......@@ -313,7 +320,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
feature_extractor=feature_extractor,
torch_dtype=self.dtype,
)
pipeline.to(torch_device)
pipeline.enable_model_cpu_offload()
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
inputs = self.get_dummy_inputs()
......@@ -373,7 +380,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
feature_extractor=feature_extractor,
torch_dtype=self.dtype,
)
pipeline.to(torch_device)
pipeline.enable_model_cpu_offload()
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
inputs = self.get_dummy_inputs(for_image_to_image=True)
......@@ -442,7 +449,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
feature_extractor=feature_extractor,
torch_dtype=self.dtype,
)
pipeline.to(torch_device)
pipeline.enable_model_cpu_offload()
pipeline.load_ip_adapter("h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin")
inputs = self.get_dummy_inputs(for_inpainting=True)
......@@ -490,7 +497,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
image_encoder=image_encoder,
torch_dtype=self.dtype,
)
pipeline.to(torch_device)
pipeline.enable_model_cpu_offload()
pipeline.load_ip_adapter(
"h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter-plus-face_sdxl_vit-h.safetensors"
)
......@@ -518,7 +525,7 @@ class IPAdapterSDXLIntegrationTests(IPAdapterNightlyTestsMixin):
image_encoder=image_encoder,
torch_dtype=self.dtype,
)
pipeline.to(torch_device)
pipeline.enable_model_cpu_offload()
pipeline.load_ip_adapter(
"h94/IP-Adapter", subfolder="sdxl_models", weight_name=["ip-adapter-plus-face_sdxl_vit-h.safetensors"] * 2
)
......
......@@ -275,6 +275,12 @@ class KandinskyPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
class KandinskyPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......
......@@ -299,6 +299,12 @@ class KandinskyImg2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
class KandinskyImg2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......
......@@ -297,6 +297,12 @@ class KandinskyInpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@nightly
@require_torch_gpu
class KandinskyInpaintPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......
......@@ -27,7 +27,6 @@ from diffusers.utils.testing_utils import (
load_numpy,
require_torch_gpu,
slow,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
......@@ -223,6 +222,12 @@ class KandinskyV22PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -238,12 +243,12 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.to(torch_device)
pipe_prior.enable_model_cpu_offload()
pipeline = KandinskyV22Pipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipeline = pipeline.to(torch_device)
pipeline = pipeline.enable_model_cpu_offload()
pipeline.set_progress_bar_config(disable=None)
prompt = "red cat, 4k photo"
......@@ -252,7 +257,7 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
image_emb, zero_image_emb = pipe_prior(
prompt,
generator=generator,
num_inference_steps=5,
num_inference_steps=3,
negative_prompt="",
).to_tuple()
......@@ -261,7 +266,7 @@ class KandinskyV22PipelineIntegrationTests(unittest.TestCase):
image_embeds=image_emb,
negative_image_embeds=zero_image_emb,
generator=generator,
num_inference_steps=100,
num_inference_steps=3,
output_type="np",
)
......
......@@ -34,7 +34,6 @@ from diffusers.utils.testing_utils import (
load_numpy,
nightly,
require_torch_gpu,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
......@@ -228,6 +227,12 @@ class KandinskyV22ControlnetPipelineFastTests(PipelineTesterMixin, unittest.Test
@nightly
@require_torch_gpu
class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -250,12 +255,12 @@ class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.to(torch_device)
pipe_prior.enable_model_cpu_offload()
pipeline = KandinskyV22ControlnetPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
)
pipeline = pipeline.to(torch_device)
pipeline = pipeline.enable_model_cpu_offload()
pipeline.set_progress_bar_config(disable=None)
prompt = "A robot, 4k photo"
......@@ -264,7 +269,7 @@ class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
image_emb, zero_image_emb = pipe_prior(
prompt,
generator=generator,
num_inference_steps=5,
num_inference_steps=2,
negative_prompt="",
).to_tuple()
......@@ -274,7 +279,7 @@ class KandinskyV22ControlnetPipelineIntegrationTests(unittest.TestCase):
negative_image_embeds=zero_image_emb,
hint=hint,
generator=generator,
num_inference_steps=100,
num_inference_steps=2,
output_type="np",
)
......
......@@ -35,7 +35,6 @@ from diffusers.utils.testing_utils import (
load_numpy,
nightly,
require_torch_gpu,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
......@@ -235,6 +234,12 @@ class KandinskyV22ControlnetImg2ImgPipelineFastTests(PipelineTesterMixin, unitte
@nightly
@require_torch_gpu
class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -264,12 +269,12 @@ class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
pipe_prior = KandinskyV22PriorEmb2EmbPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.to(torch_device)
pipe_prior.enable_model_cpu_offload()
pipeline = KandinskyV22ControlnetImg2ImgPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-controlnet-depth", torch_dtype=torch.float16
)
pipeline = pipeline.to(torch_device)
pipeline = pipeline.enable_model_cpu_offload()
pipeline.set_progress_bar_config(disable=None)
......@@ -281,6 +286,7 @@ class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
strength=0.85,
generator=generator,
negative_prompt="",
num_inference_steps=5,
).to_tuple()
output = pipeline(
......@@ -289,7 +295,7 @@ class KandinskyV22ControlnetImg2ImgPipelineIntegrationTests(unittest.TestCase):
negative_image_embeds=zero_image_emb,
hint=hint,
generator=generator,
num_inference_steps=100,
num_inference_steps=5,
height=512,
width=512,
strength=0.5,
......
......@@ -35,7 +35,6 @@ from diffusers.utils.testing_utils import (
load_numpy,
require_torch_gpu,
slow,
torch_device,
)
from ..test_pipelines_common import PipelineTesterMixin, assert_mean_pixel_difference
......@@ -240,6 +239,12 @@ class KandinskyV22Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCas
@slow
@require_torch_gpu
class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -260,12 +265,12 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
pipe_prior = KandinskyV22PriorPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-prior", torch_dtype=torch.float16
)
pipe_prior.to(torch_device)
pipe_prior.enable_model_cpu_offload()
pipeline = KandinskyV22Img2ImgPipeline.from_pretrained(
"kandinsky-community/kandinsky-2-2-decoder", torch_dtype=torch.float16
)
pipeline = pipeline.to(torch_device)
pipeline = pipeline.enable_model_cpu_offload()
pipeline.set_progress_bar_config(disable=None)
......@@ -282,7 +287,7 @@ class KandinskyV22Img2ImgPipelineIntegrationTests(unittest.TestCase):
image_embeds=image_emb,
negative_image_embeds=zero_image_emb,
generator=generator,
num_inference_steps=100,
num_inference_steps=5,
height=768,
width=768,
strength=0.2,
......
......@@ -293,6 +293,12 @@ class KandinskyV22InpaintPipelineFastTests(PipelineTesterMixin, unittest.TestCas
@slow
@require_torch_gpu
class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -328,7 +334,7 @@ class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
image_emb, zero_image_emb = pipe_prior(
prompt,
generator=generator,
num_inference_steps=5,
num_inference_steps=2,
negative_prompt="",
).to_tuple()
......@@ -338,7 +344,7 @@ class KandinskyV22InpaintPipelineIntegrationTests(unittest.TestCase):
image_embeds=image_emb,
negative_image_embeds=zero_image_emb,
generator=generator,
num_inference_steps=100,
num_inference_steps=2,
height=768,
width=768,
output_type="np",
......
......@@ -169,6 +169,12 @@ class Kandinsky3PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
@slow
@require_torch_gpu
class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -186,7 +192,7 @@ class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
generator = torch.Generator(device="cpu").manual_seed(0)
image = pipe(prompt, num_inference_steps=25, generator=generator).images[0]
image = pipe(prompt, num_inference_steps=5, generator=generator).images[0]
assert image.size == (1024, 1024)
......@@ -217,7 +223,7 @@ class Kandinsky3PipelineIntegrationTests(unittest.TestCase):
image = image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1)
prompt = "A painting of the inside of a subway train with tiny raccoons."
image = pipe(prompt, image=image, strength=0.75, num_inference_steps=25, generator=generator).images[0]
image = pipe(prompt, image=image, strength=0.75, num_inference_steps=5, generator=generator).images[0]
assert image.size == (512, 512)
......
......@@ -187,6 +187,12 @@ class Kandinsky3Img2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase)
@slow
@require_torch_gpu
class Kandinsky3Img2ImgPipelineIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
super().tearDown()
......@@ -209,7 +215,7 @@ class Kandinsky3Img2ImgPipelineIntegrationTests(unittest.TestCase):
image = image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1)
prompt = "A painting of the inside of a subway train with tiny raccoons."
image = pipe(prompt, image=image, strength=0.75, num_inference_steps=25, generator=generator).images[0]
image = pipe(prompt, image=image, strength=0.75, num_inference_steps=5, generator=generator).images[0]
assert image.size == (512, 512)
......
......@@ -779,7 +779,14 @@ class StableDiffusionXLImg2ImgRefinerOnlyPipelineFastTests(
@slow
class StableDiffusionXLImg2ImgIntegrationTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
# NOTE(review): only @slow is visible on this class in this hunk (no @require_torch_gpu) — confirm the tests are GPU-only.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
# Per-test fixture hook: chain to the parent tearDown before releasing memory.
super().tearDown()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
......
......@@ -639,6 +639,12 @@ class PipelineTesterMixin:
"`callback_cfg_params = TEXT_TO_IMAGE_CFG_PARAMS.union({'mask', 'masked_image_latents'})`"
)
def setUp(self):
# clean up the VRAM before each test
# Mixin hook: super() resolves through the MRO of the concrete test class that mixes this in
# (presumably reaching unittest.TestCase.setUp — confirm for every subclass).
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test in case of CUDA runtime errors
super().tearDown()
......
......@@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import gc
import unittest
import numpy as np
......@@ -183,6 +184,18 @@ class TextToVideoSDPipelineFastTests(PipelineTesterMixin, SDFunctionTesterMixin,
@skip_mps
@require_torch_gpu
class TextToVideoSDPipelineSlowTests(unittest.TestCase):
def setUp(self):
# clean up the VRAM before each test
# Per-test fixture hook: chain to the parent setUp first so base-class setup still runs.
super().setUp()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def tearDown(self):
# clean up the VRAM after each test
# Per-test fixture hook: chain to the parent tearDown before releasing memory.
super().tearDown()
# Run a garbage-collection pass so unreachable Python objects are freed.
gc.collect()
# Ask PyTorch to release its cached, currently unused CUDA memory.
torch.cuda.empty_cache()
def test_two_step_model(self):
expected_video = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/video_2step.npy"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment