"tests/python/pytorch/sparse/utils.py" did not exist on "0698e91a0e4b40bd4a5a4e59205d098e1bb3d3c9"
Unverified commit 049082e0 authored by Yao Matrix, committed by GitHub

enable pipeline test cases on xpu (#11527)



* enable several pipeline integration tests on xpu
Signed-off-by: Yao Matrix <matrix.yao@intel.com>

* fix style
Signed-off-by: Yao Matrix <matrix.yao@intel.com>

* update per comments
Signed-off-by: Matrix Yao <matrix.yao@intel.com>

---------
Signed-off-by: Yao Matrix <matrix.yao@intel.com>
Signed-off-by: Matrix Yao <matrix.yao@intel.com>
parent f161e277
@@ -635,10 +635,10 @@ def load_numpy(arry: Union[str, np.ndarray], local_path: Optional[str] = None) -
     return arry


-def load_pt(url: str, map_location: str):
+def load_pt(url: str, map_location: Optional[str] = None, weights_only: Optional[bool] = True):
     response = requests.get(url, timeout=DIFFUSERS_REQUEST_TIMEOUT)
     response.raise_for_status()
-    arry = torch.load(BytesIO(response.content), map_location=map_location)
+    arry = torch.load(BytesIO(response.content), map_location=map_location, weights_only=weights_only)
     return arry
...
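Context for the signature change (not part of the diff): newer PyTorch releases default `torch.load` to `weights_only=True`, which refuses to unpickle arbitrary Python objects. The helper keeps that safe default while letting individual tests opt out, and `map_location` can now be omitted. A usage sketch with hypothetical URLs:

```python
from diffusers.utils.testing_utils import load_pt, torch_device

# Plain tensor files load under the safe default (weights_only=True) and can
# be mapped straight onto the active accelerator (cuda, xpu, ...).
embedding = load_pt("https://example.com/embedding.pt", map_location=torch_device)

# Files that pickle richer Python objects must opt out of the safe default.
legacy = load_pt("https://example.com/legacy_result.pt", weights_only=False)
```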
@@ -304,7 +304,8 @@ class StableCascadeDecoderPipelineIntegrationTests(unittest.TestCase):
         generator = torch.Generator(device="cpu").manual_seed(0)
         image_embedding = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt",
+            map_location=torch_device,
         )

         image = pipe(
@@ -320,4 +321,4 @@ class StableCascadeDecoderPipelineIntegrationTests(unittest.TestCase):
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_decoder_image.npy"
         )
         max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
-        assert max_diff < 1e-4
+        assert max_diff < 2e-4
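The threshold relaxes from 1e-4 to 2e-4 so the same check passes on both CUDA and XPU, whose kernels accumulate slightly different floating-point error. `numpy_cosine_similarity_distance` compares the direction of the flattened outputs; a sketch of the metric, not necessarily the library's exact code:

```python
import numpy as np

def cosine_similarity_distance(a: np.ndarray, b: np.ndarray) -> float:
    # 0.0 when the flattened outputs point the same way; grows as they diverge.
    similarity = np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
    return float(1.0 - similarity)
```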
@@ -20,26 +20,32 @@ import numpy as np
 import torch

 from diffusers import StableDiffusionKDiffusionPipeline
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)


 enable_full_determinism()


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_diffusion_1(self):
         sd_pipe = StableDiffusionKDiffusionPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
...
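`backend_empty_cache` is the device-agnostic stand-in for the hard-coded `torch.cuda.empty_cache()`: it routes the cache flush to whichever backend `torch_device` names. A minimal sketch of the dispatch pattern; the real helper in `diffusers.utils.testing_utils` covers more backends and edge cases:

```python
import torch

def empty_cache(device: str) -> None:
    # Release cached allocator blocks on the active backend; a no-op on CPU.
    if device.startswith("cuda"):
        torch.cuda.empty_cache()
    elif device.startswith("xpu"):
        torch.xpu.empty_cache()
    elif device.startswith("mps"):
        torch.mps.empty_cache()
```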
@@ -28,7 +28,13 @@ from diffusers import (
     StableDiffusionLDM3DPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
@@ -205,17 +211,17 @@ class StableDiffusionLDM3DPipelineFastTests(unittest.TestCase):

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionLDM3DPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
         generator = torch.Generator(device=generator_device).manual_seed(seed)
@@ -256,17 +262,17 @@ class StableDiffusionLDM3DPipelineSlowTests(unittest.TestCase):

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPipelineNightlyTests(unittest.TestCase):
     def setUp(self):
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
         generator = torch.Generator(device=generator_device).manual_seed(seed)
...
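`require_torch_accelerator` generalizes `require_torch_gpu`: rather than skipping on everything but CUDA, it only asks for some non-CPU torch device. A sketch of the idea, assuming `torch_device` already resolves to the best available backend:

```python
import unittest

from diffusers.utils.testing_utils import torch_device

def require_accelerator(test_case):
    # Skip unless some hardware accelerator (cuda, xpu, mps, ...) is available,
    # instead of hard-coding CUDA the way a GPU-only decorator does.
    return unittest.skipUnless(torch_device != "cpu", "test requires a hardware accelerator")(test_case)
```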
@@ -29,7 +29,13 @@ from diffusers import (
     StableDiffusionSAGPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils.testing_utils import enable_full_determinism, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    enable_full_determinism,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import (
@@ -162,19 +168,19 @@ class StableDiffusionSAGPipelineFastTests(

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableDiffusionPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_diffusion_1(self):
         sag_pipe = StableDiffusionSAGPipeline.from_pretrained("CompVis/stable-diffusion-v1-4")
...
@@ -13,7 +13,17 @@ from diffusers import (
     UNet2DConditionModel,
 )
 from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
-from diffusers.utils.testing_utils import enable_full_determinism, load_numpy, nightly, require_torch_gpu, torch_device
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
+    enable_full_determinism,
+    load_numpy,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
 from ..test_pipelines_common import (
@@ -190,19 +200,19 @@ class StableUnCLIPPipelineFastTests(

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_unclip(self):
         expected_image = load_numpy(
@@ -226,9 +236,9 @@ class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
         assert_mean_pixel_difference(image, expected_image)

     def test_stable_unclip_pipeline_with_sequential_cpu_offloading(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         pipe = StableUnCLIPPipeline.from_pretrained("fusing/stable-unclip-2-1-l", torch_dtype=torch.float16)
         pipe.set_progress_bar_config(disable=None)
@@ -242,6 +252,6 @@ class StableUnCLIPPipelineIntegrationTests(unittest.TestCase):
             output_type="np",
         )

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         # make sure that less than 7 GB is allocated
         assert mem_bytes < 7 * 10**9
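The `backend_*_memory_*` helpers keep the 7 GB offloading assertion backend-neutral, assuming the XPU build mirrors CUDA's memory-stats API (recent PyTorch releases expose it under `torch.xpu`). A minimal sketch of the dispatch, not the library's exact implementation:

```python
import torch

def max_memory_allocated(device: str) -> int:
    # Peak bytes allocated on the active backend since the last stats reset.
    if device.startswith("cuda"):
        return torch.cuda.max_memory_allocated()
    if device.startswith("xpu"):
        return torch.xpu.max_memory_allocated()  # assumes a recent PyTorch XPU build
    return 0  # CPU and other backends: nothing to measure
```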
@@ -18,12 +18,16 @@ from diffusers.pipelines.pipeline_utils import DiffusionPipeline
 from diffusers.pipelines.stable_diffusion.stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
 from diffusers.utils.import_utils import is_xformers_available
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     torch_device,
 )
@@ -213,19 +217,19 @@ class StableUnCLIPImg2ImgPipelineFastTests(

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_stable_unclip_l_img2img(self):
         input_image = load_image(
@@ -286,9 +290,9 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
             "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_unclip/turtle.png"
         )

-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
             "fusing/stable-unclip-2-1-h-img2img", torch_dtype=torch.float16
@@ -304,6 +308,6 @@ class StableUnCLIPImg2ImgPipelineIntegrationTests(unittest.TestCase):
             output_type="np",
         )

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         # make sure that less than 7 GB is allocated
         assert mem_bytes < 7 * 10**9
@@ -19,37 +19,44 @@ import unittest
 import torch

 from diffusers import DDIMScheduler, TextToVideoZeroPipeline
-from diffusers.utils.testing_utils import load_pt, nightly, require_torch_gpu
+from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    load_pt,
+    nightly,
+    require_torch_accelerator,
+    torch_device,
+)

 from ..test_pipelines_common import assert_mean_pixel_difference


 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class TextToVideoZeroPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_full_model(self):
         model_id = "stable-diffusion-v1-5/stable-diffusion-v1-5"
-        pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to("cuda")
+        pipe = TextToVideoZeroPipeline.from_pretrained(model_id, torch_dtype=torch.float16).to(torch_device)
         pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
-        generator = torch.Generator(device="cuda").manual_seed(0)
+        generator = torch.Generator(device="cpu").manual_seed(0)

         prompt = "A bear is playing a guitar on Times Square"
         result = pipe(prompt=prompt, generator=generator).images
         expected_result = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt"
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/text-to-video/A bear is playing a guitar on Times Square.pt",
+            weights_only=False,
         )
         assert_mean_pixel_difference(result, expected_result)
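Two portability details in this hunk: the expected-result file opts out with `weights_only=False`, presumably because it pickles more than plain tensors, and the generator moves from `device="cuda"` to CPU so that the seed produces bit-identical initial noise on every backend. A small illustration of the second point (shapes are arbitrary):

```python
import torch

# A CPU generator yields the same pseudo-random stream no matter which
# accelerator later runs the model, so seeded tests stay comparable
# across cuda and xpu machines.
generator = torch.Generator(device="cpu").manual_seed(0)
noise = torch.randn(1, 4, 64, 64, generator=generator)  # identical bytes on any host
```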
@@ -24,11 +24,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProject

 from diffusers import AutoencoderKL, DDIMScheduler, TextToVideoZeroSDXLPipeline, UNet2DConditionModel
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     nightly,
     require_accelerate_version_greater,
-    require_accelerator,
-    require_torch_gpu,
+    require_torch_accelerator,
     torch_device,
 )
@@ -220,7 +220,7 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
         self.assertLess(max_diff, expected_max_difference)

     @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
-    @require_accelerator
+    @require_torch_accelerator
     def test_float16_inference(self, expected_max_diff=5e-2):
         components = self.get_dummy_components()
         for name, module in components.items():
@@ -262,7 +262,7 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
     def test_inference_batch_single_identical(self):
         pass

-    @require_accelerator
+    @require_torch_accelerator
     @require_accelerate_version_greater("0.17.0")
     def test_model_cpu_offload_forward_pass(self, expected_max_diff=2e-4):
         components = self.get_dummy_components()
@@ -285,7 +285,7 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
         pass

     @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
-    @require_accelerator
+    @require_torch_accelerator
     def test_save_load_float16(self, expected_max_diff=1e-2):
         components = self.get_dummy_components()
         for name, module in components.items():
@@ -337,7 +337,7 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe
     def test_sequential_cpu_offload_forward_pass(self):
         pass

-    @require_accelerator
+    @require_torch_accelerator
     def test_to_device(self):
         components = self.get_dummy_components()
         pipe = self.pipeline_class(**components)
@@ -365,19 +365,19 @@ class TextToVideoZeroSDXLPipelineFastTests(PipelineTesterMixin, PipelineFromPipe

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class TextToVideoZeroSDXLPipelineSlowTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_full_model(self):
         model_id = "stabilityai/stable-diffusion-xl-base-1.0"
...
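Note that the float16 tests keep an explicit device allowlist on top of the decorator: an accelerator being present does not by itself guarantee well-supported half-precision kernels. The gating pattern used above, in isolation:

```python
import unittest

from diffusers.utils.testing_utils import torch_device

class Fp16GatingExample(unittest.TestCase):  # hypothetical test class for illustration
    # Gate fp16 paths to backends with well-supported half-precision kernels.
    @unittest.skipIf(torch_device not in ["cuda", "xpu"], reason="float16 requires CUDA or XPU")
    def test_float16_inference(self):
        ...  # fp16 pipeline checks would go here
```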
@@ -23,10 +23,14 @@ from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokeni

 from diffusers import PriorTransformer, UnCLIPPipeline, UnCLIPScheduler, UNet2DConditionModel, UNet2DModel
 from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
+    backend_max_memory_allocated,
+    backend_reset_max_memory_allocated,
+    backend_reset_peak_memory_stats,
     enable_full_determinism,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     torch_device,
 )
@@ -426,13 +430,13 @@ class UnCLIPPipelineCPUIntegrationTests(unittest.TestCase):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_unclip_karlo_cpu_fp32(self):
         expected_image = load_numpy(
@@ -458,19 +462,19 @@ class UnCLIPPipelineCPUIntegrationTests(unittest.TestCase):

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class UnCLIPPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_unclip_karlo(self):
         expected_image = load_numpy(
@@ -496,9 +500,9 @@ class UnCLIPPipelineIntegrationTests(unittest.TestCase):
         assert_mean_pixel_difference(image, expected_image)

     def test_unclip_pipeline_with_sequential_cpu_offloading(self):
-        torch.cuda.empty_cache()
-        torch.cuda.reset_max_memory_allocated()
-        torch.cuda.reset_peak_memory_stats()
+        backend_empty_cache(torch_device)
+        backend_reset_max_memory_allocated(torch_device)
+        backend_reset_peak_memory_stats(torch_device)

         pipe = UnCLIPPipeline.from_pretrained("kakaobrain/karlo-v1-alpha", torch_dtype=torch.float16)
         pipe.set_progress_bar_config(disable=None)
@@ -514,6 +518,6 @@ class UnCLIPPipelineIntegrationTests(unittest.TestCase):
             output_type="np",
         )

-        mem_bytes = torch.cuda.max_memory_allocated()
+        mem_bytes = backend_max_memory_allocated(torch_device)
         # make sure that less than 7 GB is allocated
         assert mem_bytes < 7 * 10**9
@@ -37,12 +37,13 @@ from diffusers import (
 )
 from diffusers.pipelines.unclip.text_proj import UnCLIPTextProjModel
 from diffusers.utils.testing_utils import (
+    backend_empty_cache,
     enable_full_determinism,
     floats_tensor,
     load_image,
     load_numpy,
     nightly,
-    require_torch_gpu,
+    require_torch_accelerator,
     skip_mps,
     torch_device,
 )
@@ -496,19 +497,19 @@ class UnCLIPImageVariationPipelineFastTests(PipelineTesterMixin, unittest.TestCa

 @nightly
-@require_torch_gpu
+@require_torch_accelerator
 class UnCLIPImageVariationPipelineIntegrationTests(unittest.TestCase):
     def setUp(self):
         # clean up the VRAM before each test
         super().setUp()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def tearDown(self):
         # clean up the VRAM after each test
         super().tearDown()
         gc.collect()
-        torch.cuda.empty_cache()
+        backend_empty_cache(torch_device)

     def test_unclip_image_variation_karlo(self):
         input_image = load_image(
...