Unverified Commit a75ac3fa authored by Pedro Cuenca, committed by GitHub

Sequential cpu offload: require accelerate 0.14.0 (#2517)

* Sequential cpu offload: require accelerate 0.14.0.

* Import is_accelerate_version

* Missing copy.
parent 477aaa96
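
This commit gates `enable_sequential_cpu_offload` behind `accelerate>=0.14.0` across all pipelines. A minimal usage sketch, assuming a standard Stable Diffusion checkpoint (the model id below is illustrative, not part of this commit):

```python
# Minimal usage sketch; requires `pip install "accelerate>=0.14.0"`.
# The checkpoint id is illustrative only.
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
# Note: do not call pipe.to("cuda") first; sequential offloading manages
# device placement itself, moving one submodule at a time to the GPU.
pipe.enable_sequential_cpu_offload()
image = pipe("an astronaut riding a horse").images[0]
```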
@@ -191,10 +191,10 @@ class AltDiffusionPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
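Each pipeline hunk in this commit repeats the same two-line change at the top of `enable_sequential_cpu_offload`. For orientation, a sketch of the surrounding method as it plausibly reads after this commit; only the version guard and the new error message come from the diff, while the offloaded submodule list is an assumption based on the usual Stable Diffusion pipeline layout:

```python
# Sketch of the whole method; only the version guard and error message
# are from this diff. The submodule list is an assumption.
def enable_sequential_cpu_offload(self, gpu_id=0):
    if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
        from accelerate import cpu_offload
    else:
        raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

    device = torch.device(f"cuda:{gpu_id}")

    # accelerate hooks move each submodule to `device` right before its
    # forward pass and back to CPU afterwards.
    for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
        cpu_offload(cpu_offloaded_model, device)

    if self.safety_checker is not None:
        cpu_offload(self.safety_checker, execution_device=device, offload_buffers=True)
```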
@@ -213,10 +213,10 @@ class AltDiffusionImg2ImgPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -230,10 +230,10 @@ class CycleDiffusionPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -194,10 +194,10 @@ class StableDiffusionPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -24,7 +24,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.cross_attention import CrossAttention
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
+from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
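These import hunks pull in `is_accelerate_version` alongside `is_accelerate_available`. The real helper ships in `diffusers.utils.import_utils`; a self-contained reimplementation sketch of such a check, under the assumption that it compares the installed distribution version with `packaging`:

```python
import importlib.metadata
import operator

from packaging import version

# Hedged reimplementation sketch; the real helper lives in
# diffusers.utils.import_utils.
_STR_TO_OP = {
    "<": operator.lt,
    "<=": operator.le,
    "==": operator.eq,
    "!=": operator.ne,
    ">=": operator.ge,
    ">": operator.gt,
}


def is_accelerate_version(operation: str, required: str) -> bool:
    """Return True if the installed `accelerate` satisfies `<operation> <required>`."""
    try:
        installed = importlib.metadata.version("accelerate")
    except importlib.metadata.PackageNotFoundError:
        # Not installed: every version requirement fails.
        return False
    return _STR_TO_OP[operation](version.parse(installed), version.parse(required))


# Example: the guard used throughout this commit.
assert isinstance(is_accelerate_version(">=", "0.14.0"), bool)
```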
@@ -256,10 +256,10 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -218,10 +218,10 @@ class StableDiffusionImg2ImgPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -265,10 +265,10 @@ class StableDiffusionInpaintPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -209,10 +209,10 @@ class StableDiffusionInpaintPipelineLegacy(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -398,10 +398,10 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -130,10 +130,10 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -19,7 +19,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import DDIMScheduler, PNDMScheduler
-from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
+from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
@@ -151,10 +151,10 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -365,10 +365,10 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -22,7 +22,7 @@ from transformers import CLIPFeatureExtractor, CLIPTextModel, CLIPTokenizer
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import is_accelerate_available, logging, randn_tensor, replace_example_docstring
+from ...utils import is_accelerate_available, is_accelerate_version, logging, randn_tensor, replace_example_docstring
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
 from .safety_checker import StableDiffusionSafetyChecker
@@ -169,10 +169,10 @@ class StableDiffusionSAGPipeline(DiffusionPipeline):
         Note that offloading happens on a submodule basis. Memory savings are higher than with
         `enable_model_cpu_offload`, but performance is lower.
         """
-        if is_accelerate_available():
+        if is_accelerate_available() and is_accelerate_version(">=", "0.14.0"):
             from accelerate import cpu_offload
         else:
-            raise ImportError("Please install accelerate via `pip install accelerate`")
+            raise ImportError("`enable_sequential_cpu_offload` requires `accelerate v0.14.0` or higher")

         device = torch.device(f"cuda:{gpu_id}")
@@ -39,7 +39,16 @@ from diffusers import (
     StableDiffusionDepth2ImgPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils import floats_tensor, is_accelerate_available, load_image, load_numpy, nightly, slow, torch_device
+from diffusers.utils import (
+    floats_tensor,
+    is_accelerate_available,
+    is_accelerate_version,
+    load_image,
+    load_numpy,
+    nightly,
+    slow,
+    torch_device,
+)
 from diffusers.utils.testing_utils import require_torch_gpu, skip_mps

 from ...test_pipelines_common import PipelineTesterMixin

@@ -227,8 +236,8 @@ class StableDiffusionDepth2ImgPipelineFastTests(PipelineTesterMixin, unittest.TestCase):
         self.assertLess(max_diff, 1.3e-2, "The outputs of the fp16 and fp32 pipelines are too different.")

     @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available(),
-        reason="CPU offload is only available with CUDA and `accelerate` installed",
+        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
+        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
     )
     def test_cpu_offload_forward_pass(self):
         components = self.get_dummy_components()
@@ -13,7 +13,7 @@ import torch

 import diffusers
 from diffusers import DiffusionPipeline
 from diffusers.utils import logging
-from diffusers.utils.import_utils import is_accelerate_available, is_xformers_available
+from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version, is_xformers_available
 from diffusers.utils.testing_utils import require_torch, torch_device

@@ -417,8 +417,8 @@ class PipelineTesterMixin:
         assert_mean_pixel_difference(output_with_slicing[0], output_without_slicing[0])

     @unittest.skipIf(
-        torch_device != "cuda" or not is_accelerate_available(),
-        reason="CPU offload is only available with CUDA and `accelerate` installed",
+        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
+        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
     )
     def test_cpu_offload_forward_pass(self):
         if not self.test_cpu_offload:
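The two test hunks above only extend the existing `skipIf` condition so offload tests are skipped when `accelerate` is missing or too old. The same guard composes for any new offload test; a minimal sketch with a hypothetical test class and `get_pipeline` fixture:

```python
import unittest

import torch

from diffusers.utils.import_utils import is_accelerate_available, is_accelerate_version

torch_device = "cuda" if torch.cuda.is_available() else "cpu"


class OffloadSmokeTests(unittest.TestCase):  # hypothetical test class
    @unittest.skipIf(
        torch_device != "cuda" or not is_accelerate_available() or is_accelerate_version("<", "0.14.0"),
        reason="CPU offload is only available with CUDA and `accelerate v0.14.0` or higher",
    )
    def test_cpu_offload_forward_pass(self):
        pipe = self.get_pipeline()  # hypothetical fixture returning a DiffusionPipeline
        pipe.enable_sequential_cpu_offload()
        # One short forward pass should run with submodules offloaded.
        _ = pipe("test prompt", num_inference_steps=2)
```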