Unverified commit 93579650 authored by Patrick von Platen, committed by GitHub

Refactor model offload (#4514)



* [Draft] Refactor model offload

* [Draft] Refactor model offload

* Apply suggestions from code review

* cpu offload updates

* remove model cpu offload from individual pipelines

* add hook to offload models to cpu

* clean up

* model offload

* add model cpu offload string

* make style

* clean up

* fixes for offload issues

* fix tests issues

* resolve merge conflicts

* update src/diffusers/pipelines/pipeline_utils.py
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* make style

* Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py

---------
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
parent 16a056a7
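The same three-part change repeats across every file in this diff: the copy-pasted per-pipeline `enable_model_cpu_offload` override is deleted, a class-level `model_cpu_offload_seq` string (plus, for Shap-E, `_exclude_from_cpu_offload`) is added, and the manual `final_offload_hook.offload()` calls at each exit point become a single `maybe_free_model_hooks()` call. As a rough sketch of the generic logic this lets `pipeline_utils.py` provide once for all pipelines — not the actual implementation; the sequence traversal and hook bookkeeping below are simplified assumptions reconstructed from the removed per-pipeline methods:

```python
import torch
from accelerate import cpu_offload_with_hook  # requires accelerate >= 0.17.0


def enable_model_cpu_offload(pipe, gpu_id=0):
    # Sketch of the shared offload routine driven by model_cpu_offload_seq.
    device = torch.device(f"cuda:{gpu_id}")

    if pipe.device.type != "cpu":
        pipe.to("cpu", silence_dtype_warnings=True)
        torch.cuda.empty_cache()  # otherwise the memory savings are not visible

    hook = None
    # model_cpu_offload_seq declares the execution order, e.g. "text_encoder->unet->vae";
    # components listed in _exclude_from_cpu_offload (e.g. Shap-E's renderer) are skipped.
    for name in pipe.model_cpu_offload_seq.split("->"):
        if name in getattr(pipe, "_exclude_from_cpu_offload", []):
            continue
        _, hook = cpu_offload_with_hook(getattr(pipe, name), device, prev_module_hook=hook)

    # Kept around so the last model can still be offloaded manually if needed.
    pipe.final_offload_hook = hook
```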
@@ -25,8 +25,6 @@ from ...models import PriorTransformer
 from ...schedulers import HeunDiscreteScheduler
 from ...utils import (
     BaseOutput,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -99,6 +97,9 @@ class ShapEPipeline(DiffusionPipeline):
             rendering method.
     """
+    model_cpu_offload_seq = "text_encoder->prior"
+    _exclude_from_cpu_offload = ["shap_e_renderer"]
+
     def __init__(
         self,
         prior: PriorTransformer,
@@ -129,34 +130,6 @@ class ShapEPipeline(DiffusionPipeline):
         latents = latents * scheduler.init_noise_sigma
         return latents

-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.prior, self.shap_e_renderer]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     def _encode_prompt(
         self,
         prompt,
@@ -318,6 +291,9 @@ class ShapEPipeline(DiffusionPipeline):
                 sample=latents,
             ).prev_sample

+        # Offload all models
+        self.maybe_free_model_hooks()
+
         if output_type not in ["np", "pil", "latent", "mesh"]:
             raise ValueError(
                 f"Only the output types `pil`, `np`, `latent` and `mesh` are supported not output_type={output_type}"
@@ -352,10 +328,6 @@ class ShapEPipeline(DiffusionPipeline):
         if output_type == "pil":
             images = [self.numpy_to_pil(image) for image in images]

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
-
         if not return_dict:
             return (images,)
...
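Note how the exit path that previously offloaded only the last model via `final_offload_hook` now calls `maybe_free_model_hooks()` instead. A plausible sketch of what such a helper does — the `_all_hooks` attribute is a hypothetical name, not confirmed diffusers internals; the observable contract from this diff is only that the call is a no-op when model offload was never enabled and otherwise frees GPU memory after the pipeline call:

```python
import torch


def maybe_free_model_hooks(pipe):
    # Hypothetical sketch; `_all_hooks` is an assumed attribute name.
    if not getattr(pipe, "_all_hooks", None):
        return  # enable_model_cpu_offload() was never called; nothing to free
    for hook in pipe._all_hooks:
        hook.offload()  # move each hooked model back to the CPU
    torch.cuda.empty_cache()
```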
@@ -98,6 +98,9 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
             rendering method.
     """
+    model_cpu_offload_seq = "image_encoder->prior"
+    _exclude_from_cpu_offload = ["shap_e_renderer"]
+
     def __init__(
         self,
         prior: PriorTransformer,
@@ -309,9 +312,8 @@ class ShapEImg2ImgPipeline(DiffusionPipeline):
         if output_type == "pil":
             images = [self.numpy_to_pil(image) for image in images]

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (images,)
...
@@ -21,8 +21,6 @@ import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer

-from diffusers.utils import is_accelerate_available, is_accelerate_version
-
 from ...configuration_utils import FrozenDict
 from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
@@ -150,6 +148,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -228,35 +227,6 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
...
@@ -27,8 +27,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -101,6 +99,7 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -222,34 +221,6 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
         """
         self.vae.disable_tiling()

-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     def _encode_prompt(
         self,
         prompt,
@@ -745,9 +716,8 @@ class StableDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lo
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
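After this change every pipeline shares one offload entry point, so user code is identical across pipelines. An illustrative usage sketch — the model id and prompt are placeholders, not part of this diff:

```python
import torch
from diffusers import StableDiffusionPipeline

# Illustrative usage; model id and prompt are placeholders.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
# Uses the class-level model_cpu_offload_seq ("text_encoder->unet->vae")
# instead of the per-pipeline override removed above.
pipe.enable_model_cpu_offload()

image = pipe("an astronaut riding a horse").images[0]
```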
@@ -189,6 +189,7 @@ class StableDiffusionAttendAndExcitePipeline(DiffusionPipeline, TextualInversion
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
...
@@ -85,6 +85,7 @@ class StableDiffusionDepth2ImgPipeline(DiffusionPipeline, TextualInversionLoader
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"

     def __init__(
         self,
...
@@ -32,8 +32,6 @@ from ...utils import (
     PIL_INTERPOLATION,
     BaseOutput,
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -272,6 +270,7 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor", "inverse_scheduler"]

     def __init__(
@@ -400,35 +399,6 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
         """
         self.vae.disable_tiling()

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -1070,9 +1040,8 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
         if output_type == "pil":
             mask_image = self.image_processor.numpy_to_pil(mask_image)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         return mask_image
@@ -1305,9 +1274,8 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
         if decode_latents and output_type == "pil":
             image = self.image_processor.numpy_to_pil(image)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (latents, image)
@@ -1548,9 +1516,8 @@ class StableDiffusionDiffEditPipeline(DiffusionPipeline, TextualInversionLoaderM
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -28,8 +28,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -125,6 +123,7 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
     _optional_components = ["safety_checker", "feature_extractor"]
+    model_cpu_offload_seq = "text_encoder->unet->vae"

     def __init__(
         self,
@@ -197,34 +196,6 @@ class StableDiffusionGLIGENPipeline(DiffusionPipeline):
         """
         self.vae.disable_tiling()

-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
...
@@ -33,8 +33,6 @@ from ...models.attention import GatedSelfAttentionDense
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -182,6 +180,7 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -261,34 +260,6 @@ class StableDiffusionGLIGENTextImagePipeline(DiffusionPipeline):
         """
         self.vae.disable_tiling()

-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.encode_prompt
     def encode_prompt(
         self,
...
@@ -65,6 +65,7 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
     # TODO: feature_extractor is required to encode images (if they are in PIL format),
     # we should give a descriptive message if the pipeline doesn't have one.
     _optional_components = ["safety_checker"]
+    model_cpu_offload_seq = "image_encoder->unet->vae"

     def __init__(
         self,
@@ -392,6 +393,8 @@ class StableDiffusionImageVariationPipeline(DiffusionPipeline):
                 if callback is not None and i % callback_steps == 0:
                     callback(i, t, latents)

+        self.maybe_free_model_hooks()
+
         if not output_type == "latent":
             image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
             image, has_nsfw_concept = self.run_safety_checker(image, device, image_embeddings.dtype)
...
@@ -30,8 +30,6 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -129,6 +127,7 @@ class StableDiffusionImg2ImgPipeline(
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -221,35 +220,6 @@ class StableDiffusionImg2ImgPipeline(
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -775,9 +745,8 @@ class StableDiffusionImg2ImgPipeline(
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -27,7 +27,7 @@ from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoa
 from ...models import AsymmetricAutoencoderKL, AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging
+from ...utils import deprecate, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -192,6 +192,7 @@ class StableDiffusionInpaintPipeline(
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -292,35 +293,6 @@ class StableDiffusionInpaintPipeline(
         )
         self.register_to_config(requires_safety_checker=requires_safety_checker)

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -1064,9 +1036,8 @@ class StableDiffusionInpaintPipeline(
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -27,7 +27,7 @@ from ...loaders import FromSingleFileMixin, LoraLoaderMixin, TextualInversionLoa
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, deprecate, is_accelerate_available, is_accelerate_version, logging
+from ...utils import PIL_INTERPOLATION, deprecate, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -115,6 +115,7 @@ class StableDiffusionInpaintPipelineLegacy(
         feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["feature_extractor"]

     def __init__(
@@ -214,35 +215,6 @@ class StableDiffusionInpaintPipelineLegacy(
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -761,9 +733,8 @@ class StableDiffusionInpaintPipelineLegacy(
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -24,7 +24,7 @@ from ...image_processor import PipelineImageInput, VaeImageProcessor
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import PIL_INTERPOLATION, deprecate, is_accelerate_available, is_accelerate_version, logging
+from ...utils import PIL_INTERPOLATION, deprecate, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -89,6 +89,7 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -392,44 +393,14 @@ class StableDiffusionInstructPix2PixPipeline(DiffusionPipeline, TextualInversion
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)

         return StableDiffusionPipelineOutput(images=image, nsfw_content_detected=has_nsfw_concept)

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     def _encode_prompt(
         self,
         prompt,
...
@@ -24,7 +24,7 @@ from ...image_processor import VaeImageProcessor
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import LMSDiscreteScheduler
-from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging
+from ...utils import deprecate, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -80,6 +80,7 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
         feature_extractor ([`CLIPImageProcessor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -128,35 +129,6 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
         sampling = getattr(library, "sampling")
         self.sampler = getattr(sampling, scheduler_type)

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -630,9 +602,8 @@ class StableDiffusionKDiffusionPipeline(DiffusionPipeline, TextualInversionLoade
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -78,6 +78,7 @@ class StableDiffusionLatentUpscalePipeline(DiffusionPipeline):
         scheduler ([`SchedulerMixin`]):
             A [`EulerDiscreteScheduler`] to be used in combination with `unet` to denoise the encoded image latents.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"

     def __init__(
         self,
...
@@ -29,8 +29,6 @@ from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     BaseOutput,
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -114,6 +112,7 @@ class StableDiffusionLDM3DPipeline(
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -191,35 +190,6 @@ class StableDiffusionLDM3DPipeline(
         """
         self.vae.disable_tiling()

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -696,9 +666,8 @@ class StableDiffusionLDM3DPipeline(
         rgb, depth = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return ((rgb, depth), has_nsfw_concept)
...
@@ -66,6 +66,7 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
         with_augs ([`list`]):
             Textual augmentations to apply while editing the text-to-image model. Set to `[]` for no augmentations.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -787,9 +788,8 @@ class StableDiffusionModelEditingPipeline(DiffusionPipeline, TextualInversionLoa
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -78,6 +78,7 @@ class StableDiffusionPanoramaPipeline(DiffusionPipeline, TextualInversionLoaderM
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
...
@@ -25,8 +25,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -95,6 +93,7 @@ class StableDiffusionParadigmsPipeline(
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]

     def __init__(
@@ -175,35 +174,6 @@ class StableDiffusionParadigmsPipeline(
         """
         self.vae.disable_tiling()

-    # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -803,9 +773,8 @@ class StableDiffusionParadigmsPipeline(
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)

-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()

         if not return_dict:
             return (image, has_nsfw_concept)
...
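For reference, the removed docstrings contrast this mode with `enable_sequential_cpu_offload`: model offload keeps one whole sub-model on the GPU at a time, giving smaller memory savings but much better performance because the unet stays resident across the iterative denoising loop. A usage sketch of the two alternatives (model id is a placeholder):

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)

# Option 1: model offload -- one whole sub-model on the GPU at a time.
# Lower savings than sequential offload, but much faster, because the unet
# stays on the GPU across the iterative denoising loop.
pipe.enable_model_cpu_offload()

# Option 2 (alternative; do not combine with option 1): sequential offload --
# offloads at the torch.nn.Module level for maximal memory savings, at a
# substantial speed cost.
# pipe.enable_sequential_cpu_offload()
```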