Unverified commit 93579650 authored by Patrick von Platen, committed by GitHub

Refactor model offload (#4514)



* [Draft] Refactor model offload

* [Draft] Refactor model offload

* Apply suggestions from code review

* cpu offload updates

* remove model cpu offload from individual pipelines

* add hook to offload models to cpu

* clean up

* model offload

* add model cpu offload string

* make style

* clean up

* fixes for offload issues

* fix tests issues

* resolve merge conflicts

* update src/diffusers/pipelines/pipeline_utils.py
Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

* make style

* Update src/diffusers/pipelines/latent_diffusion/pipeline_latent_diffusion.py

---------
Co-authored-by: Dhruv Nair <dhruv.nair@gmail.com>
parent 16a056a7
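After this refactor, pipelines no longer carry their own `enable_model_cpu_offload` implementation: each class declares a `model_cpu_offload_seq` string, the shared helper in `pipeline_utils.py` installs the accelerate hooks in that order, and `__call__` ends with `maybe_free_model_hooks()` instead of the old `final_offload_hook` bookkeeping. A minimal usage sketch (the checkpoint id is illustrative only; model offload still requires accelerate >= 0.17.0):

```python
import torch
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)

# The pipeline class declares model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae";
# the shared enable_model_cpu_offload() wraps each submodel with an accelerate offload hook in
# that order, so only the model currently running sits on the GPU.
pipe.enable_model_cpu_offload()

image = pipe("a photo of an astronaut riding a horse").images[0]
# At the end of __call__ the pipeline now calls maybe_free_model_hooks(), which replaces the
# manual final_offload_hook handling removed in the hunks below.
```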
@@ -39,8 +39,6 @@ from ...utils import (
     PIL_INTERPOLATION,
     BaseOutput,
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -309,6 +307,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
             Whether the pipeline requires a safety checker. We recommend setting it to True if you're using the
             pipeline publicly.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = [
         "safety_checker",
         "feature_extractor",
@@ -365,30 +364,6 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor)
         self.register_to_config(requires_safety_checker=requires_safety_checker)
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        hook = None
-        for cpu_offloaded_model in [self.vae, self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -1081,9 +1056,8 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
         image = self.image_processor.postprocess(image, output_type=output_type, do_denormalize=do_denormalize)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (image, has_nsfw_concept)
@@ -1286,9 +1260,8 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline):
         image = self.vae.decode(latents / self.vae.config.scaling_factor, return_dict=False)[0]
         image = self.image_processor.postprocess(image, output_type=output_type)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (inverted_latents, image)
...
@@ -117,6 +117,7 @@ class StableDiffusionSAGPipeline(DiffusionPipeline, TextualInversionLoaderMixin)
         feature_extractor ([`~transformers.CLIPImageProcessor`]):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]
 
     def __init__(
...
@@ -32,7 +32,7 @@ from ...models.attention_processor import (
 )
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
-from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging
+from ...utils import deprecate, logging
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline
 from . import StableDiffusionPipelineOutput
@@ -90,6 +90,7 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMi
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["watermarker", "safety_checker", "feature_extractor"]
 
     def __init__(
@@ -140,32 +141,6 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMi
         self.image_processor = VaeImageProcessor(vae_scale_factor=self.vae_scale_factor, resample="bicubic")
         self.register_to_config(max_noise_level=max_noise_level)
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
-            if cpu_offloaded_model is not None:
-                _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     def run_safety_checker(self, image, device, dtype):
         if self.safety_checker is not None:
             feature_extractor_input = self.image_processor.postprocess(image, output_type="pil")
...
@@ -27,8 +27,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -92,6 +90,7 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
     """
 
     _exclude_from_cpu_offload = ["prior", "image_normalizer"]
+    model_cpu_offload_seq = "text_encoder->prior_text_encoder->unet->vae"
 
     # prior components
     prior_tokenizer: CLIPTokenizer
@@ -164,31 +163,6 @@ class StableUnCLIPPipeline(DiffusionPipeline, TextualInversionLoaderMixin, LoraL
         """
         self.vae.disable_slicing()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.prior_text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.unclip.pipeline_unclip.UnCLIPPipeline._encode_prompt with _encode_prompt->_encode_prior_prompt, tokenizer->prior_tokenizer, text_encoder->prior_text_encoder
     def _encode_prior_prompt(
         self,
...
@@ -19,15 +19,13 @@ import PIL
 import torch
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 
-from diffusers.utils.import_utils import is_accelerate_available
-
 from ...image_processor import VaeImageProcessor
 from ...loaders import LoraLoaderMixin, TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...models.embeddings import get_timestep_embedding
 from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
-from ...utils import deprecate, is_accelerate_version, logging, replace_example_docstring
+from ...utils import deprecate, logging, replace_example_docstring
 from ...utils.torch_utils import randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
 from .stable_unclip_image_normalizer import StableUnCLIPImageNormalizer
@@ -94,6 +92,7 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
             Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
     """
 
+    model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae"
     _exclude_from_cpu_offload = ["image_normalizer"]
 
     # image encoding components
@@ -161,31 +160,6 @@ class StableUnCLIPImg2ImgPipeline(DiffusionPipeline, TextualInversionLoaderMixin
         """
         self.vae.disable_slicing()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.image_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
...
@@ -47,6 +47,7 @@ class StableDiffusionPipelineSafe(DiffusionPipeline):
             A `CLIPImageProcessor` to extract features from generated images; used as inputs to the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]
 
     def __init__(
...
@@ -129,6 +129,7 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoad
             watermark output images. If not defined, it will default to True if the package is installed, otherwise no
             watermarker will be used.
     """
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
 
     def __init__(
         self,
@@ -198,36 +199,6 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoad
         """
         self.vae.disable_tiling()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        model_sequence = (
-            [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
-        )
-        model_sequence.extend([self.unet, self.vae])
-
-        hook = None
-        for cpu_offloaded_model in model_sequence:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     def encode_prompt(
         self,
         prompt: str,
@@ -900,17 +871,16 @@ class StableDiffusionXLPipeline(DiffusionPipeline, FromSingleFileMixin, LoraLoad
                 self.vae.to(dtype=torch.float16)
         else:
             image = latents
-            return StableDiffusionXLPipelineOutput(images=image)
 
-        # apply watermark if available
-        if self.watermark is not None:
-            image = self.watermark.apply_watermark(image)
+        if not output_type == "latent":
+            # apply watermark if available
+            if self.watermark is not None:
+                image = self.watermark.apply_watermark(image)
 
         image = self.image_processor.postprocess(image, output_type=output_type)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (image,)
...
@@ -134,6 +134,8 @@ class StableDiffusionXLImg2ImgPipeline(
             watermark output images. If not defined, it will default to True if the package is installed, otherwise no
             watermarker will be used.
     """
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
+
     _optional_components = ["tokenizer", "text_encoder"]
 
     def __init__(
@@ -205,36 +207,6 @@ class StableDiffusionXLImg2ImgPipeline(
         """
         self.vae.disable_tiling()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        model_sequence = (
-            [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
-        )
-        model_sequence.extend([self.unet, self.vae])
-
-        hook = None
-        for cpu_offloaded_model in model_sequence:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
         self,
@@ -1057,9 +1029,8 @@ class StableDiffusionXLImg2ImgPipeline(
         image = self.image_processor.postprocess(image, output_type=output_type)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (image,)
...
@@ -280,6 +280,8 @@ class StableDiffusionXLInpaintPipeline(
             watermark output images. If not defined, it will default to True if the package is installed, otherwise no
             watermarker will be used.
     """
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
+
     _optional_components = ["tokenizer", "text_encoder"]
 
     def __init__(
@@ -354,37 +356,6 @@ class StableDiffusionXLInpaintPipeline(
         """
         self.vae.disable_tiling()
 
-    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl_img2img.StableDiffusionXLImg2ImgPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        model_sequence = (
-            [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
-        )
-        model_sequence.extend([self.unet, self.vae])
-
-        hook = None
-        for cpu_offloaded_model in model_sequence:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
         self,
@@ -1377,9 +1348,8 @@ class StableDiffusionXLInpaintPipeline(
         image = self.image_processor.postprocess(image, output_type=output_type)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (image,)
...
@@ -32,8 +32,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     is_invisible_watermark_available,
     logging,
     replace_example_docstring,
@@ -143,6 +141,7 @@ class StableDiffusionXLInstructPix2PixPipeline(
             watermark output images. If not defined, it will default to True if the package is installed, otherwise no
             watermarker will be used.
     """
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
 
     def __init__(
         self,
@@ -211,38 +210,6 @@ class StableDiffusionXLInstructPix2PixPipeline(
         """
         self.vae.disable_tiling()
 
-    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        model_sequence = (
-            [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
-        )
-        model_sequence.extend([self.unet, self.vae])
-
-        hook = None
-        for cpu_offloaded_model in model_sequence:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
-    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
         self,
         prompt: str,
@@ -965,9 +932,8 @@ class StableDiffusionXLInstructPix2PixPipeline(
         image = self.image_processor.postprocess(image, output_type=output_type)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (image,)
...
@@ -30,8 +30,6 @@ from ...utils import (
     PIL_INTERPOLATION,
     BaseOutput,
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -151,6 +149,7 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
         feature_extractor ([`CLIPFeatureExtractor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->adapter->unet->vae"
     _optional_components = ["safety_checker", "feature_extractor"]
 
     def __init__(
@@ -217,34 +216,6 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
         """
         self.vae.disable_slicing()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.adapter, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        if self.safety_checker is not None:
-            _, hook = cpu_offload_with_hook(self.safety_checker, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -815,9 +786,8 @@ class StableDiffusionAdapterPipeline(DiffusionPipeline):
         # 9. Run safety checker
         image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -35,8 +35,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     PIL_INTERPOLATION,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -159,6 +157,7 @@ class StableDiffusionXLAdapterPipeline(
         feature_extractor ([`CLIPFeatureExtractor`]):
             Model that extracts features from generated images to be used as inputs for the `safety_checker`.
     """
+    model_cpu_offload_seq = "text_encoder->text_encoder_2->unet->vae"
 
     def __init__(
         self,
@@ -222,37 +221,6 @@ class StableDiffusionXLAdapterPipeline(
         """
         self.vae.disable_tiling()
 
-    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
-        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
-        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
-        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        model_sequence = (
-            [self.text_encoder, self.text_encoder_2] if self.text_encoder is not None else [self.text_encoder_2]
-        )
-        model_sequence.extend([self.unet, self.vae])
-
-        hook = None
-        for cpu_offloaded_model in model_sequence:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.encode_prompt
     def encode_prompt(
         self,
...
@@ -25,8 +25,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -95,6 +93,7 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
 
     def __init__(
         self,
@@ -148,31 +147,6 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
         """
         self.vae.disable_tiling()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -678,9 +652,8 @@ class TextToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lora
         else:
             video = tensor2vid(video_tensor)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (video,)
...
@@ -26,8 +26,6 @@ from ...models.lora import adjust_lora_scale_text_encoder
 from ...schedulers import KarrasDiffusionSchedulers
 from ...utils import (
     deprecate,
-    is_accelerate_available,
-    is_accelerate_version,
     logging,
     replace_example_docstring,
 )
@@ -157,6 +155,7 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+    model_cpu_offload_seq = "text_encoder->unet->vae"
 
     def __init__(
         self,
@@ -210,31 +209,6 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
         """
         self.vae.disable_tiling()
 
-    def enable_model_cpu_offload(self, gpu_id=0):
-        r"""
-        Offload all models to CPU to reduce memory usage with a low impact on performance. Moves one whole model at a
-        time to the GPU when its `forward` method is called, and the model remains in GPU until the next model runs.
-        Memory savings are lower than using `enable_sequential_cpu_offload`, but performance is much better due to the
-        iterative execution of the `unet`.
-        """
-        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
-            from accelerate import cpu_offload_with_hook
-        else:
-            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
-
-        device = torch.device(f"cuda:{gpu_id}")
-
-        if self.device.type != "cpu":
-            self.to("cpu", silence_dtype_warnings=True)
-            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
-
-        hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.vae, self.unet]:
-            _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
-
-        # We'll offload the last model manually.
-        self.final_offload_hook = hook
-
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._encode_prompt
     def _encode_prompt(
         self,
@@ -753,9 +727,8 @@ class VideoToVideoSDPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
         else:
             video = tensor2vid(video_tensor)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (video,)
...
@@ -635,9 +635,8 @@ class TextToVideoZeroPipeline(StableDiffusionPipeline):
         # Run safety checker
         image, has_nsfw_concept = self.run_safety_checker(image, device, prompt_embeds.dtype)
 
-        # Offload last model to CPU
-        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
-            self.final_offload_hook.offload()
+        # Offload all models
+        self.maybe_free_model_hooks()
 
         if not return_dict:
             return (image, has_nsfw_concept)
...
@@ -76,6 +76,8 @@ class UnCLIPPipeline(DiffusionPipeline):
     decoder_scheduler: UnCLIPScheduler
     super_res_scheduler: UnCLIPScheduler
 
+    model_cpu_offload_seq = "text_encoder->text_proj->decoder->super_res_first->super_res_last"
+
     def __init__(
         self,
         prior: PriorTransformer,
...
@@ -77,6 +77,7 @@ class UnCLIPImageVariationPipeline(DiffusionPipeline):
     decoder_scheduler: UnCLIPScheduler
     super_res_scheduler: UnCLIPScheduler
 
+    model_cpu_offload_seq = "text_encoder->image_encoder->text_proj->decoder->super_res_first->super_res_last"
 
     def __init__(
         self,
...
@@ -103,6 +103,9 @@ class UniDiffuserPipeline(DiffusionPipeline):
         original UniDiffuser paper uses the [`DPMSolverMultistepScheduler`] scheduler.
     """
 
+    # TODO: support for moving submodules for components with enable_model_cpu_offload
+    model_cpu_offload_seq = "text_encoder->image_encoder->unet->vae->text_decoder"
+
     def __init__(
         self,
         vae: AutoencoderKL,
@@ -173,7 +176,15 @@ class UniDiffuserPipeline(DiffusionPipeline):
             torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
 
         hook = None
-        for cpu_offloaded_model in [self.text_encoder, self.unet, self.vae, self.image_encoder, self.text_decoder]:
+        for cpu_offloaded_model in [
+            self.text_encoder.text_model,
+            self.image_encoder,
+            self.unet,
+            self.vae,
+            self.text_decoder.encode_prefix,
+            self.text_decoder.decode_prefix,
+            self.text_decoder,
+        ]:
             _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
 
         if self.safety_checker is not None:
@@ -1344,6 +1355,8 @@ class UniDiffuserPipeline(DiffusionPipeline):
             for output, length in zip(output_list, seq_lengths)
         ]
 
+        self.maybe_free_model_hooks()
+
         # 10. Convert to PIL
         if output_type == "pil" and gen_image is not None:
             gen_image = self.numpy_to_pil(gen_image)
...
@@ -58,6 +58,8 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+    model_cpu_offload_seq = "bert->unet->vqvae"
+
     tokenizer: CLIPTokenizer
     image_feature_extractor: CLIPImageProcessor
     text_encoder: CLIPTextModelWithProjection
...
@@ -52,6 +52,8 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
             A scheduler to be used in combination with `unet` to denoise the encoded image latents. Can be one of
             [`DDIMScheduler`], [`LMSDiscreteScheduler`], or [`PNDMScheduler`].
     """
+    model_cpu_offload_seq = "bert->unet->vqvae"
+
     image_feature_extractor: CLIPImageProcessor
     image_encoder: CLIPVisionModelWithProjection
     image_unet: UNet2DConditionModel
...
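For pipelines not touched by this commit (including community pipelines), the same opt-in pattern applies. A hedged sketch of what a pipeline class needs to declare; the class name and component names below are hypothetical, only the two class attributes and `maybe_free_model_hooks()` come from this diff:

```python
from diffusers import DiffusionPipeline


class MyCustomPipeline(DiffusionPipeline):
    # Order in which whole models are moved onto the GPU as they are needed by __call__.
    model_cpu_offload_seq = "text_encoder->unet->vae"
    # Components that should never be wrapped with an offload hook
    # (e.g. small stateless helpers that are cheap to keep on the CPU).
    _exclude_from_cpu_offload = ["image_normalizer"]

    def __call__(self, *args, **kwargs):
        ...
        # At the end of inference, return every model to the CPU and release the hooks,
        # replacing the old per-pipeline final_offload_hook bookkeeping.
        self.maybe_free_model_hooks()
        ...
```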