renzhc / diffusers_dcu · commit 9c856118 (unverified)
Authored Apr 21, 2023 by Patrick von Platen; committed Apr 21, 2023 by GitHub.
Add model offload to x4 upscaler (#3187)

* Add model offload to x4 upscaler
* fix
Parent: 9bce375f
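For context, a minimal usage sketch of the API this commit adds to the x4 upscaler pipeline. The checkpoint id and input file are illustrative, and the snippet assumes a CUDA machine with accelerate >= 0.17.0 installed:

```python
import torch
from PIL import Image
from diffusers import StableDiffusionUpscalePipeline

pipe = StableDiffusionUpscalePipeline.from_pretrained(
    "stabilityai/stable-diffusion-x4-upscaler", torch_dtype=torch.float16
)
# New in this commit: whole-model CPU offload, an alternative to pipe.to("cuda")
# or the slower but more memory-frugal enable_sequential_cpu_offload().
pipe.enable_model_cpu_offload()

low_res = Image.open("low_res.png").convert("RGB")  # any small RGB input image
image = pipe(prompt="a white cat", image=low_res).images[0]
image.save("upscaled_cat.png")
```

With this mode, each of `unet`, `text_encoder`, and `vae` is moved to the GPU as a whole when first called and stays there until the next model runs, so the iterative denoising loop is not slowed down by per-layer transfers.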
Changes: 1 changed file with 32 additions and 2 deletions (+32 / -2).
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py (view file @ 9c856118)
```diff
@@ -23,7 +23,7 @@ from transformers import CLIPTextModel, CLIPTokenizer
 from ...loaders import TextualInversionLoaderMixin
 from ...models import AutoencoderKL, UNet2DConditionModel
 from ...schedulers import DDPMScheduler, KarrasDiffusionSchedulers
-from ...utils import deprecate, is_accelerate_available, logging, randn_tensor
+from ...utils import deprecate, is_accelerate_available, is_accelerate_version, logging, randn_tensor
 from ..pipeline_utils import DiffusionPipeline, ImagePipelineOutput
```
```diff
@@ -129,10 +129,36 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMixin):
         device = torch.device(f"cuda:{gpu_id}")
 
-        for cpu_offloaded_model in [self.unet, self.text_encoder]:
+        for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
             if cpu_offloaded_model is not None:
                 cpu_offload(cpu_offloaded_model, device)
 
+    def enable_model_cpu_offload(self, gpu_id=0):
+        r"""
+        Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
+        to `enable_sequential_cpu_offload`, this method moves one whole model at a time to the GPU when its `forward`
+        method is called, and the model remains in GPU until the next model runs. Memory savings are lower than with
+        `enable_sequential_cpu_offload`, but performance is much better due to the iterative execution of the `unet`.
+        """
+        if is_accelerate_available() and is_accelerate_version(">=", "0.17.0.dev0"):
+            from accelerate import cpu_offload_with_hook
+        else:
+            raise ImportError("`enable_model_cpu_offload` requires `accelerate v0.17.0` or higher.")
+
+        device = torch.device(f"cuda:{gpu_id}")
+
+        if self.device.type != "cpu":
+            self.to("cpu", silence_dtype_warnings=True)
+            torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
+
+        hook = None
+        for cpu_offloaded_model in [self.unet, self.text_encoder, self.vae]:
+            if cpu_offloaded_model is not None:
+                _, hook = cpu_offload_with_hook(cpu_offloaded_model, device, prev_module_hook=hook)
+
+        # We'll offload the last model manually.
+        self.final_offload_hook = hook
+
     @property
     # Copied from diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline._execution_device
     def _execution_device(self):
```
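The docstring above describes the hook chain abstractly. Here is a standalone sketch of how chained `cpu_offload_with_hook` calls behave, using toy `nn.Linear` modules in place of the pipeline's models (assumes accelerate >= 0.17.0 and a CUDA device; the toy names are mine, not the library's):

```python
import torch
from accelerate import cpu_offload_with_hook

device = torch.device("cuda:0")
# Toy stand-ins for the pipeline's text_encoder / unet / vae.
model_a, model_b, model_c = (torch.nn.Linear(8, 8) for _ in range(3))

hook = None
for model in [model_a, model_b, model_c]:
    # Each hook moves its model to `device` when that model's forward runs;
    # via prev_module_hook, the same forward offloads the previous model to CPU.
    _, hook = cpu_offload_with_hook(model, device, prev_module_hook=hook)

x = torch.randn(1, 8, device=device)
y = model_b(model_a(x))  # model_a runs on the GPU; model_b's forward offloads it
z = model_c(y)           # model_c runs; model_b is offloaded
hook.offload()           # nothing offloads the last model automatically,
                         # which is why the pipeline keeps final_offload_hook
```

Note the sequential-offload list also gains `self.vae` in this commit, so the VAE follows the same offload scheme as the other models.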
```diff
@@ -647,6 +673,10 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline, TextualInversionLoaderMixin):
         self.vae.to(dtype=torch.float32)
         image = self.decode_latents(latents.float())
 
+        # Offload last model to CPU
+        if hasattr(self, "final_offload_hook") and self.final_offload_hook is not None:
+            self.final_offload_hook.offload()
+
         # 11. Convert to PIL
         if output_type == "pil":
             image = self.numpy_to_pil(image)
```
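The `final_offload_hook.offload()` call added above is what returns the last model to the CPU once decoding is done. A rough way to observe the effect, reusing `pipe` and `low_res` from the first sketch (peak numbers are machine-dependent; none are claimed by this commit):

```python
import torch

torch.cuda.reset_peak_memory_stats()
_ = pipe(prompt="a white cat", image=low_res).images[0]
peak_gib = torch.cuda.max_memory_allocated() / 2**30
print(f"peak VRAM with enable_model_cpu_offload(): {peak_gib:.2f} GiB")
```

Run once after `pipe.to("cuda")` and once after `pipe.enable_model_cpu_offload()` to compare the two paths.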