Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
diffusers
Commits
98c9aac1
Unverified
Commit
98c9aac1
authored
Jul 10, 2023
by
Patrick von Platen
Committed by
GitHub
Jul 10, 2023
Browse files
[SDXL] Fix all sequential offload (#4010)
* Fix all sequential offload * make style * make style
parent
e3d71ad8
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
52 additions
and
69 deletions
+52
-69
src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
...able_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
+4
-3
tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
...pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
+24
-33
tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py
...s/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py
+24
-33
No files found.
src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py
View file @
98c9aac1
...
@@ -176,7 +176,6 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
...
@@ -176,7 +176,6 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
"""
"""
self
.
vae
.
disable_tiling
()
self
.
vae
.
disable_tiling
()
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_sequential_cpu_offload
def
enable_sequential_cpu_offload
(
self
,
gpu_id
=
0
):
def
enable_sequential_cpu_offload
(
self
,
gpu_id
=
0
):
r
"""
r
"""
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
...
@@ -196,10 +195,12 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
...
@@ -196,10 +195,12 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
self
.
to
(
"cpu"
,
silence_dtype_warnings
=
True
)
self
.
to
(
"cpu"
,
silence_dtype_warnings
=
True
)
torch
.
cuda
.
empty_cache
()
# otherwise we don't see the memory savings (but they probably exist)
torch
.
cuda
.
empty_cache
()
# otherwise we don't see the memory savings (but they probably exist)
for
cpu_offloaded_model
in
[
self
.
unet
,
self
.
text_encoder
,
self
.
text_encoder_2
,
self
.
vae
]:
for
cpu_offloaded_model
in
[
self
.
unet
,
self
.
text_encoder_2
,
self
.
vae
]:
cpu_offload
(
cpu_offloaded_model
,
device
)
cpu_offload
(
cpu_offloaded_model
,
device
)
# Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
if
self
.
text_encoder
is
not
None
:
cpu_offload
(
self
.
text_encoder
,
device
)
def
enable_model_cpu_offload
(
self
,
gpu_id
=
0
):
def
enable_model_cpu_offload
(
self
,
gpu_id
=
0
):
r
"""
r
"""
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
...
...
tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py
View file @
98c9aac1
...
@@ -13,7 +13,6 @@
...
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
gc
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
...
@@ -22,12 +21,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProject
...
@@ -22,12 +21,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProject
from
diffusers
import
(
from
diffusers
import
(
AutoencoderKL
,
AutoencoderKL
,
DiffusionPipeline
,
EulerDiscreteScheduler
,
EulerDiscreteScheduler
,
StableDiffusionXLPipeline
,
StableDiffusionXLPipeline
,
UNet2DConditionModel
,
UNet2DConditionModel
,
)
)
from
diffusers.utils
import
slow
,
torch_device
from
diffusers.utils
import
torch_device
from
diffusers.utils.testing_utils
import
enable_full_determinism
,
require_torch_gpu
from
diffusers.utils.testing_utils
import
enable_full_determinism
,
require_torch_gpu
from
..pipeline_params
import
TEXT_TO_IMAGE_BATCH_PARAMS
,
TEXT_TO_IMAGE_IMAGE_PARAMS
,
TEXT_TO_IMAGE_PARAMS
from
..pipeline_params
import
TEXT_TO_IMAGE_BATCH_PARAMS
,
TEXT_TO_IMAGE_IMAGE_PARAMS
,
TEXT_TO_IMAGE_PARAMS
...
@@ -190,38 +188,31 @@ class StableDiffusionXLPipelineFastTests(PipelineLatentTesterMixin, PipelineTest
...
@@ -190,38 +188,31 @@ class StableDiffusionXLPipelineFastTests(PipelineLatentTesterMixin, PipelineTest
def
test_inference_batch_single_identical
(
self
):
def
test_inference_batch_single_identical
(
self
):
super
().
test_inference_batch_single_identical
(
expected_max_diff
=
3e-3
)
super
().
test_inference_batch_single_identical
(
expected_max_diff
=
3e-3
)
@
require_torch_gpu
def
test_stable_diffusion_xl_offloads
(
self
):
pipes
=
[]
components
=
self
.
get_dummy_components
()
sd_pipe
=
StableDiffusionXLPipeline
(
**
components
).
to
(
torch_device
)
pipes
.
append
(
sd_pipe
)
@
slow
components
=
self
.
get_dummy_components
()
@
require_torch_gpu
sd_pipe
=
StableDiffusionXLPipeline
(
**
components
)
class
StableDiffusionXLPipelineSlowTests
(
unittest
.
TestCase
):
sd_pipe
.
enable_model_cpu_offload
()
def
tearDown
(
self
):
pipes
.
append
(
sd_pipe
)
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
def
get_inputs
(
self
,
device
,
generator_device
=
"cpu"
,
dtype
=
torch
.
float32
,
seed
=
0
):
components
=
self
.
get_dummy_components
()
generator
=
torch
.
Generator
(
device
=
generator_device
).
manual_seed
(
seed
)
sd_pipe
=
StableDiffusionXLPipeline
(
**
components
)
latents
=
np
.
random
.
RandomState
(
seed
).
standard_normal
((
1
,
4
,
64
,
64
))
sd_pipe
.
enable_sequential_cpu_offload
()
latents
=
torch
.
from_numpy
(
latents
).
to
(
device
=
device
,
dtype
=
dtype
)
pipes
.
append
(
sd_pipe
)
inputs
=
{
"prompt"
:
"a photograph of an astronaut riding a horse"
,
image_slices
=
[]
"latents"
:
latents
,
for
pipe
in
pipes
:
"generator"
:
generator
,
pipe
.
unet
.
set_default_attn_processor
()
"num_inference_steps"
:
3
,
"guidance_scale"
:
7.5
,
"output_type"
:
"numpy"
,
}
return
inputs
def
test_stable_diffusion_default_euler
(
self
):
inputs
=
self
.
get_dummy_inputs
(
torch_device
)
pipe
=
DiffusionPipeline
.
from_pretrained
(
"stabilityai/stable-diffusion-2-base"
)
image
=
pipe
(
**
inputs
).
images
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
inputs
=
self
.
get_inputs
(
torch_device
)
image_slices
.
append
(
image
[
0
,
-
3
:,
-
3
:,
-
1
].
flatten
())
image
=
pipe
(
**
inputs
).
images
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
].
flatten
()
assert
image
.
shape
==
(
1
,
512
,
512
,
3
)
assert
np
.
abs
(
image_slices
[
0
]
-
image_slices
[
1
]).
max
()
<
1e-3
expected_slice
=
np
.
array
([
0.49493
,
0.47896
,
0.40798
,
0.54214
,
0.53212
,
0.48202
,
0.47656
,
0.46329
,
0.48506
])
assert
np
.
abs
(
image_slices
[
0
]
-
image_slices
[
2
]).
max
()
<
1e-3
assert
np
.
abs
(
image_slice
-
expected_slice
).
max
()
<
7e-3
tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py
View file @
98c9aac1
...
@@ -13,7 +13,6 @@
...
@@ -13,7 +13,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
gc
import
random
import
random
import
unittest
import
unittest
...
@@ -23,12 +22,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProject
...
@@ -23,12 +22,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProject
from
diffusers
import
(
from
diffusers
import
(
AutoencoderKL
,
AutoencoderKL
,
DiffusionPipeline
,
EulerDiscreteScheduler
,
EulerDiscreteScheduler
,
StableDiffusionXLImg2ImgPipeline
,
StableDiffusionXLImg2ImgPipeline
,
UNet2DConditionModel
,
UNet2DConditionModel
,
)
)
from
diffusers.utils
import
floats_tensor
,
slow
,
torch_device
from
diffusers.utils
import
floats_tensor
,
torch_device
from
diffusers.utils.testing_utils
import
enable_full_determinism
,
require_torch_gpu
from
diffusers.utils.testing_utils
import
enable_full_determinism
,
require_torch_gpu
from
..pipeline_params
import
(
from
..pipeline_params
import
(
...
@@ -205,38 +203,31 @@ class StableDiffusionXLImg2ImgPipelineFastTests(PipelineLatentTesterMixin, Pipel
...
@@ -205,38 +203,31 @@ class StableDiffusionXLImg2ImgPipelineFastTests(PipelineLatentTesterMixin, Pipel
# make sure that it's equal
# make sure that it's equal
assert
np
.
abs
(
image_slice_1
.
flatten
()
-
image_slice_2
.
flatten
()).
max
()
<
1e-4
assert
np
.
abs
(
image_slice_1
.
flatten
()
-
image_slice_2
.
flatten
()).
max
()
<
1e-4
@
require_torch_gpu
def
test_stable_diffusion_xl_offloads
(
self
):
pipes
=
[]
components
=
self
.
get_dummy_components
()
sd_pipe
=
StableDiffusionXLImg2ImgPipeline
(
**
components
).
to
(
torch_device
)
pipes
.
append
(
sd_pipe
)
@
slow
components
=
self
.
get_dummy_components
()
@
require_torch_gpu
sd_pipe
=
StableDiffusionXLImg2ImgPipeline
(
**
components
)
class
StableDiffusionXLImg2ImgPipelineSlowTests
(
unittest
.
TestCase
):
sd_pipe
.
enable_model_cpu_offload
()
def
tearDown
(
self
):
pipes
.
append
(
sd_pipe
)
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
def
get_inputs
(
self
,
device
,
generator_device
=
"cpu"
,
dtype
=
torch
.
float32
,
seed
=
0
):
components
=
self
.
get_dummy_components
()
generator
=
torch
.
Generator
(
device
=
generator_device
).
manual_seed
(
seed
)
sd_pipe
=
StableDiffusionXLImg2ImgPipeline
(
**
components
)
latents
=
np
.
random
.
RandomState
(
seed
).
standard_normal
((
1
,
4
,
64
,
64
))
sd_pipe
.
enable_sequential_cpu_offload
()
latents
=
torch
.
from_numpy
(
latents
).
to
(
device
=
device
,
dtype
=
dtype
)
pipes
.
append
(
sd_pipe
)
inputs
=
{
"prompt"
:
"a photograph of an astronaut riding a horse"
,
image_slices
=
[]
"latents"
:
latents
,
for
pipe
in
pipes
:
"generator"
:
generator
,
pipe
.
unet
.
set_default_attn_processor
()
"num_inference_steps"
:
3
,
"guidance_scale"
:
7.5
,
"output_type"
:
"numpy"
,
}
return
inputs
def
test_stable_diffusion_default_euler
(
self
):
inputs
=
self
.
get_dummy_inputs
(
torch_device
)
pipe
=
DiffusionPipeline
.
from_pretrained
(
"stabilityai/stable-diffusion-2-base"
)
image
=
pipe
(
**
inputs
).
images
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
inputs
=
self
.
get_inputs
(
torch_device
)
image_slices
.
append
(
image
[
0
,
-
3
:,
-
3
:,
-
1
].
flatten
())
image
=
pipe
(
**
inputs
).
images
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
].
flatten
()
assert
image
.
shape
==
(
1
,
512
,
512
,
3
)
assert
np
.
abs
(
image_slices
[
0
]
-
image_slices
[
1
]).
max
()
<
1e-3
expected_slice
=
np
.
array
([
0.49493
,
0.47896
,
0.40798
,
0.54214
,
0.53212
,
0.48202
,
0.47656
,
0.46329
,
0.48506
])
assert
np
.
abs
(
image_slices
[
0
]
-
image_slices
[
2
]).
max
()
<
1e-3
assert
np
.
abs
(
image_slice
-
expected_slice
).
max
()
<
7e-3
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment