renzhc / diffusers_dcu · Commits

Commit 98c9aac1 (Unverified)
Authored Jul 10, 2023 by Patrick von Platen; committed via GitHub on Jul 10, 2023
[SDXL] Fix all sequential offload (#4010)

* Fix all sequential offload
* make style
* make style
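Context for the fix (an inference from the diff below, not stated on this page): the SDXL refiner-style img2img pipeline can be constructed with `text_encoder=None` (only `text_encoder_2` is present), and the old `enable_sequential_cpu_offload` passed that `None` straight to accelerate's `cpu_offload`. A minimal sketch of the before/after logic, with `sequential_offload` as a hypothetical stand-in for the pipeline method:

```python
import torch
from accelerate import cpu_offload


def sequential_offload(pipe, gpu_id=0):
    device = torch.device(f"cuda:{gpu_id}")
    # Before the fix, pipe.text_encoder sat in this list, so a refiner-style
    # pipeline handed None to cpu_offload and crashed.
    for model in [pipe.unet, pipe.text_encoder_2, pipe.vae]:
        cpu_offload(model, device)
    # After the fix, the optional encoder is offloaded only when it exists.
    if pipe.text_encoder is not None:
        cpu_offload(pipe.text_encoder, device)
```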
Parent: e3d71ad8

Showing 3 changed files with 52 additions and 69 deletions (+52 −69)
- src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py (+4 −3)
- tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py (+24 −33)
- tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py (+24 −33)
src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py

```diff
@@ -176,7 +176,6 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         """
         self.vae.disable_tiling()
 
-    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_sequential_cpu_offload
     def enable_sequential_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, significantly reducing memory usage. When called, unet,
@@ -196,10 +195,12 @@ class StableDiffusionXLImg2ImgPipeline(DiffusionPipeline, FromSingleFileMixin):
         self.to("cpu", silence_dtype_warnings=True)
         torch.cuda.empty_cache()  # otherwise we don't see the memory savings (but they probably exist)
 
-        for cpu_offloaded_model in [self.unet, self.text_encoder, self.text_encoder_2, self.vae]:
+        for cpu_offloaded_model in [self.unet, self.text_encoder_2, self.vae]:
             cpu_offload(cpu_offloaded_model, device)
 
-    # Copied from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl.StableDiffusionXLPipeline.enable_model_cpu_offload
+        if self.text_encoder is not None:
+            cpu_offload(self.text_encoder, device)
+
     def enable_model_cpu_offload(self, gpu_id=0):
         r"""
         Offloads all models to CPU using accelerate, reducing memory usage with a low impact on performance. Compared
```
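The "Copied from" markers are dropped presumably because the offload methods now diverge from StableDiffusionXLPipeline's versions and diffusers' copy-consistency check would otherwise flag them. A hedged usage sketch of the fixed path — the refiner checkpoint and dtype are illustrative assumptions, not taken from this commit:

```python
import torch
from diffusers import StableDiffusionXLImg2ImgPipeline

# The refiner ships without a first text encoder (text_encoder=None), which is
# exactly the configuration the fixed offload loop now tolerates.
pipe = StableDiffusionXLImg2ImgPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-refiner-1.0", torch_dtype=torch.float16
)
pipe.enable_sequential_cpu_offload()  # previously raised on the missing text_encoder
```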
tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl.py

```diff
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import gc
 import unittest
 
 import numpy as np
@@ -22,12 +21,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProject
 from diffusers import (
     AutoencoderKL,
-    DiffusionPipeline,
     EulerDiscreteScheduler,
     StableDiffusionXLPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils import slow, torch_device
+from diffusers.utils import torch_device
 from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu
 
 from ..pipeline_params import TEXT_TO_IMAGE_BATCH_PARAMS, TEXT_TO_IMAGE_IMAGE_PARAMS, TEXT_TO_IMAGE_PARAMS
@@ -190,38 +188,31 @@ class StableDiffusionXLPipelineFastTests(PipelineLatentTesterMixin, PipelineTest
     def test_inference_batch_single_identical(self):
         super().test_inference_batch_single_identical(expected_max_diff=3e-3)
 
+    @require_torch_gpu
+    def test_stable_diffusion_xl_offloads(self):
+        pipes = []
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionXLPipeline(**components).to(torch_device)
+        pipes.append(sd_pipe)
 
-@slow
-@require_torch_gpu
-class StableDiffusionXLPipelineSlowTests(unittest.TestCase):
-    def tearDown(self):
-        super().tearDown()
-        gc.collect()
-        torch.cuda.empty_cache()
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionXLPipeline(**components)
+        sd_pipe.enable_model_cpu_offload()
+        pipes.append(sd_pipe)
 
-    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
-        generator = torch.Generator(device=generator_device).manual_seed(seed)
-        latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
-        latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
-        inputs = {
-            "prompt": "a photograph of an astronaut riding a horse",
-            "latents": latents,
-            "generator": generator,
-            "num_inference_steps": 3,
-            "guidance_scale": 7.5,
-            "output_type": "numpy",
-        }
-        return inputs
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionXLPipeline(**components)
+        sd_pipe.enable_sequential_cpu_offload()
+        pipes.append(sd_pipe)
 
-    def test_stable_diffusion_default_euler(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-base")
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-        inputs = self.get_inputs(torch_device)
-        image = pipe(**inputs).images
-        image_slice = image[0, -3:, -3:, -1].flatten()
+        image_slices = []
+        for pipe in pipes:
+            pipe.unet.set_default_attn_processor()
 
-        assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506])
-        assert np.abs(image_slice - expected_slice).max() < 7e-3
+            inputs = self.get_dummy_inputs(torch_device)
+            image = pipe(**inputs).images
+
+            image_slices.append(image[0, -3:, -3:, -1].flatten())
+
+        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3
```
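Net effect: the slow, checkpoint-based StableDiffusionXLPipelineSlowTests class is deleted and replaced by a fast GPU test that runs the same dummy components under three memory strategies and asserts the outputs agree. A standalone sketch of that structure — `build_pipeline` and `make_inputs` are hypothetical stand-ins for the test's `get_dummy_components`/`get_dummy_inputs` helpers:

```python
import numpy as np


def compare_offload_strategies(build_pipeline, make_inputs, device):
    # Baseline: everything resident on the accelerator.
    pipes = [build_pipeline().to(device)]

    # Model-level offload: whole submodels hop between CPU and GPU.
    pipe = build_pipeline()
    pipe.enable_model_cpu_offload()
    pipes.append(pipe)

    # Sequential offload: accelerate streams weights layer by layer.
    pipe = build_pipeline()
    pipe.enable_sequential_cpu_offload()
    pipes.append(pipe)

    image_slices = []
    for pipe in pipes:
        # Pin one attention implementation so outputs are comparable.
        pipe.unet.set_default_attn_processor()
        image = pipe(**make_inputs()).images
        image_slices.append(image[0, -3:, -3:, -1].flatten())

    # Offloading must not change results beyond numerical noise.
    assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
    assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3
```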
tests/pipelines/stable_diffusion_xl/test_stable_diffusion_xl_img2img.py

```diff
@@ -13,7 +13,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import gc
 import random
 import unittest
@@ -23,12 +22,11 @@ from transformers import CLIPTextConfig, CLIPTextModel, CLIPTextModelWithProject
 from diffusers import (
     AutoencoderKL,
-    DiffusionPipeline,
     EulerDiscreteScheduler,
     StableDiffusionXLImg2ImgPipeline,
     UNet2DConditionModel,
 )
-from diffusers.utils import floats_tensor, slow, torch_device
+from diffusers.utils import floats_tensor, torch_device
 from diffusers.utils.testing_utils import enable_full_determinism, require_torch_gpu
 
 from ..pipeline_params import (
@@ -205,38 +203,31 @@ class StableDiffusionXLImg2ImgPipelineFastTests(PipelineLatentTesterMixin, Pipel
         # make sure that it's equal
         assert np.abs(image_slice_1.flatten() - image_slice_2.flatten()).max() < 1e-4
 
+    @require_torch_gpu
+    def test_stable_diffusion_xl_offloads(self):
+        pipes = []
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionXLImg2ImgPipeline(**components).to(torch_device)
+        pipes.append(sd_pipe)
 
-@slow
-@require_torch_gpu
-class StableDiffusionXLImg2ImgPipelineSlowTests(unittest.TestCase):
-    def tearDown(self):
-        super().tearDown()
-        gc.collect()
-        torch.cuda.empty_cache()
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionXLImg2ImgPipeline(**components)
+        sd_pipe.enable_model_cpu_offload()
+        pipes.append(sd_pipe)
 
-    def get_inputs(self, device, generator_device="cpu", dtype=torch.float32, seed=0):
-        generator = torch.Generator(device=generator_device).manual_seed(seed)
-        latents = np.random.RandomState(seed).standard_normal((1, 4, 64, 64))
-        latents = torch.from_numpy(latents).to(device=device, dtype=dtype)
-        inputs = {
-            "prompt": "a photograph of an astronaut riding a horse",
-            "latents": latents,
-            "generator": generator,
-            "num_inference_steps": 3,
-            "guidance_scale": 7.5,
-            "output_type": "numpy",
-        }
-        return inputs
+        components = self.get_dummy_components()
+        sd_pipe = StableDiffusionXLImg2ImgPipeline(**components)
+        sd_pipe.enable_sequential_cpu_offload()
+        pipes.append(sd_pipe)
 
-    def test_stable_diffusion_default_euler(self):
-        pipe = DiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-base")
-        pipe.to(torch_device)
-        pipe.set_progress_bar_config(disable=None)
-        inputs = self.get_inputs(torch_device)
-        image = pipe(**inputs).images
-        image_slice = image[0, -3:, -3:, -1].flatten()
+        image_slices = []
+        for pipe in pipes:
+            pipe.unet.set_default_attn_processor()
 
-        assert image.shape == (1, 512, 512, 3)
-        expected_slice = np.array([0.49493, 0.47896, 0.40798, 0.54214, 0.53212, 0.48202, 0.47656, 0.46329, 0.48506])
-        assert np.abs(image_slice - expected_slice).max() < 7e-3
+            inputs = self.get_dummy_inputs(torch_device)
+            image = pipe(**inputs).images
+
+            image_slices.append(image[0, -3:, -3:, -1].flatten())
+
+        assert np.abs(image_slices[0] - image_slices[1]).max() < 1e-3
+        assert np.abs(image_slices[0] - image_slices[2]).max() < 1e-3
```
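For reference, the primitive both pipelines delegate to is accelerate's `cpu_offload`, which keeps a module's weights on CPU and streams them to the execution device for each forward pass. A self-contained toy sketch — the module and device choice are illustrative, and CUDA availability is an assumption:

```python
import torch
from accelerate import cpu_offload

model = torch.nn.Linear(8, 8)  # toy stand-in for unet / vae / text_encoder_2
cpu_offload(model, execution_device=torch.device("cuda:0"))

x = torch.randn(1, 8)
y = model(x)  # the hook moves weights (and inputs) to cuda:0 for this call,
              # then offloads the weights back to CPU afterwards
```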