Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
diffusers
Commits
76845183
Commit
76845183
authored
Nov 25, 2022
by
Patrick von Platen
Browse files
Merge branch 'main' of
https://github.com/huggingface/diffusers
into main
parents
520bb082
9ec5084a
Changes
10
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1391 additions
and
31 deletions
+1391
-31
docs/source/api/pipelines/stable_diffusion.mdx
docs/source/api/pipelines/stable_diffusion.mdx
+7
-0
src/diffusers/__init__.py
src/diffusers/__init__.py
+1
-0
src/diffusers/pipeline_utils.py
src/diffusers/pipeline_utils.py
+5
-3
src/diffusers/pipelines/__init__.py
src/diffusers/pipelines/__init__.py
+1
-0
src/diffusers/pipelines/stable_diffusion/__init__.py
src/diffusers/pipelines/stable_diffusion/__init__.py
+1
-0
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
...nes/stable_diffusion/pipeline_stable_diffusion_upscale.py
+551
-0
src/diffusers/utils/dummy_torch_and_transformers_objects.py
src/diffusers/utils/dummy_torch_and_transformers_objects.py
+15
-0
tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
+21
-28
tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py
...lines/stable_diffusion_2/test_stable_diffusion_upscale.py
+315
-0
tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py
...elines/stable_diffusion_2/test_stable_diffusion_v_pred.py
+474
-0
No files found.
docs/source/api/pipelines/stable_diffusion.mdx
View file @
76845183
...
@@ -95,3 +95,10 @@ If you want to use all possible use cases in a single `DiffusionPipeline` you ca
...
@@ -95,3 +95,10 @@ If you want to use all possible use cases in a single `DiffusionPipeline` you ca
- __call__
- __call__
- enable_attention_slicing
- enable_attention_slicing
- disable_attention_slicing
- disable_attention_slicing
## StableDiffusionUpscalePipeline
[[autodoc]] StableDiffusionUpscalePipeline
- __call__
- enable_attention_slicing
- disable_attention_slicing
src/diffusers/__init__.py
View file @
76845183
...
@@ -75,6 +75,7 @@ if is_torch_available() and is_transformers_available():
...
@@ -75,6 +75,7 @@ if is_torch_available() and is_transformers_available():
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionPipeline
,
StableDiffusionPipeline
,
StableDiffusionPipelineSafe
,
StableDiffusionPipelineSafe
,
StableDiffusionUpscalePipeline
,
VersatileDiffusionDualGuidedPipeline
,
VersatileDiffusionDualGuidedPipeline
,
VersatileDiffusionImageVariationPipeline
,
VersatileDiffusionImageVariationPipeline
,
VersatileDiffusionPipeline
,
VersatileDiffusionPipeline
,
...
...
src/diffusers/pipeline_utils.py
View file @
76845183
...
@@ -554,7 +554,9 @@ class DiffusionPipeline(ConfigMixin):
...
@@ -554,7 +554,9 @@ class DiffusionPipeline(ConfigMixin):
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
load_module
(
k
,
v
)}
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
load_module
(
k
,
v
)}
if
len
(
unused_kwargs
)
>
0
:
if
len
(
unused_kwargs
)
>
0
:
logger
.
warning
(
f
"Keyword arguments
{
unused_kwargs
}
not recognized."
)
logger
.
warning
(
f
"Keyword arguments
{
unused_kwargs
}
are not expected by
{
pipeline_class
.
__name__
}
and will be ignored."
)
# import it here to avoid circular import
# import it here to avoid circular import
from
diffusers
import
pipelines
from
diffusers
import
pipelines
...
@@ -680,8 +682,8 @@ class DiffusionPipeline(ConfigMixin):
...
@@ -680,8 +682,8 @@ class DiffusionPipeline(ConfigMixin):
@
staticmethod
@
staticmethod
def
_get_signature_keys
(
obj
):
def
_get_signature_keys
(
obj
):
parameters
=
inspect
.
signature
(
obj
.
__init__
).
parameters
parameters
=
inspect
.
signature
(
obj
.
__init__
).
parameters
required_parameters
=
{
k
:
v
for
k
,
v
in
parameters
.
items
()
if
v
.
default
is
not
True
}
required_parameters
=
{
k
:
v
for
k
,
v
in
parameters
.
items
()
if
v
.
default
==
inspect
.
_empty
}
optional_parameters
=
set
({
k
for
k
,
v
in
parameters
.
items
()
if
v
.
default
is
True
})
optional_parameters
=
set
({
k
for
k
,
v
in
parameters
.
items
()
if
v
.
default
!=
inspect
.
_empty
})
expected_modules
=
set
(
required_parameters
.
keys
())
-
set
([
"self"
])
expected_modules
=
set
(
required_parameters
.
keys
())
-
set
([
"self"
])
return
expected_modules
,
optional_parameters
return
expected_modules
,
optional_parameters
...
...
src/diffusers/pipelines/__init__.py
View file @
76845183
...
@@ -24,6 +24,7 @@ if is_torch_available() and is_transformers_available():
...
@@ -24,6 +24,7 @@ if is_torch_available() and is_transformers_available():
StableDiffusionInpaintPipeline
,
StableDiffusionInpaintPipeline
,
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionPipeline
,
StableDiffusionPipeline
,
StableDiffusionUpscalePipeline
,
)
)
from
.stable_diffusion_safe
import
StableDiffusionPipelineSafe
from
.stable_diffusion_safe
import
StableDiffusionPipelineSafe
from
.versatile_diffusion
import
(
from
.versatile_diffusion
import
(
...
...
src/diffusers/pipelines/stable_diffusion/__init__.py
View file @
76845183
...
@@ -40,6 +40,7 @@ if is_transformers_available() and is_torch_available():
...
@@ -40,6 +40,7 @@ if is_transformers_available() and is_torch_available():
from
.pipeline_stable_diffusion_img2img
import
StableDiffusionImg2ImgPipeline
from
.pipeline_stable_diffusion_img2img
import
StableDiffusionImg2ImgPipeline
from
.pipeline_stable_diffusion_inpaint
import
StableDiffusionInpaintPipeline
from
.pipeline_stable_diffusion_inpaint
import
StableDiffusionInpaintPipeline
from
.pipeline_stable_diffusion_inpaint_legacy
import
StableDiffusionInpaintPipelineLegacy
from
.pipeline_stable_diffusion_inpaint_legacy
import
StableDiffusionInpaintPipelineLegacy
from
.pipeline_stable_diffusion_upscale
import
StableDiffusionUpscalePipeline
from
.safety_checker
import
StableDiffusionSafetyChecker
from
.safety_checker
import
StableDiffusionSafetyChecker
if
is_transformers_available
()
and
is_torch_available
()
and
is_transformers_version
(
">="
,
"4.25.0.dev0"
):
if
is_transformers_available
()
and
is_torch_available
()
and
is_transformers_version
(
">="
,
"4.25.0.dev0"
):
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
0 → 100644
View file @
76845183
This diff is collapsed.
Click to expand it.
src/diffusers/utils/dummy_torch_and_transformers_objects.py
View file @
76845183
...
@@ -154,6 +154,21 @@ class StableDiffusionPipelineSafe(metaclass=DummyObject):
...
@@ -154,6 +154,21 @@ class StableDiffusionPipelineSafe(metaclass=DummyObject):
requires_backends
(
cls
,
[
"torch"
,
"transformers"
])
requires_backends
(
cls
,
[
"torch"
,
"transformers"
])
class StableDiffusionUpscalePipeline(metaclass=DummyObject):
    """Placeholder for `StableDiffusionUpscalePipeline`.

    Every entry point below only forwards to `requires_backends` with the
    "torch" and "transformers" backends.
    NOTE(review): presumably `requires_backends` raises when one of those
    backends is not installed -- confirm against its definition.
    """

    # Backends this placeholder stands in for.
    _backends = ["torch", "transformers"]

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but never used; only the backend check runs.
        requires_backends(self, ["torch", "transformers"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        # Same backend check, performed on the class instead of an instance.
        requires_backends(cls, ["torch", "transformers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Same backend check, performed on the class instead of an instance.
        requires_backends(cls, ["torch", "transformers"])
class
VersatileDiffusionDualGuidedPipeline
(
metaclass
=
DummyObject
):
class
VersatileDiffusionDualGuidedPipeline
(
metaclass
=
DummyObject
):
_backends
=
[
"torch"
,
"transformers"
]
_backends
=
[
"torch"
,
"transformers"
]
...
...
tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
View file @
76845183
...
@@ -34,7 +34,7 @@ from diffusers import (
...
@@ -34,7 +34,7 @@ from diffusers import (
)
)
from
diffusers.utils
import
load_numpy
,
slow
,
torch_device
from
diffusers.utils
import
load_numpy
,
slow
,
torch_device
from
diffusers.utils.testing_utils
import
CaptureLogger
,
require_torch_gpu
from
diffusers.utils.testing_utils
import
CaptureLogger
,
require_torch_gpu
from
transformers
import
CLIPFeatureExtractor
,
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
transformers
import
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
...test_pipelines_common
import
PipelineTesterMixin
from
...test_pipelines_common
import
PipelineTesterMixin
...
@@ -100,21 +100,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -100,21 +100,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)
)
return
CLIPTextModel
(
config
)
return
CLIPTextModel
(
config
)
@
property
def
dummy_extractor
(
self
):
def
extract
(
*
args
,
**
kwargs
):
class
Out
:
def
__init__
(
self
):
self
.
pixel_values
=
torch
.
ones
([
0
])
def
to
(
self
,
device
):
self
.
pixel_values
.
to
(
device
)
return
self
return
Out
()
return
extract
def
test_save_pretrained_from_pretrained
(
self
):
def
test_save_pretrained_from_pretrained
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet
unet
=
self
.
dummy_cond_unet
...
@@ -129,7 +114,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -129,7 +114,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae
=
self
.
dummy_vae
vae
=
self
.
dummy_vae
bert
=
self
.
dummy_text_encoder
bert
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
feature_extractor
=
CLIPFeatureExtractor
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
# make sure here that pndm scheduler skips prk
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionPipeline
(
sd_pipe
=
StableDiffusionPipeline
(
...
@@ -139,7 +123,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -139,7 +123,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
feature_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -185,7 +170,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -185,7 +170,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -231,7 +217,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -231,7 +217,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -276,7 +263,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -276,7 +263,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -321,7 +309,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -321,7 +309,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -366,7 +355,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -366,7 +355,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -411,7 +401,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -411,7 +401,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -449,7 +440,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -449,7 +440,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -475,7 +467,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -475,7 +467,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -572,7 +565,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
...
@@ -572,7 +565,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
expected_slice
=
np
.
array
([
0.0548
,
0.0626
,
0.0612
,
0.0611
,
0.0706
,
0.0586
,
0.0843
,
0.0333
,
0.1197
])
expected_slice
=
np
.
array
([
0.0548
,
0.0626
,
0.0612
,
0.0611
,
0.0706
,
0.0586
,
0.0843
,
0.0333
,
0.1197
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
def
test_stable_diffusion_
memory_chunk
ing
(
self
):
def
test_stable_diffusion_
attention_slic
ing
(
self
):
torch
.
cuda
.
reset_peak_memory_stats
()
torch
.
cuda
.
reset_peak_memory_stats
()
model_id
=
"stabilityai/stable-diffusion-2-base"
model_id
=
"stabilityai/stable-diffusion-2-base"
pipe
=
StableDiffusionPipeline
.
from_pretrained
(
model_id
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
)
pipe
=
StableDiffusionPipeline
.
from_pretrained
(
model_id
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
)
...
@@ -651,7 +644,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
...
@@ -651,7 +644,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
prompt
=
"astronaut riding a horse"
prompt
=
"astronaut riding a horse"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
output
=
pipe
(
prompt
=
prompt
,
strength
=
0.75
,
guidance_scale
=
7.5
,
generator
=
generator
,
output_type
=
"np"
)
output
=
pipe
(
prompt
=
prompt
,
guidance_scale
=
7.5
,
generator
=
generator
,
output_type
=
"np"
)
image
=
output
.
images
[
0
]
image
=
output
.
images
[
0
]
assert
image
.
shape
==
(
512
,
512
,
3
)
assert
image
.
shape
==
(
512
,
512
,
3
)
...
...
tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py
0 → 100644
View file @
76845183
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
gc
import
random
import
unittest
import
numpy
as
np
import
torch
from
diffusers
import
AutoencoderKL
,
DDIMScheduler
,
DDPMScheduler
,
StableDiffusionUpscalePipeline
,
UNet2DConditionModel
from
diffusers.utils
import
floats_tensor
,
load_image
,
load_numpy
,
slow
,
torch_device
from
diffusers.utils.testing_utils
import
require_torch_gpu
from
PIL
import
Image
from
transformers
import
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
...test_pipelines_common
import
PipelineTesterMixin
# Turn off TF32 for CUDA matmuls for everything in this module.
# NOTE(review): presumably done so results stay within the numeric tolerances
# asserted by the tests below -- confirm.
torch.backends.cuda.matmul.allow_tf32 = False
class StableDiffusionUpscalePipelineFastTests(PipelineTesterMixin, unittest.TestCase):
    """Fast tests for `StableDiffusionUpscalePipeline` assembled from tiny, seeded dummy components."""

    def tearDown(self):
        # Parent teardown first, then release Python garbage and cached CUDA
        # memory so VRAM is cleaned up after each test.
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    @property
    def dummy_image(self):
        """Return a (1, 3, 32, 32) tensor from `floats_tensor` (RNG seeded with 0) on `torch_device`."""
        shape = (1, 3) + (32, 32)
        return floats_tensor(shape, rng=random.Random(0)).to(torch_device)

    @property
    def dummy_cond_unet_upscale(self):
        """Return a small `UNet2DConditionModel` built right after seeding torch with 0."""
        torch.manual_seed(0)
        unet_config = dict(
            block_out_channels=(32, 32, 64),
            layers_per_block=2,
            sample_size=32,
            in_channels=7,
            out_channels=4,
            down_block_types=("DownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D"),
            up_block_types=("CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "UpBlock2D"),
            cross_attention_dim=32,
            # SD2-specific config below
            attention_head_dim=8,
            use_linear_projection=True,
            only_cross_attention=(True, True, False),
            num_class_embeds=100,
        )
        return UNet2DConditionModel(**unet_config)

    @property
    def dummy_vae(self):
        """Return a small `AutoencoderKL` built right after seeding torch with 0."""
        torch.manual_seed(0)
        vae_config = dict(
            block_out_channels=[32, 32, 64],
            in_channels=3,
            out_channels=3,
            down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"],
            up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"],
            latent_channels=4,
        )
        return AutoencoderKL(**vae_config)

    @property
    def dummy_text_encoder(self):
        """Return a small `CLIPTextModel` built right after seeding torch with 0."""
        torch.manual_seed(0)
        text_config = CLIPTextConfig(
            bos_token_id=0,
            eos_token_id=2,
            hidden_size=32,
            intermediate_size=37,
            layer_norm_eps=1e-05,
            num_attention_heads=4,
            num_hidden_layers=5,
            pad_token_id=1,
            vocab_size=1000,
            # SD2-specific config below
            hidden_act="gelu",
            projection_dim=512,
        )
        return CLIPTextModel(text_config)

    def test_stable_diffusion_upscale(self):
        """Run the pipeline twice on CPU with identical seeds (dict and tuple return) and
        compare the bottom-right 3x3 patch of the last channel with reference values."""
        device = "cpu"  # ensure determinism for the device-dependent torch.Generator

        # Dict values are evaluated top to bottom, i.e. unet, schedulers, vae,
        # text encoder, tokenizer.
        components = {
            "unet": self.dummy_cond_unet_upscale,
            "low_res_scheduler": DDPMScheduler(),
            "scheduler": DDIMScheduler(prediction_type="v_prediction"),
            "vae": self.dummy_vae,
            "text_encoder": self.dummy_text_encoder,
            "tokenizer": CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip"),
        }

        # Move channels to the last axis, take the single sample, and turn it
        # into a 64x64 RGB PIL image used as the low-resolution input.
        channels_last = self.dummy_image.cpu().permute(0, 2, 3, 1)[0]
        low_res_image = Image.fromarray(np.uint8(channels_last)).convert("RGB").resize((64, 64))

        sd_pipe = StableDiffusionUpscalePipeline(max_noise_level=350, **components).to(device)
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        shared_call_args = dict(
            image=low_res_image,
            guidance_scale=6.0,
            noise_level=20,
            num_inference_steps=2,
            output_type="np",
        )

        # Each call gets its own generator seeded with 0 so both runs start
        # from the same random state.
        generator = torch.Generator(device=device).manual_seed(0)
        image = sd_pipe([prompt], generator=generator, **shared_call_args).images

        generator = torch.Generator(device=device).manual_seed(0)
        image_from_tuple = sd_pipe([prompt], generator=generator, return_dict=False, **shared_call_args)[0]

        # Output is expected to be four times the input side length.
        expected_height_width = low_res_image.size[0] * 4
        assert image.shape == (1, expected_height_width, expected_height_width, 3)

        expected_slice = np.array([0.2562, 0.3606, 0.4204, 0.4469, 0.4822, 0.4647, 0.5315, 0.5748, 0.5606])
        for result in (image, image_from_tuple):
            patch = result[0, -3:, -3:, -1]
            assert np.abs(patch.flatten() - expected_slice).max() < 1e-2

    @unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
    def test_stable_diffusion_upscale_fp16(self):
        """Test that stable diffusion upscale works with fp16"""
        components = {
            "unet": self.dummy_cond_unet_upscale,
            "low_res_scheduler": DDPMScheduler(),
            "scheduler": DDIMScheduler(prediction_type="v_prediction"),
            "vae": self.dummy_vae,
            "text_encoder": self.dummy_text_encoder,
            "tokenizer": CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip"),
        }

        channels_last = self.dummy_image.cpu().permute(0, 2, 3, 1)[0]
        low_res_image = Image.fromarray(np.uint8(channels_last)).convert("RGB").resize((64, 64))

        # put models in fp16, except vae as it overflows in fp16
        for half_precision_part in ("unet", "text_encoder"):
            components[half_precision_part] = components[half_precision_part].half()

        sd_pipe = StableDiffusionUpscalePipeline(max_noise_level=350, **components).to(torch_device)
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)
        result = sd_pipe(
            [prompt],
            image=low_res_image,
            generator=generator,
            num_inference_steps=2,
            output_type="np",
        )
        image = result.images

        # Only the output shape is checked here: four times the input side length.
        expected_height_width = low_res_image.size[0] * 4
        assert image.shape == (1, expected_height_width, expected_height_width, 3)
@slow
@require_torch_gpu
class StableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
    """Integration tests that load the "stabilityai/stable-diffusion-x4-upscaler" checkpoint."""

    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_stable_diffusion_upscale_pipeline(self):
        """Upscale the reference cat image with default weights and compare with the stored array."""
        image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/sd2-upscale/low_res_cat.png"
        )
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale"
            "/upsampled_cat.npy"
        )

        model_id = "stabilityai/stable-diffusion-x4-upscaler"
        pipe = StableDiffusionUpscalePipeline.from_pretrained(model_id)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        prompt = "a cat sitting on a park bench"

        # Fixed seed so the output can be compared with the stored reference.
        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = pipe(
            prompt=prompt,
            image=image,
            generator=generator,
            output_type="np",
        )
        image = output.images[0]

        assert image.shape == (512, 512, 3)
        assert np.abs(expected_image - image).max() < 1e-3

    def test_stable_diffusion_upscale_pipeline_fp16(self):
        """Same as the default test but with the "fp16" revision in float16 and a looser tolerance."""
        image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/sd2-upscale/low_res_cat.png"
        )
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale"
            "/upsampled_cat_fp16.npy"
        )

        model_id = "stabilityai/stable-diffusion-x4-upscaler"
        pipe = StableDiffusionUpscalePipeline.from_pretrained(
            model_id,
            revision="fp16",
            torch_dtype=torch.float16,
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        prompt = "a cat sitting on a park bench"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = pipe(
            prompt=prompt,
            image=image,
            generator=generator,
            output_type="np",
        )
        image = output.images[0]

        assert image.shape == (512, 512, 3)
        assert np.abs(expected_image - image).max() < 5e-1

    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading(self):
        """Check the peak CUDA allocation of a short fp16 run with attention slicing and CPU offload."""
        # Start from a clean slate so the peak measured below belongs to this test.
        # `reset_peak_memory_stats` resets all peak counters, including the one
        # read by `max_memory_allocated`; the deprecated
        # `reset_max_memory_allocated` call that used to precede it was redundant.
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()

        image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/sd2-upscale/low_res_cat.png"
        )

        model_id = "stabilityai/stable-diffusion-x4-upscaler"
        pipe = StableDiffusionUpscalePipeline.from_pretrained(
            model_id,
            revision="fp16",
            torch_dtype=torch.float16,
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing(1)
        pipe.enable_sequential_cpu_offload()

        prompt = "a cat sitting on a park bench"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        # The generated images are not inspected; only memory usage matters here.
        _ = pipe(
            prompt=prompt,
            image=image,
            generator=generator,
            num_inference_steps=5,
            output_type="np",
        )

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.65 GB is allocated
        assert mem_bytes < 2.65 * 10**9
tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py
0 → 100644
View file @
76845183
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
gc
import
time
import
unittest
import
numpy
as
np
import
torch
from
diffusers
import
(
AutoencoderKL
,
DDIMScheduler
,
DPMSolverMultistepScheduler
,
EulerDiscreteScheduler
,
StableDiffusionPipeline
,
UNet2DConditionModel
,
)
from
diffusers.utils
import
load_numpy
,
slow
,
torch_device
from
diffusers.utils.testing_utils
import
require_torch_gpu
from
transformers
import
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
...test_pipelines_common
import
PipelineTesterMixin
# Turn off TF32 for CUDA matmuls for everything in this module.
# NOTE(review): presumably done so results stay within the numeric tolerances
# asserted by the tests below -- confirm.
torch.backends.cuda.matmul.allow_tf32 = False
class
StableDiffusion2VPredictionPipelineFastTests
(
PipelineTesterMixin
,
unittest
.
TestCase
):
def
tearDown
(
self
):
# clean up the VRAM after each test
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
@
property
def
dummy_cond_unet
(
self
):
torch
.
manual_seed
(
0
)
model
=
UNet2DConditionModel
(
block_out_channels
=
(
32
,
64
),
layers_per_block
=
2
,
sample_size
=
32
,
in_channels
=
4
,
out_channels
=
4
,
down_block_types
=
(
"DownBlock2D"
,
"CrossAttnDownBlock2D"
),
up_block_types
=
(
"CrossAttnUpBlock2D"
,
"UpBlock2D"
),
cross_attention_dim
=
32
,
# SD2-specific config below
attention_head_dim
=
(
2
,
4
,
8
,
8
),
use_linear_projection
=
True
,
)
return
model
@
property
def
dummy_vae
(
self
):
torch
.
manual_seed
(
0
)
model
=
AutoencoderKL
(
block_out_channels
=
[
32
,
64
],
in_channels
=
3
,
out_channels
=
3
,
down_block_types
=
[
"DownEncoderBlock2D"
,
"DownEncoderBlock2D"
],
up_block_types
=
[
"UpDecoderBlock2D"
,
"UpDecoderBlock2D"
],
latent_channels
=
4
,
sample_size
=
128
,
)
return
model
@
property
def
dummy_text_encoder
(
self
):
torch
.
manual_seed
(
0
)
config
=
CLIPTextConfig
(
bos_token_id
=
0
,
eos_token_id
=
2
,
hidden_size
=
32
,
intermediate_size
=
37
,
layer_norm_eps
=
1e-05
,
num_attention_heads
=
4
,
num_hidden_layers
=
5
,
pad_token_id
=
1
,
vocab_size
=
1000
,
# SD2-specific config below
hidden_act
=
"gelu"
,
projection_dim
=
64
,
)
return
CLIPTextModel
(
config
)
def
test_stable_diffusion_v_pred_ddim
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet
scheduler
=
DDIMScheduler
(
beta_start
=
0.00085
,
beta_end
=
0.012
,
beta_schedule
=
"scaled_linear"
,
clip_sample
=
False
,
set_alpha_to_one
=
False
,
prediction_type
=
"v_prediction"
,
)
vae
=
self
.
dummy_vae
bert
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionPipeline
(
unet
=
unet
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
output
=
sd_pipe
([
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
)
image
=
output
.
images
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
image_from_tuple
=
sd_pipe
(
[
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
,
return_dict
=
False
,
)[
0
]
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.6424
,
0.6109
,
0.494
,
0.5088
,
0.4984
,
0.4525
,
0.5059
,
0.5068
,
0.4474
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
def
test_stable_diffusion_v_pred_k_euler
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet
scheduler
=
EulerDiscreteScheduler
(
beta_start
=
0.00085
,
beta_end
=
0.012
,
beta_schedule
=
"scaled_linear"
,
prediction_type
=
"v_prediction"
)
vae
=
self
.
dummy_vae
bert
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionPipeline
(
unet
=
unet
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
output
=
sd_pipe
([
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
)
image
=
output
.
images
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
image_from_tuple
=
sd_pipe
(
[
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
,
return_dict
=
False
,
)[
0
]
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.4616
,
0.5184
,
0.4887
,
0.5111
,
0.4839
,
0.48
,
0.5119
,
0.5263
,
0.4776
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
@unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
def test_stable_diffusion_v_pred_fp16(self):
    """Test that stable diffusion v-prediction works with fp16.

    Only the shape of the generated image is checked; no pixel values are
    compared.
    """
    components = {
        "unet": self.dummy_cond_unet,
        "scheduler": DDIMScheduler(
            beta_start=0.00085,
            beta_end=0.012,
            beta_schedule="scaled_linear",
            clip_sample=False,
            set_alpha_to_one=False,
            prediction_type="v_prediction",
        ),
        "vae": self.dummy_vae,
        "text_encoder": self.dummy_text_encoder,
        "tokenizer": CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip"),
    }

    # put models in fp16
    for model_name in ("unet", "vae", "text_encoder"):
        components[model_name] = components[model_name].half()

    sd_pipe = StableDiffusionPipeline(
        **components,
        safety_checker=None,
        feature_extractor=None,
        requires_safety_checker=False,
    )
    sd_pipe = sd_pipe.to(torch_device)
    sd_pipe.set_progress_bar_config(disable=None)

    prompt = "A painting of a squirrel eating a burger"
    seeded = torch.Generator(device=torch_device).manual_seed(0)
    result = sd_pipe([prompt], generator=seeded, num_inference_steps=2, output_type="np")
    image = result.images

    assert image.shape == (1, 64, 64, 3)
@slow
@require_torch_gpu
class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
    """Integration tests for ``StableDiffusionPipeline`` with "stabilityai/stable-diffusion-2".

    Every test loads the pretrained checkpoint and moves it to
    ``torch_device``; the class is gated by ``@slow`` and
    ``@require_torch_gpu``.
    """

    def tearDown(self):
        """Release Python garbage and cached CUDA memory after each test."""
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_stable_diffusion_v_pred_default(self):
        """Compare a 3x3 patch of a 20-step sample (checkpoint's own scheduler) with reference values."""
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2")
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.enable_attention_slicing()
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = sd_pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=20, output_type="np")

        image = output.images
        image_slice = image[0, 253:256, 253:256, -1]

        assert image.shape == (1, 768, 768, 3)
        expected_slice = np.array([0.0567, 0.057, 0.0416, 0.0463, 0.0433, 0.06, 0.0517, 0.0526, 0.0866])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_v_pred_euler(self):
        """Compare a 3x3 patch of a 5-step sample using ``EulerDiscreteScheduler`` with reference values."""
        scheduler = EulerDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-2", subfolder="scheduler")
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", scheduler=scheduler)
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.enable_attention_slicing()
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)

        output = sd_pipe([prompt], generator=generator, num_inference_steps=5, output_type="numpy")
        image = output.images

        image_slice = image[0, 253:256, 253:256, -1]

        assert image.shape == (1, 768, 768, 3)
        expected_slice = np.array([0.0351, 0.0376, 0.0505, 0.0424, 0.0551, 0.0656, 0.0471, 0.0276, 0.0596])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_v_pred_dpm(self):
        """
        TODO: update this test after making DPM compatible with V-prediction!

        NOTE(review): the reference slice below is all zeros, i.e. a placeholder
        rather than a meaningful reference image.
        """
        scheduler = DPMSolverMultistepScheduler.from_pretrained(
            "stabilityai/stable-diffusion-2", subfolder="scheduler"
        )
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", scheduler=scheduler)
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.enable_attention_slicing()
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "a photograph of an astronaut riding a horse"
        generator = torch.Generator(device=torch_device).manual_seed(0)
        image = sd_pipe(
            [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=5, output_type="numpy"
        ).images

        image_slice = image[0, 253:256, 253:256, -1]

        assert image.shape == (1, 768, 768, 3)
        expected_slice = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_attention_slicing_v_pred(self):
        """Check that attention slicing lowers peak CUDA memory without changing the image.

        The fp16 pipeline is run once with slicing enabled (peak must stay
        below 5.5 GB) and once with slicing disabled (peak must exceed
        5.5 GB); both images must agree to within 1e-3.
        """
        torch.cuda.reset_peak_memory_stats()
        model_id = "stabilityai/stable-diffusion-2"
        pipe = StableDiffusionPipeline.from_pretrained(model_id, revision="fp16", torch_dtype=torch.float16)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        prompt = "a photograph of an astronaut riding a horse"

        # make attention efficient
        pipe.enable_attention_slicing()
        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast(torch_device):
            output_chunked = pipe(
                [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
            )
            image_chunked = output_chunked.images

        mem_bytes = torch.cuda.max_memory_allocated()
        # Reset so that the second measurement only covers the unsliced run.
        torch.cuda.reset_peak_memory_stats()
        # make sure that less than 5.5 GB is allocated
        assert mem_bytes < 5.5 * 10**9

        # disable slicing
        pipe.disable_attention_slicing()
        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast(torch_device):
            output = pipe(
                [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
            )
            image = output.images

        # make sure that more than 5.5 GB is allocated
        mem_bytes = torch.cuda.max_memory_allocated()
        assert mem_bytes > 5.5 * 10**9
        assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-3

    def test_stable_diffusion_text2img_pipeline_v_pred_default(self):
        """Compare a full generated image against a stored reference array (tolerance 5e-3)."""
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/"
            "sd2-text2img/astronaut_riding_a_horse_v_pred.npy"
        )

        pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2")
        pipe.to(torch_device)
        pipe.enable_attention_slicing()
        pipe.set_progress_bar_config(disable=None)

        prompt = "astronaut riding a horse"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np")
        image = output.images[0]

        assert image.shape == (768, 768, 3)
        assert np.abs(expected_image - image).max() < 5e-3

    def test_stable_diffusion_text2img_pipeline_v_pred_fp16(self):
        """Compare a full image from the fp16 revision against its stored reference array (tolerance 5e-3)."""
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/"
            "sd2-text2img/astronaut_riding_a_horse_v_pred_fp16.npy"
        )

        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2", revision="fp16", torch_dtype=torch.float16
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        prompt = "astronaut riding a horse"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np")
        image = output.images[0]

        assert image.shape == (768, 768, 3)
        assert np.abs(expected_image - image).max() < 5e-3

    def test_stable_diffusion_text2img_intermediate_state_v_pred(self):
        """Check the per-step callback: it runs 20 times and sees the expected latents at steps 0 and 19."""
        number_of_steps = 0

        def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            nonlocal number_of_steps
            test_callback_fn.has_been_called = True
            number_of_steps += 1
            if step == 0:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 96, 96)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.2543, -1.2755, 0.4261, -0.9555, -1.173, -0.5892, 2.4159, 0.1554, -1.2098]
                )
                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-3
            elif step == 19:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 96, 96)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.9572, -0.967, -0.6152, 0.0894, -0.699, -0.2344, 1.5465, -0.0357, -0.1141]
                )
                assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-2

        # Function attribute used as a flag so the test can verify the callback ran at all.
        test_callback_fn.has_been_called = False

        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2", revision="fp16", torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        prompt = "Andromeda galaxy in a bottle"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast(torch_device):
            pipe(
                prompt=prompt,
                num_inference_steps=20,
                guidance_scale=7.5,
                generator=generator,
                callback=test_callback_fn,
                callback_steps=1,
            )
        assert test_callback_fn.has_been_called
        assert number_of_steps == 20

    def test_stable_diffusion_low_cpu_mem_usage_v_pred(self):
        """Check that default loading is at least twice as fast as ``low_cpu_mem_usage=False``.

        The first measurement covers ``from_pretrained`` with default
        arguments plus the move to ``torch_device``; the second covers only
        ``from_pretrained(..., low_cpu_mem_usage=False)``.
        """
        pipeline_id = "stabilityai/stable-diffusion-2"

        # perf_counter is a monotonic, high-resolution clock; wall-clock time.time()
        # can jump (e.g. NTP adjustments) and distort the measured durations.
        start_time = time.perf_counter()
        pipeline_low_cpu_mem_usage = StableDiffusionPipeline.from_pretrained(
            pipeline_id, revision="fp16", torch_dtype=torch.float16
        )
        pipeline_low_cpu_mem_usage.to(torch_device)
        low_cpu_mem_usage_time = time.perf_counter() - start_time

        start_time = time.perf_counter()
        _ = StableDiffusionPipeline.from_pretrained(
            pipeline_id, revision="fp16", torch_dtype=torch.float16, low_cpu_mem_usage=False
        )
        normal_load_time = time.perf_counter() - start_time

        assert 2 * low_cpu_mem_usage_time < normal_load_time

    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading_v_pred(self):
        """Check that sequential CPU offloading keeps peak CUDA memory below 2.8 GB for a 5-step run."""
        torch.cuda.empty_cache()
        # reset_peak_memory_stats() resets all peak counters, including the one read by
        # max_memory_allocated(); the deprecated reset_max_memory_allocated() merely
        # forwards to it, so it is not called separately.
        torch.cuda.reset_peak_memory_stats()

        pipeline_id = "stabilityai/stable-diffusion-2"
        prompt = "Andromeda galaxy in a bottle"

        pipeline = StableDiffusionPipeline.from_pretrained(pipeline_id, revision="fp16", torch_dtype=torch.float16)
        pipeline = pipeline.to(torch_device)
        pipeline.enable_attention_slicing(1)
        pipeline.enable_sequential_cpu_offload()

        generator = torch.Generator(device=torch_device).manual_seed(0)
        _ = pipeline(prompt, generator=generator, num_inference_steps=5)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.8 GB is allocated
        assert mem_bytes < 2.8 * 10**9
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment