Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
renzhc
diffusers_dcu
Commits
76845183
Commit
76845183
authored
Nov 25, 2022
by
Patrick von Platen
Browse files
Merge branch 'main' of
https://github.com/huggingface/diffusers
into main
parents
520bb082
9ec5084a
Changes
10
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1391 additions
and
31 deletions
+1391
-31
docs/source/api/pipelines/stable_diffusion.mdx
docs/source/api/pipelines/stable_diffusion.mdx
+7
-0
src/diffusers/__init__.py
src/diffusers/__init__.py
+1
-0
src/diffusers/pipeline_utils.py
src/diffusers/pipeline_utils.py
+5
-3
src/diffusers/pipelines/__init__.py
src/diffusers/pipelines/__init__.py
+1
-0
src/diffusers/pipelines/stable_diffusion/__init__.py
src/diffusers/pipelines/stable_diffusion/__init__.py
+1
-0
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
...nes/stable_diffusion/pipeline_stable_diffusion_upscale.py
+551
-0
src/diffusers/utils/dummy_torch_and_transformers_objects.py
src/diffusers/utils/dummy_torch_and_transformers_objects.py
+15
-0
tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
+21
-28
tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py
...lines/stable_diffusion_2/test_stable_diffusion_upscale.py
+315
-0
tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py
...elines/stable_diffusion_2/test_stable_diffusion_v_pred.py
+474
-0
No files found.
docs/source/api/pipelines/stable_diffusion.mdx
View file @
76845183
...
@@ -95,3 +95,10 @@ If you want to use all possible use cases in a single `DiffusionPipeline` you ca
...
@@ -95,3 +95,10 @@ If you want to use all possible use cases in a single `DiffusionPipeline` you ca
- __call__
- __call__
- enable_attention_slicing
- enable_attention_slicing
- disable_attention_slicing
- disable_attention_slicing
## StableDiffusionUpscalePipeline
[[autodoc]] StableDiffusionUpscalePipeline
- __call__
- enable_attention_slicing
- disable_attention_slicing
src/diffusers/__init__.py
View file @
76845183
...
@@ -75,6 +75,7 @@ if is_torch_available() and is_transformers_available():
...
@@ -75,6 +75,7 @@ if is_torch_available() and is_transformers_available():
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionPipeline
,
StableDiffusionPipeline
,
StableDiffusionPipelineSafe
,
StableDiffusionPipelineSafe
,
StableDiffusionUpscalePipeline
,
VersatileDiffusionDualGuidedPipeline
,
VersatileDiffusionDualGuidedPipeline
,
VersatileDiffusionImageVariationPipeline
,
VersatileDiffusionImageVariationPipeline
,
VersatileDiffusionPipeline
,
VersatileDiffusionPipeline
,
...
...
src/diffusers/pipeline_utils.py
View file @
76845183
...
@@ -554,7 +554,9 @@ class DiffusionPipeline(ConfigMixin):
...
@@ -554,7 +554,9 @@ class DiffusionPipeline(ConfigMixin):
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
load_module
(
k
,
v
)}
init_dict
=
{
k
:
v
for
k
,
v
in
init_dict
.
items
()
if
load_module
(
k
,
v
)}
if
len
(
unused_kwargs
)
>
0
:
if
len
(
unused_kwargs
)
>
0
:
logger
.
warning
(
f
"Keyword arguments
{
unused_kwargs
}
not recognized."
)
logger
.
warning
(
f
"Keyword arguments
{
unused_kwargs
}
are not expected by
{
pipeline_class
.
__name__
}
and will be ignored."
)
# import it here to avoid circular import
# import it here to avoid circular import
from
diffusers
import
pipelines
from
diffusers
import
pipelines
...
@@ -680,8 +682,8 @@ class DiffusionPipeline(ConfigMixin):
...
@@ -680,8 +682,8 @@ class DiffusionPipeline(ConfigMixin):
@
staticmethod
@
staticmethod
def
_get_signature_keys
(
obj
):
def
_get_signature_keys
(
obj
):
parameters
=
inspect
.
signature
(
obj
.
__init__
).
parameters
parameters
=
inspect
.
signature
(
obj
.
__init__
).
parameters
required_parameters
=
{
k
:
v
for
k
,
v
in
parameters
.
items
()
if
v
.
default
is
not
True
}
required_parameters
=
{
k
:
v
for
k
,
v
in
parameters
.
items
()
if
v
.
default
==
inspect
.
_empty
}
optional_parameters
=
set
({
k
for
k
,
v
in
parameters
.
items
()
if
v
.
default
is
True
})
optional_parameters
=
set
({
k
for
k
,
v
in
parameters
.
items
()
if
v
.
default
!=
inspect
.
_empty
})
expected_modules
=
set
(
required_parameters
.
keys
())
-
set
([
"self"
])
expected_modules
=
set
(
required_parameters
.
keys
())
-
set
([
"self"
])
return
expected_modules
,
optional_parameters
return
expected_modules
,
optional_parameters
...
...
src/diffusers/pipelines/__init__.py
View file @
76845183
...
@@ -24,6 +24,7 @@ if is_torch_available() and is_transformers_available():
...
@@ -24,6 +24,7 @@ if is_torch_available() and is_transformers_available():
StableDiffusionInpaintPipeline
,
StableDiffusionInpaintPipeline
,
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionInpaintPipelineLegacy
,
StableDiffusionPipeline
,
StableDiffusionPipeline
,
StableDiffusionUpscalePipeline
,
)
)
from
.stable_diffusion_safe
import
StableDiffusionPipelineSafe
from
.stable_diffusion_safe
import
StableDiffusionPipelineSafe
from
.versatile_diffusion
import
(
from
.versatile_diffusion
import
(
...
...
src/diffusers/pipelines/stable_diffusion/__init__.py
View file @
76845183
...
@@ -40,6 +40,7 @@ if is_transformers_available() and is_torch_available():
...
@@ -40,6 +40,7 @@ if is_transformers_available() and is_torch_available():
from
.pipeline_stable_diffusion_img2img
import
StableDiffusionImg2ImgPipeline
from
.pipeline_stable_diffusion_img2img
import
StableDiffusionImg2ImgPipeline
from
.pipeline_stable_diffusion_inpaint
import
StableDiffusionInpaintPipeline
from
.pipeline_stable_diffusion_inpaint
import
StableDiffusionInpaintPipeline
from
.pipeline_stable_diffusion_inpaint_legacy
import
StableDiffusionInpaintPipelineLegacy
from
.pipeline_stable_diffusion_inpaint_legacy
import
StableDiffusionInpaintPipelineLegacy
from
.pipeline_stable_diffusion_upscale
import
StableDiffusionUpscalePipeline
from
.safety_checker
import
StableDiffusionSafetyChecker
from
.safety_checker
import
StableDiffusionSafetyChecker
if
is_transformers_available
()
and
is_torch_available
()
and
is_transformers_version
(
">="
,
"4.25.0.dev0"
):
if
is_transformers_available
()
and
is_torch_available
()
and
is_transformers_version
(
">="
,
"4.25.0.dev0"
):
...
...
src/diffusers/pipelines/stable_diffusion/pipeline_stable_diffusion_upscale.py
0 → 100644
View file @
76845183
This diff is collapsed.
Click to expand it.
src/diffusers/utils/dummy_torch_and_transformers_objects.py
View file @
76845183
...
@@ -154,6 +154,21 @@ class StableDiffusionPipelineSafe(metaclass=DummyObject):
...
@@ -154,6 +154,21 @@ class StableDiffusionPipelineSafe(metaclass=DummyObject):
requires_backends
(
cls
,
[
"torch"
,
"transformers"
])
requires_backends
(
cls
,
[
"torch"
,
"transformers"
])
class
StableDiffusionUpscalePipeline
(
metaclass
=
DummyObject
):
_backends
=
[
"torch"
,
"transformers"
]
def
__init__
(
self
,
*
args
,
**
kwargs
):
requires_backends
(
self
,
[
"torch"
,
"transformers"
])
@
classmethod
def
from_config
(
cls
,
*
args
,
**
kwargs
):
requires_backends
(
cls
,
[
"torch"
,
"transformers"
])
@
classmethod
def
from_pretrained
(
cls
,
*
args
,
**
kwargs
):
requires_backends
(
cls
,
[
"torch"
,
"transformers"
])
class
VersatileDiffusionDualGuidedPipeline
(
metaclass
=
DummyObject
):
class
VersatileDiffusionDualGuidedPipeline
(
metaclass
=
DummyObject
):
_backends
=
[
"torch"
,
"transformers"
]
_backends
=
[
"torch"
,
"transformers"
]
...
...
tests/pipelines/stable_diffusion_2/test_stable_diffusion.py
View file @
76845183
...
@@ -34,7 +34,7 @@ from diffusers import (
...
@@ -34,7 +34,7 @@ from diffusers import (
)
)
from
diffusers.utils
import
load_numpy
,
slow
,
torch_device
from
diffusers.utils
import
load_numpy
,
slow
,
torch_device
from
diffusers.utils.testing_utils
import
CaptureLogger
,
require_torch_gpu
from
diffusers.utils.testing_utils
import
CaptureLogger
,
require_torch_gpu
from
transformers
import
CLIPFeatureExtractor
,
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
transformers
import
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
...test_pipelines_common
import
PipelineTesterMixin
from
...test_pipelines_common
import
PipelineTesterMixin
...
@@ -100,21 +100,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -100,21 +100,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
)
)
return
CLIPTextModel
(
config
)
return
CLIPTextModel
(
config
)
@
property
def
dummy_extractor
(
self
):
def
extract
(
*
args
,
**
kwargs
):
class
Out
:
def
__init__
(
self
):
self
.
pixel_values
=
torch
.
ones
([
0
])
def
to
(
self
,
device
):
self
.
pixel_values
.
to
(
device
)
return
self
return
Out
()
return
extract
def
test_save_pretrained_from_pretrained
(
self
):
def
test_save_pretrained_from_pretrained
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet
unet
=
self
.
dummy_cond_unet
...
@@ -129,7 +114,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -129,7 +114,6 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
vae
=
self
.
dummy_vae
vae
=
self
.
dummy_vae
bert
=
self
.
dummy_text_encoder
bert
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
feature_extractor
=
CLIPFeatureExtractor
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
# make sure here that pndm scheduler skips prk
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionPipeline
(
sd_pipe
=
StableDiffusionPipeline
(
...
@@ -139,7 +123,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -139,7 +123,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
feature_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -185,7 +170,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -185,7 +170,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -231,7 +217,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -231,7 +217,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -276,7 +263,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -276,7 +263,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -321,7 +309,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -321,7 +309,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -366,7 +355,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -366,7 +355,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -411,7 +401,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -411,7 +401,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -449,7 +440,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -449,7 +440,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -475,7 +467,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
...
@@ -475,7 +467,8 @@ class StableDiffusion2PipelineFastTests(PipelineTesterMixin, unittest.TestCase):
text_encoder
=
bert
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
safety_checker
=
None
,
feature_extractor
=
self
.
dummy_extractor
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
...
@@ -572,7 +565,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
...
@@ -572,7 +565,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
expected_slice
=
np
.
array
([
0.0548
,
0.0626
,
0.0612
,
0.0611
,
0.0706
,
0.0586
,
0.0843
,
0.0333
,
0.1197
])
expected_slice
=
np
.
array
([
0.0548
,
0.0626
,
0.0612
,
0.0611
,
0.0706
,
0.0586
,
0.0843
,
0.0333
,
0.1197
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
def
test_stable_diffusion_
memory_chunk
ing
(
self
):
def
test_stable_diffusion_
attention_slic
ing
(
self
):
torch
.
cuda
.
reset_peak_memory_stats
()
torch
.
cuda
.
reset_peak_memory_stats
()
model_id
=
"stabilityai/stable-diffusion-2-base"
model_id
=
"stabilityai/stable-diffusion-2-base"
pipe
=
StableDiffusionPipeline
.
from_pretrained
(
model_id
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
)
pipe
=
StableDiffusionPipeline
.
from_pretrained
(
model_id
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
)
...
@@ -651,7 +644,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
...
@@ -651,7 +644,7 @@ class StableDiffusion2PipelineIntegrationTests(unittest.TestCase):
prompt
=
"astronaut riding a horse"
prompt
=
"astronaut riding a horse"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
output
=
pipe
(
prompt
=
prompt
,
strength
=
0.75
,
guidance_scale
=
7.5
,
generator
=
generator
,
output_type
=
"np"
)
output
=
pipe
(
prompt
=
prompt
,
guidance_scale
=
7.5
,
generator
=
generator
,
output_type
=
"np"
)
image
=
output
.
images
[
0
]
image
=
output
.
images
[
0
]
assert
image
.
shape
==
(
512
,
512
,
3
)
assert
image
.
shape
==
(
512
,
512
,
3
)
...
...
tests/pipelines/stable_diffusion_2/test_stable_diffusion_upscale.py
0 → 100644
View file @
76845183
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
gc
import
random
import
unittest
import
numpy
as
np
import
torch
from
diffusers
import
AutoencoderKL
,
DDIMScheduler
,
DDPMScheduler
,
StableDiffusionUpscalePipeline
,
UNet2DConditionModel
from
diffusers.utils
import
floats_tensor
,
load_image
,
load_numpy
,
slow
,
torch_device
from
diffusers.utils.testing_utils
import
require_torch_gpu
from
PIL
import
Image
from
transformers
import
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
...test_pipelines_common
import
PipelineTesterMixin
torch
.
backends
.
cuda
.
matmul
.
allow_tf32
=
False
class
StableDiffusionUpscalePipelineFastTests
(
PipelineTesterMixin
,
unittest
.
TestCase
):
def
tearDown
(
self
):
# clean up the VRAM after each test
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
@
property
def
dummy_image
(
self
):
batch_size
=
1
num_channels
=
3
sizes
=
(
32
,
32
)
image
=
floats_tensor
((
batch_size
,
num_channels
)
+
sizes
,
rng
=
random
.
Random
(
0
)).
to
(
torch_device
)
return
image
@
property
def
dummy_cond_unet_upscale
(
self
):
torch
.
manual_seed
(
0
)
model
=
UNet2DConditionModel
(
block_out_channels
=
(
32
,
32
,
64
),
layers_per_block
=
2
,
sample_size
=
32
,
in_channels
=
7
,
out_channels
=
4
,
down_block_types
=
(
"DownBlock2D"
,
"CrossAttnDownBlock2D"
,
"CrossAttnDownBlock2D"
),
up_block_types
=
(
"CrossAttnUpBlock2D"
,
"CrossAttnUpBlock2D"
,
"UpBlock2D"
),
cross_attention_dim
=
32
,
# SD2-specific config below
attention_head_dim
=
8
,
use_linear_projection
=
True
,
only_cross_attention
=
(
True
,
True
,
False
),
num_class_embeds
=
100
,
)
return
model
@
property
def
dummy_vae
(
self
):
torch
.
manual_seed
(
0
)
model
=
AutoencoderKL
(
block_out_channels
=
[
32
,
32
,
64
],
in_channels
=
3
,
out_channels
=
3
,
down_block_types
=
[
"DownEncoderBlock2D"
,
"DownEncoderBlock2D"
,
"DownEncoderBlock2D"
],
up_block_types
=
[
"UpDecoderBlock2D"
,
"UpDecoderBlock2D"
,
"UpDecoderBlock2D"
],
latent_channels
=
4
,
)
return
model
@
property
def
dummy_text_encoder
(
self
):
torch
.
manual_seed
(
0
)
config
=
CLIPTextConfig
(
bos_token_id
=
0
,
eos_token_id
=
2
,
hidden_size
=
32
,
intermediate_size
=
37
,
layer_norm_eps
=
1e-05
,
num_attention_heads
=
4
,
num_hidden_layers
=
5
,
pad_token_id
=
1
,
vocab_size
=
1000
,
# SD2-specific config below
hidden_act
=
"gelu"
,
projection_dim
=
512
,
)
return
CLIPTextModel
(
config
)
def
test_stable_diffusion_upscale
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet_upscale
low_res_scheduler
=
DDPMScheduler
()
scheduler
=
DDIMScheduler
(
prediction_type
=
"v_prediction"
)
vae
=
self
.
dummy_vae
text_encoder
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
image
=
self
.
dummy_image
.
cpu
().
permute
(
0
,
2
,
3
,
1
)[
0
]
low_res_image
=
Image
.
fromarray
(
np
.
uint8
(
image
)).
convert
(
"RGB"
).
resize
((
64
,
64
))
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionUpscalePipeline
(
unet
=
unet
,
low_res_scheduler
=
low_res_scheduler
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
text_encoder
,
tokenizer
=
tokenizer
,
max_noise_level
=
350
,
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
output
=
sd_pipe
(
[
prompt
],
image
=
low_res_image
,
generator
=
generator
,
guidance_scale
=
6.0
,
noise_level
=
20
,
num_inference_steps
=
2
,
output_type
=
"np"
,
)
image
=
output
.
images
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
image_from_tuple
=
sd_pipe
(
[
prompt
],
image
=
low_res_image
,
generator
=
generator
,
guidance_scale
=
6.0
,
noise_level
=
20
,
num_inference_steps
=
2
,
output_type
=
"np"
,
return_dict
=
False
,
)[
0
]
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
expected_height_width
=
low_res_image
.
size
[
0
]
*
4
assert
image
.
shape
==
(
1
,
expected_height_width
,
expected_height_width
,
3
)
expected_slice
=
np
.
array
([
0.2562
,
0.3606
,
0.4204
,
0.4469
,
0.4822
,
0.4647
,
0.5315
,
0.5748
,
0.5606
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
@
unittest
.
skipIf
(
torch_device
!=
"cuda"
,
"This test requires a GPU"
)
def
test_stable_diffusion_upscale_fp16
(
self
):
"""Test that stable diffusion upscale works with fp16"""
unet
=
self
.
dummy_cond_unet_upscale
low_res_scheduler
=
DDPMScheduler
()
scheduler
=
DDIMScheduler
(
prediction_type
=
"v_prediction"
)
vae
=
self
.
dummy_vae
text_encoder
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
image
=
self
.
dummy_image
.
cpu
().
permute
(
0
,
2
,
3
,
1
)[
0
]
low_res_image
=
Image
.
fromarray
(
np
.
uint8
(
image
)).
convert
(
"RGB"
).
resize
((
64
,
64
))
# put models in fp16, except vae as it overflows in fp16
unet
=
unet
.
half
()
text_encoder
=
text_encoder
.
half
()
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionUpscalePipeline
(
unet
=
unet
,
low_res_scheduler
=
low_res_scheduler
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
text_encoder
,
tokenizer
=
tokenizer
,
max_noise_level
=
350
,
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
image
=
sd_pipe
(
[
prompt
],
image
=
low_res_image
,
generator
=
generator
,
num_inference_steps
=
2
,
output_type
=
"np"
,
).
images
expected_height_width
=
low_res_image
.
size
[
0
]
*
4
assert
image
.
shape
==
(
1
,
expected_height_width
,
expected_height_width
,
3
)
@
slow
@
require_torch_gpu
class
StableDiffusionUpscalePipelineIntegrationTests
(
unittest
.
TestCase
):
def
tearDown
(
self
):
# clean up the VRAM after each test
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
def
test_stable_diffusion_upscale_pipeline
(
self
):
image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/sd2-upscale/low_res_cat.png"
)
expected_image
=
load_numpy
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale"
"/upsampled_cat.npy"
)
model_id
=
"stabilityai/stable-diffusion-x4-upscaler"
pipe
=
StableDiffusionUpscalePipeline
.
from_pretrained
(
model_id
)
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
pipe
.
enable_attention_slicing
()
prompt
=
"a cat sitting on a park bench"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
output
=
pipe
(
prompt
=
prompt
,
image
=
image
,
generator
=
generator
,
output_type
=
"np"
,
)
image
=
output
.
images
[
0
]
assert
image
.
shape
==
(
512
,
512
,
3
)
assert
np
.
abs
(
expected_image
-
image
).
max
()
<
1e-3
def
test_stable_diffusion_upscale_pipeline_fp16
(
self
):
image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/sd2-upscale/low_res_cat.png"
)
expected_image
=
load_numpy
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-upscale"
"/upsampled_cat_fp16.npy"
)
model_id
=
"stabilityai/stable-diffusion-x4-upscaler"
pipe
=
StableDiffusionUpscalePipeline
.
from_pretrained
(
model_id
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
,
)
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
pipe
.
enable_attention_slicing
()
prompt
=
"a cat sitting on a park bench"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
output
=
pipe
(
prompt
=
prompt
,
image
=
image
,
generator
=
generator
,
output_type
=
"np"
,
)
image
=
output
.
images
[
0
]
assert
image
.
shape
==
(
512
,
512
,
3
)
assert
np
.
abs
(
expected_image
-
image
).
max
()
<
5e-1
def
test_stable_diffusion_pipeline_with_sequential_cpu_offloading
(
self
):
torch
.
cuda
.
empty_cache
()
torch
.
cuda
.
reset_max_memory_allocated
()
torch
.
cuda
.
reset_peak_memory_stats
()
image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/sd2-upscale/low_res_cat.png"
)
model_id
=
"stabilityai/stable-diffusion-x4-upscaler"
pipe
=
StableDiffusionUpscalePipeline
.
from_pretrained
(
model_id
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
,
)
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
pipe
.
enable_attention_slicing
(
1
)
pipe
.
enable_sequential_cpu_offload
()
prompt
=
"a cat sitting on a park bench"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
_
=
pipe
(
prompt
=
prompt
,
image
=
image
,
generator
=
generator
,
num_inference_steps
=
5
,
output_type
=
"np"
,
)
mem_bytes
=
torch
.
cuda
.
max_memory_allocated
()
# make sure that less than 2.65 GB is allocated
assert
mem_bytes
<
2.65
*
10
**
9
tests/pipelines/stable_diffusion_2/test_stable_diffusion_v_pred.py
0 → 100644
View file @
76845183
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
gc
import
time
import
unittest
import
numpy
as
np
import
torch
from
diffusers
import
(
AutoencoderKL
,
DDIMScheduler
,
DPMSolverMultistepScheduler
,
EulerDiscreteScheduler
,
StableDiffusionPipeline
,
UNet2DConditionModel
,
)
from
diffusers.utils
import
load_numpy
,
slow
,
torch_device
from
diffusers.utils.testing_utils
import
require_torch_gpu
from
transformers
import
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
...test_pipelines_common
import
PipelineTesterMixin
torch
.
backends
.
cuda
.
matmul
.
allow_tf32
=
False
class
StableDiffusion2VPredictionPipelineFastTests
(
PipelineTesterMixin
,
unittest
.
TestCase
):
def
tearDown
(
self
):
# clean up the VRAM after each test
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
@
property
def
dummy_cond_unet
(
self
):
torch
.
manual_seed
(
0
)
model
=
UNet2DConditionModel
(
block_out_channels
=
(
32
,
64
),
layers_per_block
=
2
,
sample_size
=
32
,
in_channels
=
4
,
out_channels
=
4
,
down_block_types
=
(
"DownBlock2D"
,
"CrossAttnDownBlock2D"
),
up_block_types
=
(
"CrossAttnUpBlock2D"
,
"UpBlock2D"
),
cross_attention_dim
=
32
,
# SD2-specific config below
attention_head_dim
=
(
2
,
4
,
8
,
8
),
use_linear_projection
=
True
,
)
return
model
@
property
def
dummy_vae
(
self
):
torch
.
manual_seed
(
0
)
model
=
AutoencoderKL
(
block_out_channels
=
[
32
,
64
],
in_channels
=
3
,
out_channels
=
3
,
down_block_types
=
[
"DownEncoderBlock2D"
,
"DownEncoderBlock2D"
],
up_block_types
=
[
"UpDecoderBlock2D"
,
"UpDecoderBlock2D"
],
latent_channels
=
4
,
sample_size
=
128
,
)
return
model
@
property
def
dummy_text_encoder
(
self
):
torch
.
manual_seed
(
0
)
config
=
CLIPTextConfig
(
bos_token_id
=
0
,
eos_token_id
=
2
,
hidden_size
=
32
,
intermediate_size
=
37
,
layer_norm_eps
=
1e-05
,
num_attention_heads
=
4
,
num_hidden_layers
=
5
,
pad_token_id
=
1
,
vocab_size
=
1000
,
# SD2-specific config below
hidden_act
=
"gelu"
,
projection_dim
=
64
,
)
return
CLIPTextModel
(
config
)
def
test_stable_diffusion_v_pred_ddim
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet
scheduler
=
DDIMScheduler
(
beta_start
=
0.00085
,
beta_end
=
0.012
,
beta_schedule
=
"scaled_linear"
,
clip_sample
=
False
,
set_alpha_to_one
=
False
,
prediction_type
=
"v_prediction"
,
)
vae
=
self
.
dummy_vae
bert
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionPipeline
(
unet
=
unet
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
output
=
sd_pipe
([
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
)
image
=
output
.
images
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
image_from_tuple
=
sd_pipe
(
[
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
,
return_dict
=
False
,
)[
0
]
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.6424
,
0.6109
,
0.494
,
0.5088
,
0.4984
,
0.4525
,
0.5059
,
0.5068
,
0.4474
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
def
test_stable_diffusion_v_pred_k_euler
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet
scheduler
=
EulerDiscreteScheduler
(
beta_start
=
0.00085
,
beta_end
=
0.012
,
beta_schedule
=
"scaled_linear"
,
prediction_type
=
"v_prediction"
)
vae
=
self
.
dummy_vae
bert
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionPipeline
(
unet
=
unet
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
bert
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
feature_extractor
=
None
,
requires_safety_checker
=
False
,
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
output
=
sd_pipe
([
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
)
image
=
output
.
images
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
image_from_tuple
=
sd_pipe
(
[
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
,
return_dict
=
False
,
)[
0
]
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.4616
,
0.5184
,
0.4887
,
0.5111
,
0.4839
,
0.48
,
0.5119
,
0.5263
,
0.4776
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
@unittest.skipIf(torch_device != "cuda", "This test requires a GPU")
def test_stable_diffusion_v_pred_fp16(self):
    """Test that stable diffusion v-prediction works with fp16"""
    scheduler = DDIMScheduler(
        beta_start=0.00085,
        beta_end=0.012,
        beta_schedule="scaled_linear",
        clip_sample=False,
        set_alpha_to_one=False,
        prediction_type="v_prediction",
    )
    # Cast every learned component to half precision before assembling the pipeline.
    unet = self.dummy_cond_unet.half()
    vae = self.dummy_vae.half()
    text_encoder = self.dummy_text_encoder.half()
    tokenizer = CLIPTokenizer.from_pretrained("hf-internal-testing/tiny-random-clip")

    pipe = StableDiffusionPipeline(
        unet=unet,
        scheduler=scheduler,
        vae=vae,
        text_encoder=text_encoder,
        tokenizer=tokenizer,
        safety_checker=None,
        feature_extractor=None,
        requires_safety_checker=False,
    ).to(torch_device)
    pipe.set_progress_bar_config(disable=None)

    prompt = "A painting of a squirrel eating a burger"
    generator = torch.Generator(device=torch_device).manual_seed(0)
    image = pipe([prompt], generator=generator, num_inference_steps=2, output_type="np").images

    # Only the shape is checked here: the point of the test is that an fp16
    # v-prediction run completes without error.
    assert image.shape == (1, 64, 64, 3)
@slow
@require_torch_gpu
class StableDiffusion2VPredictionPipelineIntegrationTests(unittest.TestCase):
    """Slow GPU integration tests for Stable Diffusion 2 pipelines using the
    v-prediction parameterization (the ``stabilityai/stable-diffusion-2``
    checkpoint). All tests download weights from the Hugging Face Hub."""

    def tearDown(self):
        # clean up the VRAM after each test
        super().tearDown()
        gc.collect()
        torch.cuda.empty_cache()

    def test_stable_diffusion_v_pred_default(self):
        # End-to-end generation with the checkpoint's default scheduler;
        # a 3x3 slice of the output image is compared against reference values.
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2")
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.enable_attention_slicing()
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = sd_pipe([prompt], generator=generator, guidance_scale=7.5, num_inference_steps=20, output_type="np")

        image = output.images
        image_slice = image[0, 253:256, 253:256, -1]

        assert image.shape == (1, 768, 768, 3)
        expected_slice = np.array([0.0567, 0.057, 0.0416, 0.0463, 0.0433, 0.06, 0.0517, 0.0526, 0.0866])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_v_pred_euler(self):
        # Same checkpoint, but with an EulerDiscreteScheduler explicitly loaded
        # from the repo's "scheduler" subfolder instead of the default.
        scheduler = EulerDiscreteScheduler.from_pretrained("stabilityai/stable-diffusion-2", subfolder="scheduler")
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", scheduler=scheduler)
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.enable_attention_slicing()
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "A painting of a squirrel eating a burger"
        generator = torch.Generator(device=torch_device).manual_seed(0)

        output = sd_pipe([prompt], generator=generator, num_inference_steps=5, output_type="numpy")
        image = output.images

        image_slice = image[0, 253:256, 253:256, -1]

        assert image.shape == (1, 768, 768, 3)
        expected_slice = np.array([0.0351, 0.0376, 0.0505, 0.0424, 0.0551, 0.0656, 0.0471, 0.0276, 0.0596])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_v_pred_dpm(self):
        """
        TODO: update this test after making DPM compatible with V-prediction!
        """
        scheduler = DPMSolverMultistepScheduler.from_pretrained(
            "stabilityai/stable-diffusion-2", subfolder="scheduler"
        )
        sd_pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2", scheduler=scheduler)
        sd_pipe = sd_pipe.to(torch_device)
        sd_pipe.enable_attention_slicing()
        sd_pipe.set_progress_bar_config(disable=None)

        prompt = "a photograph of an astronaut riding a horse"
        generator = torch.Generator(device=torch_device).manual_seed(0)
        image = sd_pipe(
            [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=5, output_type="numpy"
        ).images

        image_slice = image[0, 253:256, 253:256, -1]
        assert image.shape == (1, 768, 768, 3)
        # NOTE(review): the all-zero reference slice presumably reflects DPM's
        # current incompatibility with v-prediction (see the TODO above) —
        # confirm once the scheduler supports it.
        expected_slice = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0])
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-2

    def test_stable_diffusion_attention_slicing_v_pred(self):
        # Verifies that attention slicing keeps peak VRAM below 5.5 GB while
        # producing (nearly) the same image as the unsliced run, which must
        # exceed that threshold.
        torch.cuda.reset_peak_memory_stats()
        model_id = "stabilityai/stable-diffusion-2"
        pipe = StableDiffusionPipeline.from_pretrained(model_id, revision="fp16", torch_dtype=torch.float16)
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        prompt = "a photograph of an astronaut riding a horse"

        # make attention efficient
        pipe.enable_attention_slicing()
        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast(torch_device):
            output_chunked = pipe(
                [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
            )
            image_chunked = output_chunked.images

        mem_bytes = torch.cuda.max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()
        # make sure that less than 5.5 GB is allocated
        assert mem_bytes < 5.5 * 10**9

        # disable slicing
        pipe.disable_attention_slicing()
        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast(torch_device):
            output = pipe(
                [prompt], generator=generator, guidance_scale=7.5, num_inference_steps=10, output_type="numpy"
            )
            image = output.images

        # make sure that more than 5.5 GB is allocated
        mem_bytes = torch.cuda.max_memory_allocated()
        assert mem_bytes > 5.5 * 10**9
        # sliced and unsliced outputs must agree to within 1e-3
        assert np.abs(image_chunked.flatten() - image.flatten()).max() < 1e-3

    def test_stable_diffusion_text2img_pipeline_v_pred_default(self):
        # Full-image regression test against a reference array stored on the Hub.
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/"
            "sd2-text2img/astronaut_riding_a_horse_v_pred.npy"
        )

        pipe = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2")
        pipe.to(torch_device)
        pipe.enable_attention_slicing()
        pipe.set_progress_bar_config(disable=None)

        prompt = "astronaut riding a horse"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np")
        image = output.images[0]

        assert image.shape == (768, 768, 3)
        assert np.abs(expected_image - image).max() < 5e-3

    def test_stable_diffusion_text2img_pipeline_v_pred_fp16(self):
        # Same regression test as above, but for the fp16 revision of the
        # weights against an fp16-specific reference array.
        expected_image = load_numpy(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/"
            "sd2-text2img/astronaut_riding_a_horse_v_pred_fp16.npy"
        )

        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2", revision="fp16", torch_dtype=torch.float16
        )
        pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)

        prompt = "astronaut riding a horse"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        output = pipe(prompt=prompt, guidance_scale=7.5, generator=generator, output_type="np")
        image = output.images[0]

        assert image.shape == (768, 768, 3)
        assert np.abs(expected_image - image).max() < 5e-3

    def test_stable_diffusion_text2img_intermediate_state_v_pred(self):
        # Checks the intermediate latents handed to the step callback at the
        # first (step 0) and last (step 19) denoising steps against reference
        # slices, and that the callback is invoked exactly once per step.
        number_of_steps = 0

        def test_callback_fn(step: int, timestep: int, latents: torch.FloatTensor) -> None:
            test_callback_fn.has_been_called = True
            nonlocal number_of_steps
            number_of_steps += 1
            if step == 0:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 96, 96)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.2543, -1.2755, 0.4261, -0.9555, -1.173, -0.5892, 2.4159, 0.1554, -1.2098]
                )
                assert np.abs(latents_slice.flatten() - expected_slice).max() < 5e-3
            elif step == 19:
                latents = latents.detach().cpu().numpy()
                assert latents.shape == (1, 4, 96, 96)
                latents_slice = latents[0, -3:, -3:, -1]
                expected_slice = np.array(
                    [-0.9572, -0.967, -0.6152, 0.0894, -0.699, -0.2344, 1.5465, -0.0357, -0.1141]
                )
                assert np.abs(latents_slice.flatten() - expected_slice).max() < 1e-2

        test_callback_fn.has_been_called = False

        pipe = StableDiffusionPipeline.from_pretrained(
            "stabilityai/stable-diffusion-2", revision="fp16", torch_dtype=torch.float16
        )
        pipe = pipe.to(torch_device)
        pipe.set_progress_bar_config(disable=None)
        pipe.enable_attention_slicing()

        prompt = "Andromeda galaxy in a bottle"

        generator = torch.Generator(device=torch_device).manual_seed(0)
        with torch.autocast(torch_device):
            pipe(
                prompt=prompt,
                num_inference_steps=20,
                guidance_scale=7.5,
                generator=generator,
                callback=test_callback_fn,
                callback_steps=1,
            )
        assert test_callback_fn.has_been_called
        assert number_of_steps == 20

    def test_stable_diffusion_low_cpu_mem_usage_v_pred(self):
        # Loading with low_cpu_mem_usage (the default) should be at least
        # twice as fast as loading with low_cpu_mem_usage=False.
        pipeline_id = "stabilityai/stable-diffusion-2"

        start_time = time.time()
        pipeline_low_cpu_mem_usage = StableDiffusionPipeline.from_pretrained(
            pipeline_id, revision="fp16", torch_dtype=torch.float16
        )
        pipeline_low_cpu_mem_usage.to(torch_device)
        low_cpu_mem_usage_time = time.time() - start_time

        start_time = time.time()
        _ = StableDiffusionPipeline.from_pretrained(
            pipeline_id, revision="fp16", torch_dtype=torch.float16, low_cpu_mem_usage=False
        )
        normal_load_time = time.time() - start_time

        assert 2 * low_cpu_mem_usage_time < normal_load_time

    def test_stable_diffusion_pipeline_with_sequential_cpu_offloading_v_pred(self):
        # With sequential CPU offload and maximal attention slicing, a short
        # 5-step run must keep peak VRAM under 2.8 GB.
        torch.cuda.empty_cache()
        torch.cuda.reset_max_memory_allocated()
        torch.cuda.reset_peak_memory_stats()

        pipeline_id = "stabilityai/stable-diffusion-2"
        prompt = "Andromeda galaxy in a bottle"

        pipeline = StableDiffusionPipeline.from_pretrained(pipeline_id, revision="fp16", torch_dtype=torch.float16)
        pipeline = pipeline.to(torch_device)
        pipeline.enable_attention_slicing(1)  # slice size 1 = most memory-frugal setting
        pipeline.enable_sequential_cpu_offload()

        generator = torch.Generator(device=torch_device).manual_seed(0)
        _ = pipeline(prompt, generator=generator, num_inference_steps=5)

        mem_bytes = torch.cuda.max_memory_allocated()
        # make sure that less than 2.8 GB is allocated
        assert mem_bytes < 2.8 * 10**9
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment