Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
diffusers
Commits
5c10e68a
Unverified
Commit
5c10e68a
authored
Nov 25, 2022
by
Anton Lozhkov
Committed by
GitHub
Nov 25, 2022
Browse files
Add SD2 inpainting integration tests (#1412)
SD2 inpainting integration tests
parent
d50e3217
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
345 additions
and
0 deletions
+345
-0
tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py
...lines/stable_diffusion_2/test_stable_diffusion_inpaint.py
+345
-0
No files found.
tests/pipelines/stable_diffusion_2/test_stable_diffusion_inpaint.py
0 → 100644
View file @
5c10e68a
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
gc
import
random
import
unittest
import
numpy
as
np
import
torch
from
diffusers
import
AutoencoderKL
,
PNDMScheduler
,
StableDiffusionInpaintPipeline
,
UNet2DConditionModel
from
diffusers.utils
import
floats_tensor
,
load_image
,
load_numpy
,
torch_device
from
diffusers.utils.testing_utils
import
require_torch_gpu
from
PIL
import
Image
from
transformers
import
CLIPTextConfig
,
CLIPTextModel
,
CLIPTokenizer
from
...test_pipelines_common
import
PipelineTesterMixin
torch
.
backends
.
cuda
.
matmul
.
allow_tf32
=
False
class
StableDiffusionInpaintPipelineFastTests
(
PipelineTesterMixin
,
unittest
.
TestCase
):
def
tearDown
(
self
):
# clean up the VRAM after each test
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
@
property
def
dummy_image
(
self
):
batch_size
=
1
num_channels
=
3
sizes
=
(
32
,
32
)
image
=
floats_tensor
((
batch_size
,
num_channels
)
+
sizes
,
rng
=
random
.
Random
(
0
)).
to
(
torch_device
)
return
image
@
property
def
dummy_cond_unet_inpaint
(
self
):
torch
.
manual_seed
(
0
)
model
=
UNet2DConditionModel
(
block_out_channels
=
(
32
,
64
),
layers_per_block
=
2
,
sample_size
=
32
,
in_channels
=
9
,
out_channels
=
4
,
down_block_types
=
(
"DownBlock2D"
,
"CrossAttnDownBlock2D"
),
up_block_types
=
(
"CrossAttnUpBlock2D"
,
"UpBlock2D"
),
cross_attention_dim
=
32
,
# SD2-specific config below
attention_head_dim
=
(
2
,
4
,
8
,
8
),
use_linear_projection
=
True
,
)
return
model
@
property
def
dummy_vae
(
self
):
torch
.
manual_seed
(
0
)
model
=
AutoencoderKL
(
block_out_channels
=
[
32
,
64
],
in_channels
=
3
,
out_channels
=
3
,
down_block_types
=
[
"DownEncoderBlock2D"
,
"DownEncoderBlock2D"
],
up_block_types
=
[
"UpDecoderBlock2D"
,
"UpDecoderBlock2D"
],
latent_channels
=
4
,
)
return
model
@
property
def
dummy_text_encoder
(
self
):
torch
.
manual_seed
(
0
)
config
=
CLIPTextConfig
(
bos_token_id
=
0
,
eos_token_id
=
2
,
hidden_size
=
32
,
intermediate_size
=
37
,
layer_norm_eps
=
1e-05
,
num_attention_heads
=
4
,
num_hidden_layers
=
5
,
pad_token_id
=
1
,
vocab_size
=
1000
,
# SD2-specific config below
hidden_act
=
"gelu"
,
projection_dim
=
512
,
)
return
CLIPTextModel
(
config
)
@
property
def
dummy_extractor
(
self
):
def
extract
(
*
args
,
**
kwargs
):
class
Out
:
def
__init__
(
self
):
self
.
pixel_values
=
torch
.
ones
([
0
])
def
to
(
self
,
device
):
self
.
pixel_values
.
to
(
device
)
return
self
return
Out
()
return
extract
def
test_stable_diffusion_inpaint
(
self
):
device
=
"cpu"
# ensure determinism for the device-dependent torch.Generator
unet
=
self
.
dummy_cond_unet_inpaint
scheduler
=
PNDMScheduler
(
skip_prk_steps
=
True
)
vae
=
self
.
dummy_vae
text_encoder
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
image
=
self
.
dummy_image
.
cpu
().
permute
(
0
,
2
,
3
,
1
)[
0
]
init_image
=
Image
.
fromarray
(
np
.
uint8
(
image
)).
convert
(
"RGB"
).
resize
((
64
,
64
))
mask_image
=
Image
.
fromarray
(
np
.
uint8
(
image
+
4
)).
convert
(
"RGB"
).
resize
((
64
,
64
))
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionInpaintPipeline
(
unet
=
unet
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
text_encoder
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
feature_extractor
=
None
,
)
sd_pipe
=
sd_pipe
.
to
(
device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
output
=
sd_pipe
(
[
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
,
image
=
init_image
,
mask_image
=
mask_image
,
)
image
=
output
.
images
generator
=
torch
.
Generator
(
device
=
device
).
manual_seed
(
0
)
image_from_tuple
=
sd_pipe
(
[
prompt
],
generator
=
generator
,
guidance_scale
=
6.0
,
num_inference_steps
=
2
,
output_type
=
"np"
,
image
=
init_image
,
mask_image
=
mask_image
,
return_dict
=
False
,
)[
0
]
image_slice
=
image
[
0
,
-
3
:,
-
3
:,
-
1
]
image_from_tuple_slice
=
image_from_tuple
[
0
,
-
3
:,
-
3
:,
-
1
]
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
expected_slice
=
np
.
array
([
0.4727
,
0.5735
,
0.3941
,
0.5446
,
0.5926
,
0.4394
,
0.5062
,
0.4654
,
0.4476
])
assert
np
.
abs
(
image_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
assert
np
.
abs
(
image_from_tuple_slice
.
flatten
()
-
expected_slice
).
max
()
<
1e-2
@
unittest
.
skipIf
(
torch_device
!=
"cuda"
,
"This test requires a GPU"
)
def
test_stable_diffusion_inpaint_fp16
(
self
):
"""Test that stable diffusion inpaint works with fp16"""
unet
=
self
.
dummy_cond_unet_inpaint
scheduler
=
PNDMScheduler
(
skip_prk_steps
=
True
)
vae
=
self
.
dummy_vae
text_encoder
=
self
.
dummy_text_encoder
tokenizer
=
CLIPTokenizer
.
from_pretrained
(
"hf-internal-testing/tiny-random-clip"
)
image
=
self
.
dummy_image
.
cpu
().
permute
(
0
,
2
,
3
,
1
)[
0
]
init_image
=
Image
.
fromarray
(
np
.
uint8
(
image
)).
convert
(
"RGB"
).
resize
((
64
,
64
))
mask_image
=
Image
.
fromarray
(
np
.
uint8
(
image
+
4
)).
convert
(
"RGB"
).
resize
((
64
,
64
))
# put models in fp16
unet
=
unet
.
half
()
vae
=
vae
.
half
()
text_encoder
=
text_encoder
.
half
()
# make sure here that pndm scheduler skips prk
sd_pipe
=
StableDiffusionInpaintPipeline
(
unet
=
unet
,
scheduler
=
scheduler
,
vae
=
vae
,
text_encoder
=
text_encoder
,
tokenizer
=
tokenizer
,
safety_checker
=
None
,
feature_extractor
=
None
,
)
sd_pipe
=
sd_pipe
.
to
(
torch_device
)
sd_pipe
.
set_progress_bar_config
(
disable
=
None
)
prompt
=
"A painting of a squirrel eating a burger"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
image
=
sd_pipe
(
[
prompt
],
generator
=
generator
,
num_inference_steps
=
2
,
output_type
=
"np"
,
image
=
init_image
,
mask_image
=
mask_image
,
).
images
assert
image
.
shape
==
(
1
,
64
,
64
,
3
)
# @slow
@
require_torch_gpu
class
StableDiffusionInpaintPipelineIntegrationTests
(
unittest
.
TestCase
):
def
tearDown
(
self
):
# clean up the VRAM after each test
super
().
tearDown
()
gc
.
collect
()
torch
.
cuda
.
empty_cache
()
def
test_stable_diffusion_inpaint_pipeline
(
self
):
init_image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/sd2-inpaint/init_image.png"
)
mask_image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png"
)
expected_image
=
load_numpy
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint"
"/yellow_cat_sitting_on_a_park_bench.npy"
)
model_id
=
"stabilityai/stable-diffusion-2-inpainting"
pipe
=
StableDiffusionInpaintPipeline
.
from_pretrained
(
model_id
,
safety_checker
=
None
)
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
pipe
.
enable_attention_slicing
()
prompt
=
"Face of a yellow cat, high resolution, sitting on a park bench"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
output
=
pipe
(
prompt
=
prompt
,
image
=
init_image
,
mask_image
=
mask_image
,
generator
=
generator
,
output_type
=
"np"
,
)
image
=
output
.
images
[
0
]
assert
image
.
shape
==
(
512
,
512
,
3
)
assert
np
.
abs
(
expected_image
-
image
).
max
()
<
1e-3
def
test_stable_diffusion_inpaint_pipeline_fp16
(
self
):
init_image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/sd2-inpaint/init_image.png"
)
mask_image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png"
)
expected_image
=
load_numpy
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint"
"/yellow_cat_sitting_on_a_park_bench_fp16.npy"
)
model_id
=
"stabilityai/stable-diffusion-2-inpainting"
pipe
=
StableDiffusionInpaintPipeline
.
from_pretrained
(
model_id
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
,
safety_checker
=
None
,
)
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
pipe
.
enable_attention_slicing
()
prompt
=
"Face of a yellow cat, high resolution, sitting on a park bench"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
output
=
pipe
(
prompt
=
prompt
,
image
=
init_image
,
mask_image
=
mask_image
,
generator
=
generator
,
output_type
=
"np"
,
)
image
=
output
.
images
[
0
]
assert
image
.
shape
==
(
512
,
512
,
3
)
assert
np
.
abs
(
expected_image
-
image
).
max
()
<
5e-1
def
test_stable_diffusion_pipeline_with_sequential_cpu_offloading
(
self
):
torch
.
cuda
.
empty_cache
()
torch
.
cuda
.
reset_max_memory_allocated
()
torch
.
cuda
.
reset_peak_memory_stats
()
init_image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/sd2-inpaint/init_image.png"
)
mask_image
=
load_image
(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/sd2-inpaint/mask.png"
)
model_id
=
"stabilityai/stable-diffusion-2-inpainting"
pndm
=
PNDMScheduler
.
from_pretrained
(
model_id
,
subfolder
=
"scheduler"
)
pipe
=
StableDiffusionInpaintPipeline
.
from_pretrained
(
model_id
,
safety_checker
=
None
,
scheduler
=
pndm
,
device_map
=
"auto"
,
revision
=
"fp16"
,
torch_dtype
=
torch
.
float16
,
)
pipe
.
to
(
torch_device
)
pipe
.
set_progress_bar_config
(
disable
=
None
)
pipe
.
enable_attention_slicing
(
1
)
pipe
.
enable_sequential_cpu_offload
()
prompt
=
"Face of a yellow cat, high resolution, sitting on a park bench"
generator
=
torch
.
Generator
(
device
=
torch_device
).
manual_seed
(
0
)
_
=
pipe
(
prompt
=
prompt
,
image
=
init_image
,
mask_image
=
mask_image
,
generator
=
generator
,
num_inference_steps
=
5
,
output_type
=
"np"
,
)
mem_bytes
=
torch
.
cuda
.
max_memory_allocated
()
# make sure that less than 2.65 GB is allocated
assert
mem_bytes
<
2.65
*
10
**
9
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment