"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "cd634a8fbba38883d421248e910c5d5bac219549"
Unverified Commit 9920c333 authored by Sean Sube, committed by GitHub

add OnnxStableDiffusionUpscalePipeline pipeline (#2158)

* [Onnx] add Stable Diffusion Upscale pipeline

* add a test for the OnnxStableDiffusionUpscalePipeline

* check for VAE config before adjusting scaling factor

* update test assertions, lint fixes

* run fix-copies target

* switch test checkpoint to one hosted on huggingface

* partially restore attention mask

* reshape embeddings after running text encoder

* add longer nightly test for ONNX upscale pipeline

* use package import to fix tests

* fix scheduler compatibility and class labels dtype

* use more precise type

* remove LMS from fast tests

* lookup latent and timestamp types

* add docs for ONNX upscaling, rename lookup table

* replace deprecated pipeline names in ONNX docs
parent f38e3626
@@ -21,13 +21,13 @@ specific language governing permissions and limitations under the License.

## Stable Diffusion Inference

The snippet below demonstrates how to use the ONNX runtime. You need to use `OnnxStableDiffusionPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use.

```python
# make sure you're logged in with `huggingface-cli login`
from diffusers import OnnxStableDiffusionPipeline

pipe = OnnxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="onnx",
    provider="CUDAExecutionProvider",
)
```

@@ -37,6 +37,37 @@

```python
prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt).images[0]
```
The snippet below demonstrates how to use the ONNX runtime with the Stable Diffusion upscaling pipeline.

```python
import torch

from diffusers import OnnxStableDiffusionPipeline, OnnxStableDiffusionUpscalePipeline

prompt = "a photo of an astronaut riding a horse on mars"
steps = 50

txt2img = OnnxStableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    revision="onnx",
    provider="CUDAExecutionProvider",
)
small_image = txt2img(
    prompt,
    num_inference_steps=steps,
).images[0]

generator = torch.manual_seed(0)
upscale = OnnxStableDiffusionUpscalePipeline.from_pretrained(
    "ssube/stable-diffusion-x4-upscaler-onnx",
    provider="CUDAExecutionProvider",
)
large_image = upscale(
    prompt,
    small_image,
    generator=generator,
    num_inference_steps=steps,
).images[0]
```
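The upscaler also accepts an existing `PIL.Image.Image` instead of a freshly generated one, so you can upscale any small picture. A minimal sketch, mirroring the nightly test below (the image URL and the 128x128 resize are illustrative, not requirements):

```python
import torch

from diffusers import OnnxStableDiffusionUpscalePipeline
from diffusers.utils import load_image

# any small RGB image works; the pipeline produces a 4x larger output
low_res = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
    "/img2img/sketch-mountains-input.jpg"
).resize((128, 128))

upscale = OnnxStableDiffusionUpscalePipeline.from_pretrained(
    "ssube/stable-diffusion-x4-upscaler-onnx",
    provider="CPUExecutionProvider",
)
image = upscale(
    "A fantasy landscape, trending on artstation",
    low_res,
    generator=torch.manual_seed(0),
    num_inference_steps=20,
).images[0]
```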
## Known Issues

- Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching.
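Until that is resolved, a simple workaround is to loop over prompts one at a time — a sketch, assuming `pipe` is the text-to-image pipeline created above:

```python
prompts = [
    "a photo of an astronaut riding a horse on mars",
    "a fantasy landscape, trending on artstation",
]

# one image per call instead of one batched call,
# which keeps peak memory at the single-prompt level
images = [pipe(p).images[0] for p in prompts]
```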
@@ -158,6 +158,7 @@ else:
        OnnxStableDiffusionInpaintPipeline,
        OnnxStableDiffusionInpaintPipelineLegacy,
        OnnxStableDiffusionPipeline,
        OnnxStableDiffusionUpscalePipeline,
        StableDiffusionOnnxPipeline,
    )
...
@@ -93,6 +93,7 @@ else:
        OnnxStableDiffusionInpaintPipeline,
        OnnxStableDiffusionInpaintPipelineLegacy,
        OnnxStableDiffusionPipeline,
        OnnxStableDiffusionUpscalePipeline,
        StableDiffusionOnnxPipeline,
    )
...
@@ -104,6 +104,7 @@ else:
    from .pipeline_onnx_stable_diffusion_img2img import OnnxStableDiffusionImg2ImgPipeline
    from .pipeline_onnx_stable_diffusion_inpaint import OnnxStableDiffusionInpaintPipeline
    from .pipeline_onnx_stable_diffusion_inpaint_legacy import OnnxStableDiffusionInpaintPipelineLegacy
    from .pipeline_onnx_stable_diffusion_upscale import OnnxStableDiffusionUpscalePipeline

if is_transformers_available() and is_flax_available():
    import flax
...
from logging import getLogger
from typing import Any, Callable, List, Optional, Union

import numpy as np
import PIL
import torch

from ...schedulers import DDPMScheduler
from ..onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel
from ..pipeline_utils import ImagePipelineOutput
from . import StableDiffusionUpscalePipeline

logger = getLogger(__name__)

NUM_LATENT_CHANNELS = 4
NUM_UNET_INPUT_CHANNELS = 7

ORT_TO_PT_TYPE = {
    "float16": torch.float16,
    "float32": torch.float32,
}
def preprocess(image):
    if isinstance(image, torch.Tensor):
        return image
    elif isinstance(image, PIL.Image.Image):
        image = [image]

    if isinstance(image[0], PIL.Image.Image):
        w, h = image[0].size
        w, h = map(lambda x: x - x % 64, (w, h))  # resize to integer multiple of 64

        image = [np.array(i.resize((w, h)))[None, :] for i in image]
        image = np.concatenate(image, axis=0)
        image = np.array(image).astype(np.float32) / 255.0
        image = image.transpose(0, 3, 1, 2)
        image = 2.0 * image - 1.0
        image = torch.from_numpy(image)
    elif isinstance(image[0], torch.Tensor):
        image = torch.cat(image, dim=0)

    return image
class OnnxStableDiffusionUpscalePipeline(StableDiffusionUpscalePipeline):
    def __init__(
        self,
        vae: OnnxRuntimeModel,
        text_encoder: OnnxRuntimeModel,
        tokenizer: Any,
        unet: OnnxRuntimeModel,
        low_res_scheduler: DDPMScheduler,
        scheduler: Any,
        max_noise_level: int = 350,
    ):
        super().__init__(vae, text_encoder, tokenizer, unet, low_res_scheduler, scheduler, max_noise_level)
    def __call__(
        self,
        prompt: Union[str, List[str]],
        image: Union[torch.FloatTensor, PIL.Image.Image, List[PIL.Image.Image]],
        num_inference_steps: int = 75,
        guidance_scale: float = 9.0,
        noise_level: int = 20,
        negative_prompt: Optional[Union[str, List[str]]] = None,
        num_images_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
        latents: Optional[torch.FloatTensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
        callback_steps: Optional[int] = 1,
    ):
        # 1. Check inputs
        self.check_inputs(prompt, image, noise_level, callback_steps)

        # 2. Define call parameters
        batch_size = 1 if isinstance(prompt, str) else len(prompt)
        device = self._execution_device
        # here `guidance_scale` is defined analogously to the guidance weight `w` of equation (2)
        # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
        # corresponds to doing no classifier free guidance.
        do_classifier_free_guidance = guidance_scale > 1.0

        # 3. Encode input prompt
        text_embeddings = self._encode_prompt(
            prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
        )
        latents_dtype = ORT_TO_PT_TYPE[str(text_embeddings.dtype)]

        # 4. Preprocess image
        image = preprocess(image)
        image = image.cpu()

        # 5. Set timesteps
        self.scheduler.set_timesteps(num_inference_steps, device=device)
        timesteps = self.scheduler.timesteps

        # 6. Add noise to image
        noise_level = torch.tensor([noise_level], dtype=torch.long, device=device)
        noise = torch.randn(image.shape, generator=generator, device=device, dtype=latents_dtype)
        image = self.low_res_scheduler.add_noise(image, noise, noise_level)

        batch_multiplier = 2 if do_classifier_free_guidance else 1
        image = np.concatenate([image] * batch_multiplier * num_images_per_prompt)
        noise_level = np.concatenate([noise_level] * image.shape[0])

        # 7. Prepare latent variables
        height, width = image.shape[2:]
        latents = self.prepare_latents(
            batch_size * num_images_per_prompt,
            NUM_LATENT_CHANNELS,
            height,
            width,
            latents_dtype,
            device,
            generator,
            latents,
        )

        # 8. Check that sizes of image and latents match
        num_channels_image = image.shape[1]
        if NUM_LATENT_CHANNELS + num_channels_image != NUM_UNET_INPUT_CHANNELS:
            raise ValueError(
                "Incorrect configuration settings! The config of `pipeline.unet` expects"
                f" {NUM_UNET_INPUT_CHANNELS} but received `num_channels_latents`: {NUM_LATENT_CHANNELS} +"
                f" `num_channels_image`: {num_channels_image} "
                f" = {NUM_LATENT_CHANNELS + num_channels_image}. Please verify the config of"
                " `pipeline.unet` or your `image` input."
            )

        # 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
        extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)

        timestep_dtype = next(
            (input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)"
        )
        timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype]

        # 10. Denoising loop
        num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
        with self.progress_bar(total=num_inference_steps) as progress_bar:
            for i, t in enumerate(timesteps):
                # expand the latents if we are doing classifier free guidance
                latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents

                # concat latents and the noised low-resolution image in the channel dimension
                latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
                latent_model_input = np.concatenate([latent_model_input, image], axis=1)

                # timestep to tensor
                timestep = np.array([t], dtype=timestep_dtype)

                # predict the noise residual
                noise_pred = self.unet(
                    sample=latent_model_input,
                    timestep=timestep,
                    encoder_hidden_states=text_embeddings,
                    class_labels=noise_level.astype(np.int64),
                )[0]

                # perform guidance
                if do_classifier_free_guidance:
                    noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2)
                    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)

                # compute the previous noisy sample x_t -> x_t-1
                latents = self.scheduler.step(
                    torch.from_numpy(noise_pred), t, latents, **extra_step_kwargs
                ).prev_sample

                # call the callback, if provided
                if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                    progress_bar.update()
                    if callback is not None and i % callback_steps == 0:
                        callback(i, t, latents)

        # 11. Post-processing
        image = self.decode_latents(latents.float())

        # 12. Convert to PIL
        if output_type == "pil":
            image = self.numpy_to_pil(image)

        if not return_dict:
            return (image,)

        return ImagePipelineOutput(images=image)
    def decode_latents(self, latents):
        # 1 / 0.08333 undoes the VAE `scaling_factor` used by the x4 upscaler
        latents = 1 / 0.08333 * latents
        image = self.vae(latent_sample=latents)[0]
        image = np.clip(image / 2 + 0.5, 0, 1)
        image = image.transpose((0, 2, 3, 1))
        return image
    def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
        batch_size = len(prompt) if isinstance(prompt, list) else 1

        text_inputs = self.tokenizer(
            prompt,
            padding="max_length",
            max_length=self.tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt",
        )
        text_input_ids = text_inputs.input_ids

        untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids

        if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
            removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1])
            logger.warning(
                "The following part of your input was truncated because CLIP can only handle sequences up to"
                f" {self.tokenizer.model_max_length} tokens: {removed_text}"
            )

        # if hasattr(text_inputs, "attention_mask"):
        #     attention_mask = text_inputs.attention_mask.to(device)
        # else:
        #     attention_mask = None

        # no positional arguments to text_encoder
        text_embeddings = self.text_encoder(
            input_ids=text_input_ids.int().to(device),
            # attention_mask=attention_mask,
        )
        text_embeddings = text_embeddings[0]

        bs_embed, seq_len, _ = text_embeddings.shape
        # duplicate text embeddings for each generation per prompt, using mps friendly method
        text_embeddings = text_embeddings.repeat(1, num_images_per_prompt)
        text_embeddings = text_embeddings.reshape(bs_embed * num_images_per_prompt, seq_len, -1)

        # get unconditional embeddings for classifier free guidance
        if do_classifier_free_guidance:
            uncond_tokens: List[str]
            if negative_prompt is None:
                uncond_tokens = [""] * batch_size
            elif type(prompt) is not type(negative_prompt):
                raise TypeError(
                    f"`negative_prompt` should be the same type as `prompt`, but got {type(negative_prompt)} !="
                    f" {type(prompt)}."
                )
            elif isinstance(negative_prompt, str):
                uncond_tokens = [negative_prompt]
            elif batch_size != len(negative_prompt):
                raise ValueError(
                    f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
                    f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
                    " the batch size of `prompt`."
                )
            else:
                uncond_tokens = negative_prompt

            max_length = text_input_ids.shape[-1]
            uncond_input = self.tokenizer(
                uncond_tokens,
                padding="max_length",
                max_length=max_length,
                truncation=True,
                return_tensors="pt",
            )

            # if hasattr(uncond_input, "attention_mask"):
            #     attention_mask = uncond_input.attention_mask.to(device)
            # else:
            #     attention_mask = None

            uncond_embeddings = self.text_encoder(
                input_ids=uncond_input.input_ids.int().to(device),
                # attention_mask=attention_mask,
            )
            uncond_embeddings = uncond_embeddings[0]

            seq_len = uncond_embeddings.shape[1]
            # duplicate unconditional embeddings for each generation per prompt, using mps friendly method
            uncond_embeddings = uncond_embeddings.repeat(1, num_images_per_prompt)
            uncond_embeddings = uncond_embeddings.reshape(batch_size * num_images_per_prompt, seq_len, -1)

            # For classifier free guidance, we need to do two forward passes.
            # Here we concatenate the unconditional and text embeddings into a single batch
            # to avoid doing two forward passes
            text_embeddings = np.concatenate([uncond_embeddings, text_embeddings])

        return text_embeddings
@@ -88,21 +88,22 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline):
    ):
        super().__init__()

        if hasattr(vae, "config"):
            # check if vae has a config attribute `scaling_factor` and if it is set to 0.08333, else set it to 0.08333 and deprecate
            is_vae_scaling_factor_set_to_0_08333 = (
                hasattr(vae.config, "scaling_factor") and vae.config.scaling_factor == 0.08333
            )

            if not is_vae_scaling_factor_set_to_0_08333:
                deprecation_message = (
                    "The configuration file of the vae does not contain `scaling_factor` or it is set to"
                    f" {vae.config.scaling_factor}, which seems highly unlikely. If your checkpoint is a fine-tuned"
                    " version of `stabilityai/stable-diffusion-x4-upscaler` you should change 'scaling_factor' to"
                    " 0.08333. Please make sure to update the config accordingly, as not doing so might lead to"
                    " incorrect results in future versions. If you have downloaded this checkpoint from the Hugging"
                    " Face Hub, it would be very nice if you could open a Pull Request for the `vae/config.json` file"
                )
                deprecate("wrong scaling_factor", "1.0.0", deprecation_message, standard_warn=False)
                vae.register_to_config(scaling_factor=0.08333)

        self.register_modules(
            vae=vae,
...
@@ -62,6 +62,21 @@ class OnnxStableDiffusionPipeline(metaclass=DummyObject):
        requires_backends(cls, ["torch", "transformers", "onnx"])


class OnnxStableDiffusionUpscalePipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers", "onnx"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["torch", "transformers", "onnx"])

    @classmethod
    def from_config(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers", "onnx"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["torch", "transformers", "onnx"])


class StableDiffusionOnnxPipeline(metaclass=DummyObject):
    _backends = ["torch", "transformers", "onnx"]
...
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import unittest

import numpy as np
import torch

from diffusers import (
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    EulerDiscreteScheduler,
    LMSDiscreteScheduler,
    OnnxStableDiffusionUpscalePipeline,
    PNDMScheduler,
)
from diffusers.utils import floats_tensor
from diffusers.utils.testing_utils import (
    is_onnx_available,
    load_image,
    nightly,
    require_onnxruntime,
    require_torch_gpu,
)

from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin


if is_onnx_available():
    import onnxruntime as ort
class OnnxStableDiffusionUpscalePipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase):
    # TODO: is there an appropriate internal test set?
    hub_checkpoint = "ssube/stable-diffusion-x4-upscaler-onnx"

    def get_dummy_inputs(self, seed=0):
        image = floats_tensor((1, 3, 128, 128), rng=random.Random(seed))
        generator = torch.manual_seed(seed)
        inputs = {
            "prompt": "A painting of a squirrel eating a burger",
            "image": image,
            "generator": generator,
            "num_inference_steps": 3,
            "guidance_scale": 7.5,
            "output_type": "numpy",
        }
        return inputs

    def test_pipeline_default_ddpm(self):
        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1].flatten()

        # started as 128, should now be 512
        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.6974782, 0.68902093, 0.70135885, 0.7583618, 0.7804545, 0.7854912, 0.78667426, 0.78743863, 0.78070223]
        )
        assert np.abs(image_slice - expected_slice).max() < 1e-1

    def test_pipeline_pndm(self):
        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
        pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config, skip_prk_steps=True)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.6898892, 0.59240556, 0.52499527, 0.58866215, 0.52258235, 0.52572715, 0.62414473, 0.6174387, 0.6214964]
        )
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1

    def test_pipeline_dpm_multistep(self):
        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
        pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.7659278, 0.76437664, 0.75579107, 0.7691116, 0.77666986, 0.7727672, 0.7758664, 0.7812226, 0.76942515]
        )
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1

    def test_pipeline_euler(self):
        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
        pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.6974782, 0.68902093, 0.70135885, 0.7583618, 0.7804545, 0.7854912, 0.78667426, 0.78743863, 0.78070223]
        )
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1

    def test_pipeline_euler_ancestral(self):
        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
        pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
        pipe.set_progress_bar_config(disable=None)

        inputs = self.get_dummy_inputs()
        image = pipe(**inputs).images
        image_slice = image[0, -3:, -3:, -1]

        assert image.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.77424496, 0.773601, 0.7645288, 0.7769598, 0.7772739, 0.7738688, 0.78187233, 0.77879584, 0.767043]
        )
        assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
@nightly
@require_onnxruntime
@require_torch_gpu
class OnnxStableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
    @property
    def gpu_provider(self):
        return (
            "CUDAExecutionProvider",
            {
                "gpu_mem_limit": "15000000000",  # 15GB
                "arena_extend_strategy": "kSameAsRequested",
            },
        )

    @property
    def gpu_options(self):
        options = ort.SessionOptions()
        options.enable_mem_pattern = False
        return options
    def test_inference_default_ddpm(self):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/img2img/sketch-mountains-input.jpg"
        )
        init_image = init_image.resize((128, 128))

        # using the checkpoint's default scheduler
        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(
            "ssube/stable-diffusion-x4-upscaler-onnx",
            provider=self.gpu_provider,
            sess_options=self.gpu_options,
        )
        pipe.set_progress_bar_config(disable=None)

        prompt = "A fantasy landscape, trending on artstation"

        generator = torch.manual_seed(0)
        output = pipe(
            prompt=prompt,
            image=init_image,
            guidance_scale=7.5,
            num_inference_steps=10,
            generator=generator,
            output_type="np",
        )
        images = output.images
        image_slice = images[0, 255:258, 383:386, -1]

        assert images.shape == (1, 512, 512, 3)
        expected_slice = np.array([0.4883, 0.4947, 0.4980, 0.4975, 0.4982, 0.4980, 0.5000, 0.5006, 0.4972])
        # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
        assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2
    def test_inference_k_lms(self):
        init_image = load_image(
            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
            "/img2img/sketch-mountains-input.jpg"
        )
        init_image = init_image.resize((128, 128))

        lms_scheduler = LMSDiscreteScheduler.from_pretrained(
            "ssube/stable-diffusion-x4-upscaler-onnx", subfolder="scheduler"
        )
        pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(
            "ssube/stable-diffusion-x4-upscaler-onnx",
            scheduler=lms_scheduler,
            provider=self.gpu_provider,
            sess_options=self.gpu_options,
        )
        pipe.set_progress_bar_config(disable=None)

        prompt = "A fantasy landscape, trending on artstation"

        generator = torch.manual_seed(0)
        output = pipe(
            prompt=prompt,
            image=init_image,
            guidance_scale=7.5,
            num_inference_steps=20,
            generator=generator,
            output_type="np",
        )
        images = output.images
        image_slice = images[0, 255:258, 383:386, -1]

        assert images.shape == (1, 512, 512, 3)
        expected_slice = np.array(
            [0.50173753, 0.50223356, 0.502039, 0.50233036, 0.5023725, 0.5022601, 0.5018758, 0.50234085, 0.50241566]
        )
        # TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
        assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2