Unverified Commit 9920c333 authored by Sean Sube, committed by GitHub

add OnnxStableDiffusionUpscalePipeline pipeline (#2158)

* [Onnx] add Stable Diffusion Upscale pipeline

* add a test for the OnnxStableDiffusionUpscalePipeline

* check for VAE config before adjusting scaling factor

* update test assertions, lint fixes

* run fix-copies target

* switch test checkpoint to one hosted on huggingface

* partially restore attention mask

* reshape embeddings after running text encoder

* add longer nightly test for ONNX upscale pipeline

* use package import to fix tests

* fix scheduler compatibility and class labels dtype

* use more precise type

* remove LMS from fast tests

* lookup latent and timestamp types

* add docs for ONNX upscaling, rename lookup table

* replace deprecated pipeline names in ONNX docs
parent f38e3626
......@@ -21,13 +21,13 @@ specific language governing permissions and limitations under the License.
## Stable Diffusion Inference
-The snippet below demonstrates how to use the ONNX runtime. You need to use `StableDiffusionOnnxPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use.
+The snippet below demonstrates how to use the ONNX runtime. You need to use `OnnxStableDiffusionPipeline` instead of `StableDiffusionPipeline`. You also need to download the weights from the `onnx` branch of the repository, and indicate the runtime provider you want to use.
```python
# make sure you're logged in with `huggingface-cli login`
-from diffusers import StableDiffusionOnnxPipeline
+from diffusers import OnnxStableDiffusionPipeline

-pipe = StableDiffusionOnnxPipeline.from_pretrained(
+pipe = OnnxStableDiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
revision="onnx",
provider="CUDAExecutionProvider",
......@@ -37,6 +37,37 @@ prompt = "a photo of an astronaut riding a horse on mars"
image = pipe(prompt).images[0]
```
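The `provider` argument accepts any ONNX Runtime execution provider name; pass `"CPUExecutionProvider"` to run without a GPU.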
The snippet below demonstrates how to use the ONNX runtime with the Stable Diffusion upscaling pipeline.
```python
import torch

from diffusers import OnnxStableDiffusionPipeline, OnnxStableDiffusionUpscalePipeline
prompt = "a photo of an astronaut riding a horse on mars"
steps = 50
txt2img = OnnxStableDiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5",
revision="onnx",
provider="CUDAExecutionProvider",
)
small_image = txt2img(
prompt,
num_inference_steps=steps,
).images[0]
generator = torch.manual_seed(0)
upscale = OnnxStableDiffusionUpscalePipeline.from_pretrained(
"ssube/stable-diffusion-x4-upscaler-onnx",
provider="CUDAExecutionProvider",
)
large_image = upscale(
prompt,
small_image,
generator=generator,
num_inference_steps=steps,
).images[0]
```
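Note that the upscaler works at a fixed 4× scale, so feeding it the default 512×512 text-to-image output yields a 2048×2048 image and a correspondingly large memory footprint. For quicker experiments, generate or resize the intermediate image to something smaller first (the tests for this pipeline use 128×128 inputs, which upscale to 512×512).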
## Known Issues
- Generating multiple prompts in a batch seems to take too much memory. While we look into it, you may need to iterate instead of batching.
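
Until batching is fixed, looping over single prompts is a reliable workaround. Below is a minimal sketch, assuming `pipe` was created as in the first snippet above (the prompt list is only illustrative):

```python
prompts = [
    "a photo of an astronaut riding a horse on mars",
    "a photo of a fantasy castle at sunset",
]

images = []
for prompt in prompts:
    # one prompt per call keeps runtime memory usage flat
    images.append(pipe(prompt).images[0])
```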
......@@ -158,6 +158,7 @@ else:
OnnxStableDiffusionInpaintPipeline,
OnnxStableDiffusionInpaintPipelineLegacy,
OnnxStableDiffusionPipeline,
OnnxStableDiffusionUpscalePipeline,
StableDiffusionOnnxPipeline,
)
......
......@@ -93,6 +93,7 @@ else:
OnnxStableDiffusionInpaintPipeline,
OnnxStableDiffusionInpaintPipelineLegacy,
OnnxStableDiffusionPipeline,
OnnxStableDiffusionUpscalePipeline,
StableDiffusionOnnxPipeline,
)
......
......@@ -104,6 +104,7 @@ else:
from .pipeline_onnx_stable_diffusion_img2img import OnnxStableDiffusionImg2ImgPipeline
from .pipeline_onnx_stable_diffusion_inpaint import OnnxStableDiffusionInpaintPipeline
from .pipeline_onnx_stable_diffusion_inpaint_legacy import OnnxStableDiffusionInpaintPipelineLegacy
from .pipeline_onnx_stable_diffusion_upscale import OnnxStableDiffusionUpscalePipeline
if is_transformers_available() and is_flax_available():
import flax
......
from logging import getLogger
from typing import Any, Callable, List, Optional, Union
import numpy as np
import PIL
import torch
from ...schedulers import DDPMScheduler
from ..onnx_utils import ORT_TO_NP_TYPE, OnnxRuntimeModel
from ..pipeline_utils import ImagePipelineOutput
from . import StableDiffusionUpscalePipeline
logger = getLogger(__name__)
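
# the x4 upscaler UNet consumes the 4 latent channels concatenated with the
# 3-channel low-res image, 7 input channels in total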
NUM_LATENT_CHANNELS = 4
NUM_UNET_INPUT_CHANNELS = 7
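
# map numpy dtype names (as reported by the ONNX text encoder output) to torch dtypes, used when sampling noise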
ORT_TO_PT_TYPE = {
"float16": torch.float16,
"float32": torch.float32,
}
def preprocess(image):
if isinstance(image, torch.Tensor):
return image
elif isinstance(image, PIL.Image.Image):
image = [image]
if isinstance(image[0], PIL.Image.Image):
w, h = image[0].size
w, h = map(lambda x: x - x % 64, (w, h))  # round down to an integer multiple of 64
image = [np.array(i.resize((w, h)))[None, :] for i in image]
image = np.concatenate(image, axis=0)
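# scale to [0, 1], reorder NHWC -> NCHW, then shift to [-1, 1]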
image = np.array(image).astype(np.float32) / 255.0
image = image.transpose(0, 3, 1, 2)
image = 2.0 * image - 1.0
image = torch.from_numpy(image)
elif isinstance(image[0], torch.Tensor):
image = torch.cat(image, dim=0)
return image
class OnnxStableDiffusionUpscalePipeline(StableDiffusionUpscalePipeline):
def __init__(
self,
vae: OnnxRuntimeModel,
text_encoder: OnnxRuntimeModel,
tokenizer: Any,
unet: OnnxRuntimeModel,
low_res_scheduler: DDPMScheduler,
scheduler: Any,
max_noise_level: int = 350,
):
super().__init__(vae, text_encoder, tokenizer, unet, low_res_scheduler, scheduler, max_noise_level)
def __call__(
self,
prompt: Union[str, List[str]],
image: Union[torch.FloatTensor, PIL.Image.Image, List[PIL.Image.Image]],
num_inference_steps: int = 75,
guidance_scale: float = 9.0,
noise_level: int = 20,
negative_prompt: Optional[Union[str, List[str]]] = None,
num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None,
output_type: Optional[str] = "pil",
return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
callback_steps: Optional[int] = 1,
):
# 1. Check inputs
self.check_inputs(prompt, image, noise_level, callback_steps)
# 2. Define call parameters
batch_size = 1 if isinstance(prompt, str) else len(prompt)
device = self._execution_device
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
# corresponds to doing no classifier free guidance.
do_classifier_free_guidance = guidance_scale > 1.0
# 3. Encode input prompt
text_embeddings = self._encode_prompt(
prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt
)
latents_dtype = ORT_TO_PT_TYPE[str(text_embeddings.dtype)]
# 4. Preprocess image
image = preprocess(image)
image = image.cpu()
# 5. Set timesteps
self.scheduler.set_timesteps(num_inference_steps, device=device)
timesteps = self.scheduler.timesteps

# 6. Add noise to image
noise_level = torch.tensor([noise_level], dtype=torch.long, device=device)
noise = torch.randn(image.shape, generator=generator, device=device, dtype=latents_dtype)
image = self.low_res_scheduler.add_noise(image, noise, noise_level)
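# duplicate the noised image (and matching noise levels) for classifier-free guidance and per-prompt images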
batch_multiplier = 2 if do_classifier_free_guidance else 1
image = np.concatenate([image] * batch_multiplier * num_images_per_prompt)
noise_level = np.concatenate([noise_level] * image.shape[0])
# 7. Prepare latent variables
height, width = image.shape[2:]
latents = self.prepare_latents(
batch_size * num_images_per_prompt,
NUM_LATENT_CHANNELS,
height,
width,
latents_dtype,
device,
generator,
latents,
)
# 8. Check that sizes of image and latents match
num_channels_image = image.shape[1]
if NUM_LATENT_CHANNELS + num_channels_image != NUM_UNET_INPUT_CHANNELS:
raise ValueError(
"Incorrect configuration settings! The config of `pipeline.unet` expects"
f" {NUM_UNET_INPUT_CHANNELS} but received `num_channels_latents`: {NUM_LATENT_CHANNELS} +"
f" `num_channels_image`: {num_channels_image} "
f" = {NUM_LATENT_CHANNELS+num_channels_image}. Please verify the config of"
" `pipeline.unet` or your `image` input."
)
# 9. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
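# the exported UNet declares a type for its `timestep` input; look it up so the
# numpy array passed at each step matches the graph (float32 when unspecified)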
timestep_dtype = next(
(input.type for input in self.unet.model.get_inputs() if input.name == "timestep"), "tensor(float)"
)
timestep_dtype = ORT_TO_NP_TYPE[timestep_dtype]
# 10. Denoising loop
num_warmup_steps = len(timesteps) - num_inference_steps * self.scheduler.order
with self.progress_bar(total=num_inference_steps) as progress_bar:
for i, t in enumerate(timesteps):
# expand the latents if we are doing classifier free guidance
latent_model_input = np.concatenate([latents] * 2) if do_classifier_free_guidance else latents
# concat latents, mask, masked_image_latents in the channel dimension
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
latent_model_input = np.concatenate([latent_model_input, image], axis=1)
# timestep to tensor
timestep = np.array([t], dtype=timestep_dtype)
# predict the noise residual
noise_pred = self.unet(
sample=latent_model_input,
timestep=timestep,
encoder_hidden_states=text_embeddings,
class_labels=noise_level.astype(np.int64),
)[0]
# perform guidance
if do_classifier_free_guidance:
noise_pred_uncond, noise_pred_text = np.split(noise_pred, 2)
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
# compute the previous noisy sample x_t -> x_t-1
latents = self.scheduler.step(
torch.from_numpy(noise_pred), t, latents, **extra_step_kwargs
).prev_sample
# call the callback, if provided
if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
progress_bar.update()
if callback is not None and i % callback_steps == 0:
callback(i, t, latents)
# 11. Post-processing
image = self.decode_latents(latents.float())
# 12. Convert to PIL
if output_type == "pil":
image = self.numpy_to_pil(image)
if not return_dict:
return (image,)
return ImagePipelineOutput(images=image)
def decode_latents(self, latents):
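# invert the fixed VAE scaling factor (0.08333 for the x4 upscaler) before decoding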
latents = 1 / 0.08333 * latents
image = self.vae(latent_sample=latents)[0]
image = np.clip(image / 2 + 0.5, 0, 1)
image = image.transpose((0, 2, 3, 1))
return image
def _encode_prompt(self, prompt, device, num_images_per_prompt, do_classifier_free_guidance, negative_prompt):
batch_size = len(prompt) if isinstance(prompt, list) else 1
text_inputs = self.tokenizer(
prompt,
padding="max_length",
max_length=self.tokenizer.model_max_length,
truncation=True,
return_tensors="pt",
)
text_input_ids = text_inputs.input_ids
untruncated_ids = self.tokenizer(prompt, padding="longest", return_tensors="pt").input_ids
if untruncated_ids.shape[-1] >= text_input_ids.shape[-1] and not torch.equal(text_input_ids, untruncated_ids):
removed_text = self.tokenizer.batch_decode(untruncated_ids[:, self.tokenizer.model_max_length - 1 : -1])
logger.warning(
"The following part of your input was truncated because CLIP can only handle sequences up to"
f" {self.tokenizer.model_max_length} tokens: {removed_text}"
)
# if hasattr(text_inputs, "attention_mask"):
# attention_mask = text_inputs.attention_mask.to(device)
# else:
# attention_mask = None
# no positional arguments to text_encoder
text_embeddings = self.text_encoder(
input_ids=text_input_ids.int().to(device),
# attention_mask=attention_mask,
)
text_embeddings = text_embeddings[0]
bs_embed, seq_len, _ = text_embeddings.shape
# duplicate text embeddings for each generation per prompt; the ONNX encoder returns
# numpy arrays, so tile along the sequence axis rather than using torch-style .repeat
text_embeddings = np.tile(text_embeddings, (1, num_images_per_prompt, 1))
text_embeddings = text_embeddings.reshape(bs_embed * num_images_per_prompt, seq_len, -1)
# get unconditional embeddings for classifier free guidance
if do_classifier_free_guidance:
uncond_tokens: List[str]
if negative_prompt is None:
uncond_tokens = [""] * batch_size
elif type(prompt) is not type(negative_prompt):
raise TypeError(
f"`negative_prompt` should be the same type to `prompt`, but got {type(negative_prompt)} !="
f" {type(prompt)}."
)
elif isinstance(negative_prompt, str):
uncond_tokens = [negative_prompt]
elif batch_size != len(negative_prompt):
raise ValueError(
f"`negative_prompt`: {negative_prompt} has batch size {len(negative_prompt)}, but `prompt`:"
f" {prompt} has batch size {batch_size}. Please make sure that passed `negative_prompt` matches"
" the batch size of `prompt`."
)
else:
uncond_tokens = negative_prompt
max_length = text_input_ids.shape[-1]
uncond_input = self.tokenizer(
uncond_tokens,
padding="max_length",
max_length=max_length,
truncation=True,
return_tensors="pt",
)
# if hasattr(uncond_input, "attention_mask"):
# attention_mask = uncond_input.attention_mask.to(device)
# else:
# attention_mask = None
uncond_embeddings = self.text_encoder(
input_ids=uncond_input.input_ids.int().to(device),
# attention_mask=attention_mask,
)
uncond_embeddings = uncond_embeddings[0]
seq_len = uncond_embeddings.shape[1]
# duplicate unconditional embeddings for each generation per prompt (numpy tile, as above)
uncond_embeddings = np.tile(uncond_embeddings, (1, num_images_per_prompt, 1))
uncond_embeddings = uncond_embeddings.reshape(batch_size * num_images_per_prompt, seq_len, -1)
# For classifier free guidance, we need to do two forward passes.
# Here we concatenate the unconditional and text embeddings into a single batch
# to avoid doing two forward passes
text_embeddings = np.concatenate([uncond_embeddings, text_embeddings])
return text_embeddings
......@@ -88,21 +88,22 @@ class StableDiffusionUpscalePipeline(DiffusionPipeline):
):
super().__init__()
-        # check if vae has a config attribute `scaling_factor` and if it is set to 0.08333, else set it to 0.08333 and deprecate
-        is_vae_scaling_factor_set_to_0_08333 = (
-            hasattr(vae.config, "scaling_factor") and vae.config.scaling_factor == 0.08333
-        )
-        if not is_vae_scaling_factor_set_to_0_08333:
-            deprecation_message = (
-                "The configuration file of the vae does not contain `scaling_factor` or it is set to"
-                f" {vae.config.scaling_factor}, which seems highly unlikely. If your checkpoint is a fine-tuned"
-                " version of `stabilityai/stable-diffusion-x4-upscaler` you should change 'scaling_factor' to 0.08333"
-                " Please make sure to update the config accordingly, as not doing so might lead to incorrect results"
-                " in future versions. If you have downloaded this checkpoint from the Hugging Face Hub, it would be"
-                " very nice if you could open a Pull Request for the `vae/config.json` file"
-            )
-            deprecate("wrong scaling_factor", "1.0.0", deprecation_message, standard_warn=False)
-            vae.register_to_config(scaling_factor=0.08333)
+        if hasattr(vae, "config"):
+            # check if vae has a config attribute `scaling_factor` and if it is set to 0.08333, else set it to 0.08333 and deprecate
+            is_vae_scaling_factor_set_to_0_08333 = (
+                hasattr(vae.config, "scaling_factor") and vae.config.scaling_factor == 0.08333
+            )
+            if not is_vae_scaling_factor_set_to_0_08333:
+                deprecation_message = (
+                    "The configuration file of the vae does not contain `scaling_factor` or it is set to"
+                    f" {vae.config.scaling_factor}, which seems highly unlikely. If your checkpoint is a fine-tuned"
+                    " version of `stabilityai/stable-diffusion-x4-upscaler` you should change 'scaling_factor' to"
+                    " 0.08333 Please make sure to update the config accordingly, as not doing so might lead to"
+                    " incorrect results in future versions. If you have downloaded this checkpoint from the Hugging"
+                    " Face Hub, it would be very nice if you could open a Pull Request for the `vae/config.json` file"
+                )
+                deprecate("wrong scaling_factor", "1.0.0", deprecation_message, standard_warn=False)
+                vae.register_to_config(scaling_factor=0.08333)
self.register_modules(
vae=vae,
......
......@@ -62,6 +62,21 @@ class OnnxStableDiffusionPipeline(metaclass=DummyObject):
requires_backends(cls, ["torch", "transformers", "onnx"])
class OnnxStableDiffusionUpscalePipeline(metaclass=DummyObject):
_backends = ["torch", "transformers", "onnx"]
def __init__(self, *args, **kwargs):
requires_backends(self, ["torch", "transformers", "onnx"])
@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers", "onnx"])
@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch", "transformers", "onnx"])
class StableDiffusionOnnxPipeline(metaclass=DummyObject):
_backends = ["torch", "transformers", "onnx"]
......
# coding=utf-8
# Copyright 2022 HuggingFace Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import random
import unittest
import numpy as np
import torch
from diffusers import (
DPMSolverMultistepScheduler,
EulerAncestralDiscreteScheduler,
EulerDiscreteScheduler,
LMSDiscreteScheduler,
OnnxStableDiffusionUpscalePipeline,
PNDMScheduler,
)
from diffusers.utils import floats_tensor
from diffusers.utils.testing_utils import (
is_onnx_available,
load_image,
nightly,
require_onnxruntime,
require_torch_gpu,
)
from ...test_pipelines_onnx_common import OnnxPipelineTesterMixin
if is_onnx_available():
import onnxruntime as ort
class OnnxStableDiffusionUpscalePipelineFastTests(OnnxPipelineTesterMixin, unittest.TestCase):
# TODO: is there an appropriate internal test set?
hub_checkpoint = "ssube/stable-diffusion-x4-upscaler-onnx"
def get_dummy_inputs(self, seed=0):
image = floats_tensor((1, 3, 128, 128), rng=random.Random(seed))
generator = torch.manual_seed(seed)
inputs = {
"prompt": "A painting of a squirrel eating a burger",
"image": image,
"generator": generator,
"num_inference_steps": 3,
"guidance_scale": 7.5,
"output_type": "numpy",
}
return inputs
def test_pipeline_default_ddpm(self):
pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs()
image = pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1].flatten()
# started as 128, should now be 512
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.6974782, 0.68902093, 0.70135885, 0.7583618, 0.7804545, 0.7854912, 0.78667426, 0.78743863, 0.78070223]
)
assert np.abs(image_slice - expected_slice).max() < 1e-1
def test_pipeline_pndm(self):
pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
pipe.scheduler = PNDMScheduler.from_config(pipe.scheduler.config, skip_prk_steps=True)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs()
image = pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.6898892, 0.59240556, 0.52499527, 0.58866215, 0.52258235, 0.52572715, 0.62414473, 0.6174387, 0.6214964]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
def test_pipeline_dpm_multistep(self):
pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs()
image = pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.7659278, 0.76437664, 0.75579107, 0.7691116, 0.77666986, 0.7727672, 0.7758664, 0.7812226, 0.76942515]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
def test_pipeline_euler(self):
pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs()
image = pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.6974782, 0.68902093, 0.70135885, 0.7583618, 0.7804545, 0.7854912, 0.78667426, 0.78743863, 0.78070223]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
def test_pipeline_euler_ancestral(self):
pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(self.hub_checkpoint, provider="CPUExecutionProvider")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.set_progress_bar_config(disable=None)
inputs = self.get_dummy_inputs()
image = pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1]
assert image.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.77424496, 0.773601, 0.7645288, 0.7769598, 0.7772739, 0.7738688, 0.78187233, 0.77879584, 0.767043]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 1e-1
@nightly
@require_onnxruntime
@require_torch_gpu
class OnnxStableDiffusionUpscalePipelineIntegrationTests(unittest.TestCase):
@property
def gpu_provider(self):
return (
"CUDAExecutionProvider",
{
"gpu_mem_limit": "15000000000", # 15GB
"arena_extend_strategy": "kSameAsRequested",
},
)
@property
def gpu_options(self):
options = ort.SessionOptions()
options.enable_mem_pattern = False
return options
def test_inference_default_ddpm(self):
init_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/img2img/sketch-mountains-input.jpg"
)
init_image = init_image.resize((128, 128))
# use the default scheduler from the checkpoint
pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(
"ssube/stable-diffusion-x4-upscaler-onnx",
provider=self.gpu_provider,
sess_options=self.gpu_options,
)
pipe.set_progress_bar_config(disable=None)
prompt = "A fantasy landscape, trending on artstation"
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
image=init_image,
guidance_scale=7.5,
num_inference_steps=10,
generator=generator,
output_type="np",
)
images = output.images
image_slice = images[0, 255:258, 383:386, -1]
assert images.shape == (1, 512, 512, 3)
expected_slice = np.array([0.4883, 0.4947, 0.4980, 0.4975, 0.4982, 0.4980, 0.5000, 0.5006, 0.4972])
# TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2
def test_inference_k_lms(self):
init_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/img2img/sketch-mountains-input.jpg"
)
init_image = init_image.resize((128, 128))
lms_scheduler = LMSDiscreteScheduler.from_pretrained(
"ssube/stable-diffusion-x4-upscaler-onnx", subfolder="scheduler"
)
pipe = OnnxStableDiffusionUpscalePipeline.from_pretrained(
"ssube/stable-diffusion-x4-upscaler-onnx",
scheduler=lms_scheduler,
provider=self.gpu_provider,
sess_options=self.gpu_options,
)
pipe.set_progress_bar_config(disable=None)
prompt = "A fantasy landscape, trending on artstation"
generator = torch.manual_seed(0)
output = pipe(
prompt=prompt,
image=init_image,
guidance_scale=7.5,
num_inference_steps=20,
generator=generator,
output_type="np",
)
images = output.images
image_slice = images[0, 255:258, 383:386, -1]
assert images.shape == (1, 512, 512, 3)
expected_slice = np.array(
[0.50173753, 0.50223356, 0.502039, 0.50233036, 0.5023725, 0.5022601, 0.5018758, 0.50234085, 0.50241566]
)
# TODO: lower the tolerance after finding the cause of onnxruntime reproducibility issues
assert np.abs(image_slice.flatten() - expected_slice).max() < 2e-2