Unverified Commit be4afa0b authored by Mark Van Aken's avatar Mark Van Aken Committed by GitHub
Browse files

#7535 Update FloatTensor type hints to Tensor (#7883)

* find & replace all FloatTensors to Tensor

* apply formatting

* Update torch.FloatTensor to torch.Tensor in the remaining files

* formatting

* Fix the rest of the places where FloatTensor is used as well as in documentation

* formatting

* Update new file from FloatTensor to Tensor
parent 04f4bd54
...@@ -318,8 +318,8 @@ class AltDiffusionImg2ImgPipeline( ...@@ -318,8 +318,8 @@ class AltDiffusionImg2ImgPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
**kwargs, **kwargs,
): ):
...@@ -350,8 +350,8 @@ class AltDiffusionImg2ImgPipeline( ...@@ -350,8 +350,8 @@ class AltDiffusionImg2ImgPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
...@@ -371,10 +371,10 @@ class AltDiffusionImg2ImgPipeline( ...@@ -371,10 +371,10 @@ class AltDiffusionImg2ImgPipeline(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -721,7 +721,7 @@ class AltDiffusionImg2ImgPipeline( ...@@ -721,7 +721,7 @@ class AltDiffusionImg2ImgPipeline(
data type of the generated embeddings data type of the generated embeddings
Returns: Returns:
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` `torch.Tensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
""" """
assert len(w.shape) == 1 assert len(w.shape) == 1
w = w * 1000.0 w = w * 1000.0
...@@ -774,8 +774,8 @@ class AltDiffusionImg2ImgPipeline( ...@@ -774,8 +774,8 @@ class AltDiffusionImg2ImgPipeline(
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: Optional[float] = 0.0, eta: Optional[float] = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
ip_adapter_image: Optional[PipelineImageInput] = None, ip_adapter_image: Optional[PipelineImageInput] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
...@@ -791,7 +791,7 @@ class AltDiffusionImg2ImgPipeline( ...@@ -791,7 +791,7 @@ class AltDiffusionImg2ImgPipeline(
Args: Args:
prompt (`str` or `List[str]`, *optional*): prompt (`str` or `List[str]`, *optional*):
The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`. The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`): image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
`Image`, numpy array or tensor representing an image batch to be used as the starting point. For both `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
numpy array and pytorch tensor, the expected value range is between `[0, 1]`. If it's a tensor or a list numpy array and pytorch tensor, the expected value range is between `[0, 1]`. If it's a tensor or a list
of tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a of tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
...@@ -824,10 +824,10 @@ class AltDiffusionImg2ImgPipeline( ...@@ -824,10 +824,10 @@ class AltDiffusionImg2ImgPipeline(
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the `prompt` input argument. provided, text embeddings are generated from the `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters. ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
......
...@@ -112,9 +112,9 @@ class RePaintPipeline(DiffusionPipeline): ...@@ -112,9 +112,9 @@ class RePaintPipeline(DiffusionPipeline):
The call function to the pipeline for generation. The call function to the pipeline for generation.
Args: Args:
image (`torch.FloatTensor` or `PIL.Image.Image`): image (`torch.Tensor` or `PIL.Image.Image`):
The original image to inpaint on. The original image to inpaint on.
mask_image (`torch.FloatTensor` or `PIL.Image.Image`): mask_image (`torch.Tensor` or `PIL.Image.Image`):
The mask_image where 0.0 defines which part of the original image to inpaint. The mask_image where 0.0 defines which part of the original image to inpaint.
num_inference_steps (`int`, *optional*, defaults to 1000): num_inference_steps (`int`, *optional*, defaults to 1000):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the The number of denoising steps. More denoising steps usually lead to a higher quality image at the
......
...@@ -134,7 +134,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline): ...@@ -134,7 +134,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
num_inference_steps: int = 100, num_inference_steps: int = 100,
return_dict: bool = True, return_dict: bool = True,
output_type: str = "np", output_type: str = "np",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
) -> Union[AudioPipelineOutput, Tuple]: ) -> Union[AudioPipelineOutput, Tuple]:
if (callback_steps is None) or ( if (callback_steps is None) or (
...@@ -161,7 +161,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline): ...@@ -161,7 +161,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
The output format of the generated audio. The output format of the generated audio.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -255,8 +255,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -255,8 +255,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
**kwargs, **kwargs,
): ):
...@@ -288,8 +288,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -288,8 +288,8 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
...@@ -309,10 +309,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -309,10 +309,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -638,10 +638,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -638,10 +638,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: Optional[float] = 0.1, eta: Optional[float] = 0.1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
...@@ -652,7 +652,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -652,7 +652,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
Args: Args:
prompt (`str` or `List[str]`): prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation. The prompt or prompts to guide the image generation.
image (`torch.FloatTensor`, `np.ndarray`, `PIL.Image.Image`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`): image (`torch.Tensor`, `np.ndarray`, `PIL.Image.Image`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
`Image` or tensor representing an image batch to be used as the starting point. Can also accept image `Image` or tensor representing an image batch to be used as the starting point. Can also accept image
latents as `image`, but if passing latents directly it is not encoded again. latents as `image`, but if passing latents directly it is not encoded again.
strength (`float`, *optional*, defaults to 0.8): strength (`float`, *optional*, defaults to 0.8):
...@@ -678,10 +678,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -678,10 +678,10 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the `prompt` input argument. provided, text embeddings are generated from the `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
output_type (`str`, *optional*, defaults to `"pil"`): output_type (`str`, *optional*, defaults to `"pil"`):
...@@ -691,7 +691,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor ...@@ -691,7 +691,7 @@ class CycleDiffusionPipeline(DiffusionPipeline, TextualInversionLoaderMixin, Lor
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -48,7 +48,7 @@ def preprocess_image(image, batch_size): ...@@ -48,7 +48,7 @@ def preprocess_image(image, batch_size):
def preprocess_mask(mask, batch_size, scale_factor=8): def preprocess_mask(mask, batch_size, scale_factor=8):
if not isinstance(mask, torch.FloatTensor): if not isinstance(mask, torch.Tensor):
mask = mask.convert("L") mask = mask.convert("L")
w, h = mask.size w, h = mask.size
w, h = (x - x % 8 for x in (w, h)) # resize to integer multiple of 8 w, h = (x - x % 8 for x in (w, h)) # resize to integer multiple of 8
...@@ -225,8 +225,8 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -225,8 +225,8 @@ class StableDiffusionInpaintPipelineLegacy(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
**kwargs, **kwargs,
): ):
...@@ -258,8 +258,8 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -258,8 +258,8 @@ class StableDiffusionInpaintPipelineLegacy(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
...@@ -279,10 +279,10 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -279,10 +279,10 @@ class StableDiffusionInpaintPipelineLegacy(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -557,8 +557,8 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -557,8 +557,8 @@ class StableDiffusionInpaintPipelineLegacy(
def __call__( def __call__(
self, self,
prompt: Union[str, List[str]] = None, prompt: Union[str, List[str]] = None,
image: Union[torch.FloatTensor, PIL.Image.Image] = None, image: Union[torch.Tensor, PIL.Image.Image] = None,
mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None, mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
strength: float = 0.8, strength: float = 0.8,
num_inference_steps: Optional[int] = 50, num_inference_steps: Optional[int] = 50,
guidance_scale: Optional[float] = 7.5, guidance_scale: Optional[float] = 7.5,
...@@ -567,11 +567,11 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -567,11 +567,11 @@ class StableDiffusionInpaintPipelineLegacy(
add_predicted_noise: Optional[bool] = False, add_predicted_noise: Optional[bool] = False,
eta: Optional[float] = 0.0, eta: Optional[float] = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
...@@ -583,10 +583,10 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -583,10 +583,10 @@ class StableDiffusionInpaintPipelineLegacy(
prompt (`str` or `List[str]`, *optional*): prompt (`str` or `List[str]`, *optional*):
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
instead. instead.
image (`torch.FloatTensor` or `PIL.Image.Image`): image (`torch.Tensor` or `PIL.Image.Image`):
`Image`, or tensor representing an image batch, that will be used as the starting point for the `Image`, or tensor representing an image batch, that will be used as the starting point for the
process. This is the image whose masked region will be inpainted. process. This is the image whose masked region will be inpainted.
mask_image (`torch.FloatTensor` or `PIL.Image.Image`): mask_image (`torch.Tensor` or `PIL.Image.Image`):
`Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be `Image`, or tensor representing an image batch, to mask `image`. White pixels in the mask will be
replaced by noise and therefore repainted, while black pixels will be preserved. If `mask_image` is a replaced by noise and therefore repainted, while black pixels will be preserved. If `mask_image` is a
PIL image, it will be converted to a single channel (luminance) before use. If mask is a tensor, the PIL image, it will be converted to a single channel (luminance) before use. If mask is a tensor, the
...@@ -620,10 +620,10 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -620,10 +620,10 @@ class StableDiffusionInpaintPipelineLegacy(
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -635,7 +635,7 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -635,7 +635,7 @@ class StableDiffusionInpaintPipelineLegacy(
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
...@@ -693,7 +693,7 @@ class StableDiffusionInpaintPipelineLegacy( ...@@ -693,7 +693,7 @@ class StableDiffusionInpaintPipelineLegacy(
prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds])
# 4. Preprocess image and mask # 4. Preprocess image and mask
if not isinstance(image, torch.FloatTensor): if not isinstance(image, torch.Tensor):
image = preprocess_image(image, batch_size) image = preprocess_image(image, batch_size)
mask_image = preprocess_mask(mask_image, batch_size, self.vae_scale_factor) mask_image = preprocess_mask(mask_image, batch_size, self.vae_scale_factor)
......
...@@ -163,8 +163,8 @@ class StableDiffusionModelEditingPipeline( ...@@ -163,8 +163,8 @@ class StableDiffusionModelEditingPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
**kwargs, **kwargs,
): ):
...@@ -196,8 +196,8 @@ class StableDiffusionModelEditingPipeline( ...@@ -196,8 +196,8 @@ class StableDiffusionModelEditingPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
...@@ -217,10 +217,10 @@ class StableDiffusionModelEditingPipeline( ...@@ -217,10 +217,10 @@ class StableDiffusionModelEditingPipeline(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -620,12 +620,12 @@ class StableDiffusionModelEditingPipeline( ...@@ -620,12 +620,12 @@ class StableDiffusionModelEditingPipeline(
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
...@@ -657,14 +657,14 @@ class StableDiffusionModelEditingPipeline( ...@@ -657,14 +657,14 @@ class StableDiffusionModelEditingPipeline(
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the `prompt` input argument. provided, text embeddings are generated from the `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
output_type (`str`, *optional*, defaults to `"pil"`): output_type (`str`, *optional*, defaults to `"pil"`):
...@@ -674,7 +674,7 @@ class StableDiffusionModelEditingPipeline( ...@@ -674,7 +674,7 @@ class StableDiffusionModelEditingPipeline(
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -154,8 +154,8 @@ class StableDiffusionParadigmsPipeline( ...@@ -154,8 +154,8 @@ class StableDiffusionParadigmsPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
**kwargs, **kwargs,
): ):
...@@ -187,8 +187,8 @@ class StableDiffusionParadigmsPipeline( ...@@ -187,8 +187,8 @@ class StableDiffusionParadigmsPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
...@@ -208,10 +208,10 @@ class StableDiffusionParadigmsPipeline( ...@@ -208,10 +208,10 @@ class StableDiffusionParadigmsPipeline(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -492,12 +492,12 @@ class StableDiffusionParadigmsPipeline( ...@@ -492,12 +492,12 @@ class StableDiffusionParadigmsPipeline(
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
debug: bool = False, debug: bool = False,
...@@ -537,14 +537,14 @@ class StableDiffusionParadigmsPipeline( ...@@ -537,14 +537,14 @@ class StableDiffusionParadigmsPipeline(
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the `prompt` input argument. provided, text embeddings are generated from the `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
output_type (`str`, *optional*, defaults to `"pil"`): output_type (`str`, *optional*, defaults to `"pil"`):
...@@ -554,7 +554,7 @@ class StableDiffusionParadigmsPipeline( ...@@ -554,7 +554,7 @@ class StableDiffusionParadigmsPipeline(
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -60,14 +60,14 @@ class Pix2PixInversionPipelineOutput(BaseOutput, TextualInversionLoaderMixin): ...@@ -60,14 +60,14 @@ class Pix2PixInversionPipelineOutput(BaseOutput, TextualInversionLoaderMixin):
Output class for Stable Diffusion pipelines. Output class for Stable Diffusion pipelines.
Args: Args:
latents (`torch.FloatTensor`) latents (`torch.Tensor`)
inverted latents tensor inverted latents tensor
images (`List[PIL.Image.Image]` or `np.ndarray`) images (`List[PIL.Image.Image]` or `np.ndarray`)
List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width, List of denoised PIL images of length `batch_size` or numpy array of shape `(batch_size, height, width,
num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline. num_channels)`. PIL images or numpy array present the denoised images of the diffusion pipeline.
""" """
latents: torch.FloatTensor latents: torch.Tensor
images: Union[List[PIL.Image.Image], np.ndarray] images: Union[List[PIL.Image.Image], np.ndarray]
...@@ -377,8 +377,8 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -377,8 +377,8 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
**kwargs, **kwargs,
): ):
...@@ -410,8 +410,8 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -410,8 +410,8 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
...@@ -431,10 +431,10 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -431,10 +431,10 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -707,7 +707,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -707,7 +707,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
return (embs_target.mean(0) - embs_source.mean(0)).unsqueeze(0) return (embs_target.mean(0) - embs_source.mean(0)).unsqueeze(0)
@torch.no_grad() @torch.no_grad()
def get_embeds(self, prompt: List[str], batch_size: int = 16) -> torch.FloatTensor: def get_embeds(self, prompt: List[str], batch_size: int = 16) -> torch.Tensor:
num_prompts = len(prompt) num_prompts = len(prompt)
embeds = [] embeds = []
for i in range(0, num_prompts, batch_size): for i in range(0, num_prompts, batch_size):
...@@ -827,13 +827,13 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -827,13 +827,13 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
cross_attention_guidance_amount: float = 0.1, cross_attention_guidance_amount: float = 0.1,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: Optional[int] = 1, callback_steps: Optional[int] = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
...@@ -876,14 +876,14 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -876,14 +876,14 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -897,7 +897,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -897,7 +897,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
...@@ -1112,12 +1112,12 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -1112,12 +1112,12 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
num_inference_steps: int = 50, num_inference_steps: int = 50,
guidance_scale: float = 1, guidance_scale: float = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
cross_attention_guidance_amount: float = 0.1, cross_attention_guidance_amount: float = 0.1,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: Optional[int] = 1, callback_steps: Optional[int] = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
lambda_auto_corr: float = 20.0, lambda_auto_corr: float = 20.0,
...@@ -1132,7 +1132,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -1132,7 +1132,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
prompt (`str` or `List[str]`, *optional*): prompt (`str` or `List[str]`, *optional*):
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
instead. instead.
image (`torch.FloatTensor` `np.ndarray`, `PIL.Image.Image`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`): image (`torch.Tensor` `np.ndarray`, `PIL.Image.Image`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
`Image`, or tensor representing an image batch which will be used for conditioning. Can also accept `Image`, or tensor representing an image batch which will be used for conditioning. Can also accept
image latents as `image`, if passing latents directly, it will not be encoded again. image latents as `image`, if passing latents directly, it will not be encoded again.
num_inference_steps (`int`, *optional*, defaults to 50): num_inference_steps (`int`, *optional*, defaults to 50):
...@@ -1147,11 +1147,11 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -1147,11 +1147,11 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
cross_attention_guidance_amount (`float`, defaults to 0.1): cross_attention_guidance_amount (`float`, defaults to 0.1):
...@@ -1164,7 +1164,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin ...@@ -1164,7 +1164,7 @@ class StableDiffusionPix2PixZeroPipeline(DiffusionPipeline, StableDiffusionMixin
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
......
...@@ -1048,7 +1048,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin): ...@@ -1048,7 +1048,7 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
def forward( def forward(
self, self,
sample: torch.FloatTensor, sample: torch.Tensor,
timestep: Union[torch.Tensor, float, int], timestep: Union[torch.Tensor, float, int],
encoder_hidden_states: torch.Tensor, encoder_hidden_states: torch.Tensor,
class_labels: Optional[torch.Tensor] = None, class_labels: Optional[torch.Tensor] = None,
...@@ -1066,10 +1066,10 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin): ...@@ -1066,10 +1066,10 @@ class UNetFlatConditionModel(ModelMixin, ConfigMixin):
The [`UNetFlatConditionModel`] forward method. The [`UNetFlatConditionModel`] forward method.
Args: Args:
sample (`torch.FloatTensor`): sample (`torch.Tensor`):
The noisy input tensor with the following shape `(batch, channel, height, width)`. The noisy input tensor with the following shape `(batch, channel, height, width)`.
timestep (`torch.FloatTensor` or `float` or `int`): The number of timesteps to denoise an input. timestep (`torch.Tensor` or `float` or `int`): The number of timesteps to denoise an input.
encoder_hidden_states (`torch.FloatTensor`): encoder_hidden_states (`torch.Tensor`):
The encoder hidden states with shape `(batch, sequence_length, feature_dim)`. The encoder hidden states with shape `(batch, sequence_length, feature_dim)`.
class_labels (`torch.Tensor`, *optional*, defaults to `None`): class_labels (`torch.Tensor`, *optional*, defaults to `None`):
Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings. Optional class labels for conditioning. Their embeddings will be summed with the timestep embeddings.
...@@ -1590,8 +1590,8 @@ class DownBlockFlat(nn.Module): ...@@ -1590,8 +1590,8 @@ class DownBlockFlat(nn.Module):
self.gradient_checkpointing = False self.gradient_checkpointing = False
def forward( def forward(
self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]: ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, ...]]:
output_states = () output_states = ()
for resnet in self.resnets: for resnet in self.resnets:
...@@ -1719,14 +1719,14 @@ class CrossAttnDownBlockFlat(nn.Module): ...@@ -1719,14 +1719,14 @@ class CrossAttnDownBlockFlat(nn.Module):
def forward( def forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
temb: Optional[torch.FloatTensor] = None, temb: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None,
additional_residuals: Optional[torch.FloatTensor] = None, additional_residuals: Optional[torch.Tensor] = None,
) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]: ) -> Tuple[torch.Tensor, Tuple[torch.Tensor, ...]]:
output_states = () output_states = ()
blocks = list(zip(self.resnets, self.attentions)) blocks = list(zip(self.resnets, self.attentions))
...@@ -1837,13 +1837,13 @@ class UpBlockFlat(nn.Module): ...@@ -1837,13 +1837,13 @@ class UpBlockFlat(nn.Module):
def forward( def forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
res_hidden_states_tuple: Tuple[torch.FloatTensor, ...], res_hidden_states_tuple: Tuple[torch.Tensor, ...],
temb: Optional[torch.FloatTensor] = None, temb: Optional[torch.Tensor] = None,
upsample_size: Optional[int] = None, upsample_size: Optional[int] = None,
*args, *args,
**kwargs, **kwargs,
) -> torch.FloatTensor: ) -> torch.Tensor:
if len(args) > 0 or kwargs.get("scale", None) is not None: if len(args) > 0 or kwargs.get("scale", None) is not None:
deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`." deprecation_message = "The `scale` argument is deprecated and will be ignored. Please remove it, as passing it will raise an error in the future. `scale` should directly be passed while calling the underlying pipeline component i.e., via `cross_attention_kwargs`."
deprecate("scale", "1.0.0", deprecation_message) deprecate("scale", "1.0.0", deprecation_message)
...@@ -1994,15 +1994,15 @@ class CrossAttnUpBlockFlat(nn.Module): ...@@ -1994,15 +1994,15 @@ class CrossAttnUpBlockFlat(nn.Module):
def forward( def forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
res_hidden_states_tuple: Tuple[torch.FloatTensor, ...], res_hidden_states_tuple: Tuple[torch.Tensor, ...],
temb: Optional[torch.FloatTensor] = None, temb: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
upsample_size: Optional[int] = None, upsample_size: Optional[int] = None,
attention_mask: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None,
) -> torch.FloatTensor: ) -> torch.Tensor:
if cross_attention_kwargs is not None: if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None: if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
...@@ -2104,8 +2104,8 @@ class UNetMidBlockFlat(nn.Module): ...@@ -2104,8 +2104,8 @@ class UNetMidBlockFlat(nn.Module):
output_scale_factor (`float`, *optional*, defaults to 1.0): The output scale factor. output_scale_factor (`float`, *optional*, defaults to 1.0): The output scale factor.
Returns: Returns:
`torch.FloatTensor`: The output of the last residual block, which is a tensor of shape `(batch_size, `torch.Tensor`: The output of the last residual block, which is a tensor of shape `(batch_size, in_channels,
in_channels, height, width)`. height, width)`.
""" """
...@@ -2223,7 +2223,7 @@ class UNetMidBlockFlat(nn.Module): ...@@ -2223,7 +2223,7 @@ class UNetMidBlockFlat(nn.Module):
self.attentions = nn.ModuleList(attentions) self.attentions = nn.ModuleList(attentions)
self.resnets = nn.ModuleList(resnets) self.resnets = nn.ModuleList(resnets)
def forward(self, hidden_states: torch.FloatTensor, temb: Optional[torch.FloatTensor] = None) -> torch.FloatTensor: def forward(self, hidden_states: torch.Tensor, temb: Optional[torch.Tensor] = None) -> torch.Tensor:
hidden_states = self.resnets[0](hidden_states, temb) hidden_states = self.resnets[0](hidden_states, temb)
for attn, resnet in zip(self.attentions, self.resnets[1:]): for attn, resnet in zip(self.attentions, self.resnets[1:]):
if attn is not None: if attn is not None:
...@@ -2339,13 +2339,13 @@ class UNetMidBlockFlatCrossAttn(nn.Module): ...@@ -2339,13 +2339,13 @@ class UNetMidBlockFlatCrossAttn(nn.Module):
def forward( def forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
temb: Optional[torch.FloatTensor] = None, temb: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None,
) -> torch.FloatTensor: ) -> torch.Tensor:
if cross_attention_kwargs is not None: if cross_attention_kwargs is not None:
if cross_attention_kwargs.get("scale", None) is not None: if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
...@@ -2480,13 +2480,13 @@ class UNetMidBlockFlatSimpleCrossAttn(nn.Module): ...@@ -2480,13 +2480,13 @@ class UNetMidBlockFlatSimpleCrossAttn(nn.Module):
def forward( def forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
temb: Optional[torch.FloatTensor] = None, temb: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None,
) -> torch.FloatTensor: ) -> torch.Tensor:
cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {} cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
if cross_attention_kwargs.get("scale", None) is not None: if cross_attention_kwargs.get("scale", None) is not None:
logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.") logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
......
...@@ -81,7 +81,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -81,7 +81,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
@torch.no_grad() @torch.no_grad()
def image_variation( def image_variation(
self, self,
image: Union[torch.FloatTensor, PIL.Image.Image], image: Union[torch.Tensor, PIL.Image.Image],
height: Optional[int] = None, height: Optional[int] = None,
width: Optional[int] = None, width: Optional[int] = None,
num_inference_steps: int = 50, num_inference_steps: int = 50,
...@@ -90,10 +90,10 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -90,10 +90,10 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
): ):
r""" r"""
...@@ -123,7 +123,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -123,7 +123,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
...@@ -134,7 +134,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -134,7 +134,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
...@@ -202,10 +202,10 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -202,10 +202,10 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
): ):
r""" r"""
...@@ -235,7 +235,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -235,7 +235,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
...@@ -246,7 +246,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -246,7 +246,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
...@@ -311,10 +311,10 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -311,10 +311,10 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
): ):
r""" r"""
...@@ -344,7 +344,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -344,7 +344,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
...@@ -355,7 +355,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline): ...@@ -355,7 +355,7 @@ class VersatileDiffusionPipeline(DiffusionPipeline):
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -395,10 +395,10 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline): ...@@ -395,10 +395,10 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
**kwargs, **kwargs,
): ):
...@@ -429,7 +429,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline): ...@@ -429,7 +429,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
...@@ -439,7 +439,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline): ...@@ -439,7 +439,7 @@ class VersatileDiffusionDualGuidedPipeline(DiffusionPipeline):
Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple. Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -197,7 +197,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline): ...@@ -197,7 +197,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
and not isinstance(image, list) and not isinstance(image, list)
): ):
raise ValueError( raise ValueError(
"`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is" "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
f" {type(image)}" f" {type(image)}"
) )
...@@ -247,10 +247,10 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline): ...@@ -247,10 +247,10 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
**kwargs, **kwargs,
): ):
...@@ -281,7 +281,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline): ...@@ -281,7 +281,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
...@@ -292,7 +292,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline): ...@@ -292,7 +292,7 @@ class VersatileDiffusionImageVariationPipeline(DiffusionPipeline):
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -333,10 +333,10 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline): ...@@ -333,10 +333,10 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
**kwargs, **kwargs,
): ):
...@@ -367,7 +367,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline): ...@@ -367,7 +367,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
...@@ -378,7 +378,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline): ...@@ -378,7 +378,7 @@ class VersatileDiffusionTextToImagePipeline(DiffusionPipeline):
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -169,10 +169,10 @@ class VQDiffusionPipeline(DiffusionPipeline): ...@@ -169,10 +169,10 @@ class VQDiffusionPipeline(DiffusionPipeline):
truncation_rate: float = 1.0, truncation_rate: float = 1.0,
num_images_per_prompt: int = 1, num_images_per_prompt: int = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
) -> Union[ImagePipelineOutput, Tuple]: ) -> Union[ImagePipelineOutput, Tuple]:
""" """
...@@ -196,7 +196,7 @@ class VQDiffusionPipeline(DiffusionPipeline): ...@@ -196,7 +196,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor` of shape (batch), *optional*): latents (`torch.Tensor` of shape (batch), *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Must be valid embedding indices.If not provided, a latents tensor will be generated of generation. Must be valid embedding indices.If not provided, a latents tensor will be generated of
completely masked latent pixels. completely masked latent pixels.
...@@ -206,7 +206,7 @@ class VQDiffusionPipeline(DiffusionPipeline): ...@@ -206,7 +206,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple. Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
...@@ -301,7 +301,7 @@ class VQDiffusionPipeline(DiffusionPipeline): ...@@ -301,7 +301,7 @@ class VQDiffusionPipeline(DiffusionPipeline):
return ImagePipelineOutput(images=image) return ImagePipelineOutput(images=image)
def truncate(self, log_p_x_0: torch.FloatTensor, truncation_rate: float) -> torch.FloatTensor: def truncate(self, log_p_x_0: torch.Tensor, truncation_rate: float) -> torch.Tensor:
""" """
Truncates `log_p_x_0` such that for each column vector, the total cumulative probability is `truncation_rate` Truncates `log_p_x_0` such that for each column vector, the total cumulative probability is `truncation_rate`
The lowest probabilities that would increase the cumulative probability above `truncation_rate` are set to The lowest probabilities that would increase the cumulative probability above `truncation_rate` are set to
......
...@@ -154,8 +154,8 @@ class I2VGenXLPipeline( ...@@ -154,8 +154,8 @@ class I2VGenXLPipeline(
device, device,
num_videos_per_prompt, num_videos_per_prompt,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
r""" r"""
...@@ -174,10 +174,10 @@ class I2VGenXLPipeline( ...@@ -174,10 +174,10 @@ class I2VGenXLPipeline(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -434,7 +434,7 @@ class I2VGenXLPipeline( ...@@ -434,7 +434,7 @@ class I2VGenXLPipeline(
and not isinstance(image, list) and not isinstance(image, list)
): ):
raise ValueError( raise ValueError(
"`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is" "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
f" {type(image)}" f" {type(image)}"
) )
...@@ -513,9 +513,9 @@ class I2VGenXLPipeline( ...@@ -513,9 +513,9 @@ class I2VGenXLPipeline(
num_videos_per_prompt: Optional[int] = 1, num_videos_per_prompt: Optional[int] = 1,
decode_chunk_size: Optional[int] = 1, decode_chunk_size: Optional[int] = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
...@@ -527,7 +527,7 @@ class I2VGenXLPipeline( ...@@ -527,7 +527,7 @@ class I2VGenXLPipeline(
Args: Args:
prompt (`str` or `List[str]`, *optional*): prompt (`str` or `List[str]`, *optional*):
The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`. The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.FloatTensor`): image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.Tensor`):
Image or images to guide image generation. If you provide a tensor, it needs to be compatible with Image or images to guide image generation. If you provide a tensor, it needs to be compatible with
[`CLIPImageProcessor`](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json). [`CLIPImageProcessor`](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json).
height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`): height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
...@@ -559,14 +559,14 @@ class I2VGenXLPipeline( ...@@ -559,14 +559,14 @@ class I2VGenXLPipeline(
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the `prompt` input argument. provided, text embeddings are generated from the `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument. not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
output_type (`str`, *optional*, defaults to `"pil"`): output_type (`str`, *optional*, defaults to `"pil"`):
......
...@@ -233,8 +233,8 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -233,8 +233,8 @@ class KandinskyPipeline(DiffusionPipeline):
def __call__( def __call__(
self, self,
prompt: Union[str, List[str]], prompt: Union[str, List[str]],
image_embeds: Union[torch.FloatTensor, List[torch.FloatTensor]], image_embeds: Union[torch.Tensor, List[torch.Tensor]],
negative_image_embeds: Union[torch.FloatTensor, List[torch.FloatTensor]], negative_image_embeds: Union[torch.Tensor, List[torch.Tensor]],
negative_prompt: Optional[Union[str, List[str]]] = None, negative_prompt: Optional[Union[str, List[str]]] = None,
height: int = 512, height: int = 512,
width: int = 512, width: int = 512,
...@@ -242,9 +242,9 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -242,9 +242,9 @@ class KandinskyPipeline(DiffusionPipeline):
guidance_scale: float = 4.0, guidance_scale: float = 4.0,
num_images_per_prompt: int = 1, num_images_per_prompt: int = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
return_dict: bool = True, return_dict: bool = True,
): ):
...@@ -254,9 +254,9 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -254,9 +254,9 @@ class KandinskyPipeline(DiffusionPipeline):
Args: Args:
prompt (`str` or `List[str]`): prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation. The prompt or prompts to guide the image generation.
image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`): image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
The clip image embeddings for text prompt, that will be used to condition the image generation. The clip image embeddings for text prompt, that will be used to condition the image generation.
negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`): negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
The clip image embeddings for negative text prompt, will be used to condition the image generation. The clip image embeddings for negative text prompt, will be used to condition the image generation.
negative_prompt (`str` or `List[str]`, *optional*): negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
...@@ -279,7 +279,7 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -279,7 +279,7 @@ class KandinskyPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
...@@ -288,7 +288,7 @@ class KandinskyPipeline(DiffusionPipeline): ...@@ -288,7 +288,7 @@ class KandinskyPipeline(DiffusionPipeline):
(`np.array`) or `"pt"` (`torch.Tensor`). (`np.array`) or `"pt"` (`torch.Tensor`).
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -226,9 +226,9 @@ class KandinskyCombinedPipeline(DiffusionPipeline): ...@@ -226,9 +226,9 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
prior_guidance_scale: float = 4.0, prior_guidance_scale: float = 4.0,
prior_num_inference_steps: int = 25, prior_num_inference_steps: int = 25,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
return_dict: bool = True, return_dict: bool = True,
): ):
...@@ -268,7 +268,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline): ...@@ -268,7 +268,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
...@@ -277,7 +277,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline): ...@@ -277,7 +277,7 @@ class KandinskyCombinedPipeline(DiffusionPipeline):
(`np.array`) or `"pt"` (`torch.Tensor`). (`np.array`) or `"pt"` (`torch.Tensor`).
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
...@@ -436,7 +436,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline): ...@@ -436,7 +436,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
def __call__( def __call__(
self, self,
prompt: Union[str, List[str]], prompt: Union[str, List[str]],
image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]], image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
negative_prompt: Optional[Union[str, List[str]]] = None, negative_prompt: Optional[Union[str, List[str]]] = None,
num_inference_steps: int = 100, num_inference_steps: int = 100,
guidance_scale: float = 4.0, guidance_scale: float = 4.0,
...@@ -447,9 +447,9 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline): ...@@ -447,9 +447,9 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
prior_guidance_scale: float = 4.0, prior_guidance_scale: float = 4.0,
prior_num_inference_steps: int = 25, prior_num_inference_steps: int = 25,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
return_dict: bool = True, return_dict: bool = True,
): ):
...@@ -459,7 +459,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline): ...@@ -459,7 +459,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
Args: Args:
prompt (`str` or `List[str]`): prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation. The prompt or prompts to guide the image generation.
image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`): image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
`Image`, or tensor representing an image batch, that will be used as the starting point for the `Image`, or tensor representing an image batch, that will be used as the starting point for the
process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
again. again.
...@@ -499,7 +499,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline): ...@@ -499,7 +499,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
...@@ -508,7 +508,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline): ...@@ -508,7 +508,7 @@ class KandinskyImg2ImgCombinedPipeline(DiffusionPipeline):
(`np.array`) or `"pt"` (`torch.Tensor`). (`np.array`) or `"pt"` (`torch.Tensor`).
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
...@@ -677,8 +677,8 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline): ...@@ -677,8 +677,8 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
def __call__( def __call__(
self, self,
prompt: Union[str, List[str]], prompt: Union[str, List[str]],
image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]], image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
mask_image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]], mask_image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
negative_prompt: Optional[Union[str, List[str]]] = None, negative_prompt: Optional[Union[str, List[str]]] = None,
num_inference_steps: int = 100, num_inference_steps: int = 100,
guidance_scale: float = 4.0, guidance_scale: float = 4.0,
...@@ -688,9 +688,9 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline): ...@@ -688,9 +688,9 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
prior_guidance_scale: float = 4.0, prior_guidance_scale: float = 4.0,
prior_num_inference_steps: int = 25, prior_num_inference_steps: int = 25,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
return_dict: bool = True, return_dict: bool = True,
): ):
...@@ -700,7 +700,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline): ...@@ -700,7 +700,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
Args: Args:
prompt (`str` or `List[str]`): prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation. The prompt or prompts to guide the image generation.
image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`): image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
`Image`, or tensor representing an image batch, that will be used as the starting point for the `Image`, or tensor representing an image batch, that will be used as the starting point for the
process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded process. Can also accept image latents as `image`, if passing latents directly, it will not be encoded
again. again.
...@@ -739,7 +739,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline): ...@@ -739,7 +739,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
...@@ -748,7 +748,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline): ...@@ -748,7 +748,7 @@ class KandinskyInpaintCombinedPipeline(DiffusionPipeline):
(`np.array`) or `"pt"` (`torch.Tensor`). (`np.array`) or `"pt"` (`torch.Tensor`).
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -266,10 +266,10 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline): ...@@ -266,10 +266,10 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
# add_noise method to overwrite the one in schedule because it use a different beta schedule for adding noise vs sampling # add_noise method to overwrite the one in schedule because it use a different beta schedule for adding noise vs sampling
def add_noise( def add_noise(
self, self,
original_samples: torch.FloatTensor, original_samples: torch.Tensor,
noise: torch.FloatTensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.FloatTensor: ) -> torch.Tensor:
betas = torch.linspace(0.0001, 0.02, 1000, dtype=torch.float32) betas = torch.linspace(0.0001, 0.02, 1000, dtype=torch.float32)
alphas = 1.0 - betas alphas = 1.0 - betas
alphas_cumprod = torch.cumprod(alphas, dim=0) alphas_cumprod = torch.cumprod(alphas, dim=0)
...@@ -295,9 +295,9 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline): ...@@ -295,9 +295,9 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
def __call__( def __call__(
self, self,
prompt: Union[str, List[str]], prompt: Union[str, List[str]],
image: Union[torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]], image: Union[torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]],
image_embeds: torch.FloatTensor, image_embeds: torch.Tensor,
negative_image_embeds: torch.FloatTensor, negative_image_embeds: torch.Tensor,
negative_prompt: Optional[Union[str, List[str]]] = None, negative_prompt: Optional[Union[str, List[str]]] = None,
height: int = 512, height: int = 512,
width: int = 512, width: int = 512,
...@@ -307,7 +307,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline): ...@@ -307,7 +307,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
num_images_per_prompt: int = 1, num_images_per_prompt: int = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
return_dict: bool = True, return_dict: bool = True,
): ):
...@@ -317,12 +317,12 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline): ...@@ -317,12 +317,12 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
Args: Args:
prompt (`str` or `List[str]`): prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation. The prompt or prompts to guide the image generation.
image (`torch.FloatTensor`, `PIL.Image.Image`): image (`torch.Tensor`, `PIL.Image.Image`):
`Image`, or tensor representing an image batch, that will be used as the starting point for the `Image`, or tensor representing an image batch, that will be used as the starting point for the
process. process.
image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`): image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
The clip image embeddings for text prompt, that will be used to condition the image generation. The clip image embeddings for text prompt, that will be used to condition the image generation.
negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`): negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
The clip image embeddings for negative text prompt, will be used to condition the image generation. The clip image embeddings for negative text prompt, will be used to condition the image generation.
negative_prompt (`str` or `List[str]`, *optional*): negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
...@@ -356,7 +356,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline): ...@@ -356,7 +356,7 @@ class KandinskyImg2ImgPipeline(DiffusionPipeline):
(`np.array`) or `"pt"` (`torch.Tensor`). (`np.array`) or `"pt"` (`torch.Tensor`).
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -398,10 +398,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline): ...@@ -398,10 +398,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
def __call__( def __call__(
self, self,
prompt: Union[str, List[str]], prompt: Union[str, List[str]],
image: Union[torch.FloatTensor, PIL.Image.Image], image: Union[torch.Tensor, PIL.Image.Image],
mask_image: Union[torch.FloatTensor, PIL.Image.Image, np.ndarray], mask_image: Union[torch.Tensor, PIL.Image.Image, np.ndarray],
image_embeds: torch.FloatTensor, image_embeds: torch.Tensor,
negative_image_embeds: torch.FloatTensor, negative_image_embeds: torch.Tensor,
negative_prompt: Optional[Union[str, List[str]]] = None, negative_prompt: Optional[Union[str, List[str]]] = None,
height: int = 512, height: int = 512,
width: int = 512, width: int = 512,
...@@ -409,9 +409,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline): ...@@ -409,9 +409,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
guidance_scale: float = 4.0, guidance_scale: float = 4.0,
num_images_per_prompt: int = 1, num_images_per_prompt: int = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
return_dict: bool = True, return_dict: bool = True,
): ):
...@@ -421,10 +421,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline): ...@@ -421,10 +421,10 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
Args: Args:
prompt (`str` or `List[str]`): prompt (`str` or `List[str]`):
The prompt or prompts to guide the image generation. The prompt or prompts to guide the image generation.
image (`torch.FloatTensor`, `PIL.Image.Image` or `np.ndarray`): image (`torch.Tensor`, `PIL.Image.Image` or `np.ndarray`):
`Image`, or tensor representing an image batch, that will be used as the starting point for the `Image`, or tensor representing an image batch, that will be used as the starting point for the
process. process.
mask_image (`PIL.Image.Image`,`torch.FloatTensor` or `np.ndarray`): mask_image (`PIL.Image.Image`,`torch.Tensor` or `np.ndarray`):
`Image`, or a tensor representing an image batch, to mask `image`. White pixels in the mask will be `Image`, or a tensor representing an image batch, to mask `image`. White pixels in the mask will be
repainted, while black pixels will be preserved. You can pass a pytorch tensor as mask only if the repainted, while black pixels will be preserved. You can pass a pytorch tensor as mask only if the
image you passed is a pytorch tensor, and it should contain one color channel (L) instead of 3, so the image you passed is a pytorch tensor, and it should contain one color channel (L) instead of 3, so the
...@@ -432,9 +432,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline): ...@@ -432,9 +432,9 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
image or numpy array, mask should also be a either PIL image or numpy array. If it is a PIL image, it image or numpy array, mask should also be a either PIL image or numpy array. If it is a PIL image, it
will be converted to a single channel (luminance) before use. If it is a nummpy array, the expected will be converted to a single channel (luminance) before use. If it is a nummpy array, the expected
shape is `(H, W)`. shape is `(H, W)`.
image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`): image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
The clip image embeddings for text prompt, that will be used to condition the image generation. The clip image embeddings for text prompt, that will be used to condition the image generation.
negative_image_embeds (`torch.FloatTensor` or `List[torch.FloatTensor]`): negative_image_embeds (`torch.Tensor` or `List[torch.Tensor]`):
The clip image embeddings for negative text prompt, will be used to condition the image generation. The clip image embeddings for negative text prompt, will be used to condition the image generation.
negative_prompt (`str` or `List[str]`, *optional*): negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored The prompt or prompts not to guide the image generation. Ignored when not using guidance (i.e., ignored
...@@ -457,7 +457,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline): ...@@ -457,7 +457,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
...@@ -466,7 +466,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline): ...@@ -466,7 +466,7 @@ class KandinskyInpaintPipeline(DiffusionPipeline):
(`np.array`) or `"pt"` (`torch.Tensor`). (`np.array`) or `"pt"` (`torch.Tensor`).
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that calls every `callback_steps` steps during inference. The function is called with the A function that calls every `callback_steps` steps during inference. The function is called with the
following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function is called. If not specified, the callback is called at The frequency at which the `callback` function is called. If not specified, the callback is called at
every step. every step.
......
...@@ -115,14 +115,14 @@ class KandinskyPriorPipelineOutput(BaseOutput): ...@@ -115,14 +115,14 @@ class KandinskyPriorPipelineOutput(BaseOutput):
Output class for KandinskyPriorPipeline. Output class for KandinskyPriorPipeline.
Args: Args:
image_embeds (`torch.FloatTensor`) image_embeds (`torch.Tensor`)
clip image embeddings for text prompt clip image embeddings for text prompt
negative_image_embeds (`List[PIL.Image.Image]` or `np.ndarray`) negative_image_embeds (`List[PIL.Image.Image]` or `np.ndarray`)
clip image embeddings for unconditional tokens clip image embeddings for unconditional tokens
""" """
image_embeds: Union[torch.FloatTensor, np.ndarray] image_embeds: Union[torch.Tensor, np.ndarray]
negative_image_embeds: Union[torch.FloatTensor, np.ndarray] negative_image_embeds: Union[torch.Tensor, np.ndarray]
class KandinskyPriorPipeline(DiffusionPipeline): class KandinskyPriorPipeline(DiffusionPipeline):
...@@ -173,12 +173,12 @@ class KandinskyPriorPipeline(DiffusionPipeline): ...@@ -173,12 +173,12 @@ class KandinskyPriorPipeline(DiffusionPipeline):
@replace_example_docstring(EXAMPLE_INTERPOLATE_DOC_STRING) @replace_example_docstring(EXAMPLE_INTERPOLATE_DOC_STRING)
def interpolate( def interpolate(
self, self,
images_and_prompts: List[Union[str, PIL.Image.Image, torch.FloatTensor]], images_and_prompts: List[Union[str, PIL.Image.Image, torch.Tensor]],
weights: List[float], weights: List[float],
num_images_per_prompt: int = 1, num_images_per_prompt: int = 1,
num_inference_steps: int = 25, num_inference_steps: int = 25,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
negative_prior_prompt: Optional[str] = None, negative_prior_prompt: Optional[str] = None,
negative_prompt: str = "", negative_prompt: str = "",
guidance_scale: float = 4.0, guidance_scale: float = 4.0,
...@@ -188,7 +188,7 @@ class KandinskyPriorPipeline(DiffusionPipeline): ...@@ -188,7 +188,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
Function invoked when using the prior pipeline for interpolation. Function invoked when using the prior pipeline for interpolation.
Args: Args:
images_and_prompts (`List[Union[str, PIL.Image.Image, torch.FloatTensor]]`): images_and_prompts (`List[Union[str, PIL.Image.Image, torch.Tensor]]`):
list of prompts and images to guide the image generation. list of prompts and images to guide the image generation.
weights: (`List[float]`): weights: (`List[float]`):
list of weights for each condition in `images_and_prompts` list of weights for each condition in `images_and_prompts`
...@@ -200,7 +200,7 @@ class KandinskyPriorPipeline(DiffusionPipeline): ...@@ -200,7 +200,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
...@@ -403,7 +403,7 @@ class KandinskyPriorPipeline(DiffusionPipeline): ...@@ -403,7 +403,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
num_images_per_prompt: int = 1, num_images_per_prompt: int = 1,
num_inference_steps: int = 25, num_inference_steps: int = 25,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
guidance_scale: float = 4.0, guidance_scale: float = 4.0,
output_type: Optional[str] = "pt", output_type: Optional[str] = "pt",
return_dict: bool = True, return_dict: bool = True,
...@@ -425,7 +425,7 @@ class KandinskyPriorPipeline(DiffusionPipeline): ...@@ -425,7 +425,7 @@ class KandinskyPriorPipeline(DiffusionPipeline):
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will ge generated by sampling using the supplied random `generator`. tensor will ge generated by sampling using the supplied random `generator`.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment