"vscode:/vscode.git/clone" did not exist on "2ff3006c35b3f637dc1fc63774a997e7172999f3"
Unverified commit be4afa0b, authored by Mark Van Aken, committed by GitHub

#7535 Update FloatTensor type hints to Tensor (#7883)

* find & replace all FloatTensors to Tensor

* apply formatting

* Update torch.FloatTensor to torch.Tensor in the remaining files

* formatting

* Fix the rest of the places where FloatTensor is used as well as in documentation

* formatting

* Update new file from FloatTensor to Tensor
parent 04f4bd54
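
Why this change matters: `torch.FloatTensor` names the legacy float32 CPU tensor class, so as a type hint it silently excludes the fp16/bf16 and GPU tensors these pipelines routinely handle; `torch.Tensor` is the general type covering every dtype/device combination. A minimal illustrative sketch (the function names below are hypothetical, not taken from this commit):

    import torch
    from typing import Optional

    # Before: torch.FloatTensor is specifically the float32 CPU tensor class.
    def scale_latents_old(latents: Optional[torch.FloatTensor] = None): ...

    # After: torch.Tensor is the base class of all tensors, regardless of dtype or device.
    def scale_latents_new(latents: Optional[torch.Tensor] = None): ...

    half = torch.randn(1, 4, 64, 64, dtype=torch.float16)
    print(isinstance(half, torch.Tensor))       # True
    print(isinstance(half, torch.FloatTensor))  # False: an fp16 tensor is not a "FloatTensor"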
@@ -192,8 +192,8 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
@@ -211,10 +211,10 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds`. instead. If not defined, one has to pass `negative_prompt_embeds`. instead.
                 Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -481,7 +481,7 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             and not isinstance(image, list)
         ):
             raise ValueError(
-                "`image` has to be of type `torch.FloatTensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
+                "`image` has to be of type `torch.Tensor` or `PIL.Image.Image` or `List[PIL.Image.Image]` but is"
                 f" {type(image)}"
             )
@@ -595,8 +595,8 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
     @replace_example_docstring(EXAMPLE_DOC_STRING)
     def __call__(
         self,
-        input_imgs: Union[torch.FloatTensor, PIL.Image.Image] = None,
-        prompt_imgs: Union[torch.FloatTensor, PIL.Image.Image] = None,
+        input_imgs: Union[torch.Tensor, PIL.Image.Image] = None,
+        prompt_imgs: Union[torch.Tensor, PIL.Image.Image] = None,
         poses: Union[List[float], List[List[float]]] = None,
         torch_dtype=torch.float32,
         height: Optional[int] = None,
@@ -607,12 +607,12 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: float = 1.0,
@@ -650,14 +650,14 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -669,7 +669,7 @@ class Zero1to3StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
@@ -104,7 +104,7 @@ class RegionalPromptingStableDiffusionPipeline(StableDiffusionPipeline):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
         rp_args: Dict[str, str] = None,
@@ -168,7 +168,7 @@ class RegionalPromptingStableDiffusionPipeline(StableDiffusionPipeline):
         orig_hw = (height, width)
         revers = True

-        def pcallback(s_self, step: int, timestep: int, latents: torch.FloatTensor, selfs=None):
+        def pcallback(s_self, step: int, timestep: int, latents: torch.Tensor, selfs=None):
             if "PRO" in mode:  # in Prompt mode, make masks from sum of attension maps
                 self.step = step
@@ -198,10 +198,10 @@ class RegionalPromptingStableDiffusionPipeline(StableDiffusionPipeline):
         def hook_forward(module):
             # diffusers==0.23.2
             def forward(
-                hidden_states: torch.FloatTensor,
-                encoder_hidden_states: Optional[torch.FloatTensor] = None,
-                attention_mask: Optional[torch.FloatTensor] = None,
-                temb: Optional[torch.FloatTensor] = None,
+                hidden_states: torch.Tensor,
+                encoder_hidden_states: Optional[torch.Tensor] = None,
+                attention_mask: Optional[torch.Tensor] = None,
+                temb: Optional[torch.Tensor] = None,
                 scale: float = 1.0,
             ) -> torch.Tensor:
                 attn = module
@@ -142,12 +142,12 @@ class TextToVideoSDPipelineOutput(BaseOutput):
     Output class for text-to-video pipelines.

     Args:
-        frames (`List[np.ndarray]` or `torch.FloatTensor`)
+        frames (`List[np.ndarray]` or `torch.Tensor`)
             List of denoised frames (essentially images) as NumPy arrays of shape `(height, width, num_channels)` or as
             a `torch` tensor. The length of the list denotes the video length (the number of frames).
     """

-    frames: Union[List[np.ndarray], torch.FloatTensor]
+    frames: Union[List[np.ndarray], torch.Tensor]

 @torch.no_grad()
@@ -589,20 +589,20 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
     def __call__(
         self,
         prompt: Union[str, List[str]] = None,
-        frames: Union[List[np.ndarray], torch.FloatTensor] = None,
-        control_frames: Union[List[np.ndarray], torch.FloatTensor] = None,
+        frames: Union[List[np.ndarray], torch.Tensor] = None,
+        control_frames: Union[List[np.ndarray], torch.Tensor] = None,
         strength: float = 0.8,
         num_inference_steps: int = 50,
         guidance_scale: float = 7.5,
         negative_prompt: Optional[Union[str, List[str]]] = None,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.8,
@@ -624,8 +624,8 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                 instead.
-            frames (`List[np.ndarray]` or `torch.FloatTensor`): The input images to be used as the starting point for the image generation process.
-            control_frames (`List[np.ndarray]` or `torch.FloatTensor`): The ControlNet input images condition to provide guidance to the `unet` for generation.
+            frames (`List[np.ndarray]` or `torch.Tensor`): The input images to be used as the starting point for the image generation process.
+            control_frames (`List[np.ndarray]` or `torch.Tensor`): The ControlNet input images condition to provide guidance to the `unet` for generation.
             strength ('float'): SDEdit strength.
             num_inference_steps (`int`, *optional*, defaults to 50):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
@@ -646,14 +646,14 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -665,7 +665,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
@@ -507,18 +507,18 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
         fp16: bool = True,
         prompt: Union[str, List[str]] = None,
         image: Union[
-            torch.FloatTensor,
+            torch.Tensor,
             PIL.Image.Image,
             np.ndarray,
-            List[torch.FloatTensor],
+            List[torch.Tensor],
             List[PIL.Image.Image],
             List[np.ndarray],
         ] = None,
         control_image: Union[
-            torch.FloatTensor,
+            torch.Tensor,
             PIL.Image.Image,
             np.ndarray,
-            List[torch.FloatTensor],
+            List[torch.Tensor],
             List[PIL.Image.Image],
             List[np.ndarray],
         ] = None,
@@ -531,12 +531,12 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.8,
@@ -551,14 +551,14 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                 instead.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
-                `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
+                `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The initial image will be used as the starting point for the image generation process. Can also accept
                 image latents as `image`, if passing latents directly, it will not be encoded again.
-            control_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
-                `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+            control_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
+                `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
-                the type is specified as `Torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
+                the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
                 also be accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If
                 height and/or width are passed, `image` is resized according to them. If multiple ControlNets are
                 specified in init, images must be passed as a list such that each element of the list can be correctly
@@ -588,14 +588,14 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -607,7 +607,7 @@ class OnnxStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
@@ -611,18 +611,18 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
         fp16: bool = True,
         prompt: Union[str, List[str]] = None,
         image: Union[
-            torch.FloatTensor,
+            torch.Tensor,
             PIL.Image.Image,
             np.ndarray,
-            List[torch.FloatTensor],
+            List[torch.Tensor],
             List[PIL.Image.Image],
             List[np.ndarray],
         ] = None,
         control_image: Union[
-            torch.FloatTensor,
+            torch.Tensor,
             PIL.Image.Image,
             np.ndarray,
-            List[torch.FloatTensor],
+            List[torch.Tensor],
             List[PIL.Image.Image],
             List[np.ndarray],
         ] = None,
@@ -635,12 +635,12 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 0.8,
@@ -655,14 +655,14 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
             prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                 instead.
-            image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
-                `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
+                `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The initial image will be used as the starting point for the image generation process. Can also accept
                 image latents as `image`, if passing latents directly, it will not be encoded again.
-            control_image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
-                `List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
+            control_image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,:
+                `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
                 The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
-                the type is specified as `Torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
+                the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
                 also be accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If
                 height and/or width are passed, `image` is resized according to them. If multiple ControlNets are
                 specified in init, images must be passed as a list such that each element of the list can be correctly
@@ -692,14 +692,14 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
             generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                 One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                 to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -711,7 +711,7 @@ class TensorRTStableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
@@ -34,16 +34,16 @@ class UFOGenSchedulerOutput(BaseOutput):
     Output class for the scheduler's `step` function output.

     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
             denoising loop.
-        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
             The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
             `pred_original_sample` can be used to preview progress or for guidance.
     """

-    prev_sample: torch.FloatTensor
-    pred_original_sample: Optional[torch.FloatTensor] = None
+    prev_sample: torch.Tensor
+    pred_original_sample: Optional[torch.Tensor] = None


 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
@@ -98,11 +98,11 @@ def rescale_zero_terminal_snr(betas):

     Args:
-        betas (`torch.FloatTensor`):
+        betas (`torch.Tensor`):
             the betas that the scheduler is being initialized with.

     Returns:
-        `torch.FloatTensor`: rescaled betas with zero terminal SNR
+        `torch.Tensor`: rescaled betas with zero terminal SNR
     """
     # Convert betas to alphas_bar_sqrt
     alphas = 1.0 - betas
@@ -240,19 +240,19 @@ class UFOGenScheduler(SchedulerMixin, ConfigMixin):
         self.num_inference_steps = None
         self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy())

-    def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+    def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
         """
         Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
         current timestep.

         Args:
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 The input sample.
             timestep (`int`, *optional*):
                 The current timestep in the diffusion chain.

         Returns:
-            `torch.FloatTensor`:
+            `torch.Tensor`:
                 A scaled input sample.
         """
         return sample
@@ -340,7 +340,7 @@ class UFOGenScheduler(SchedulerMixin, ConfigMixin):
         self.timesteps = torch.from_numpy(timesteps).to(device)

     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
-    def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+    def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
         """
         "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
         prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
@@ -375,9 +375,9 @@ class UFOGenScheduler(SchedulerMixin, ConfigMixin):
     def step(
         self,
-        model_output: torch.FloatTensor,
+        model_output: torch.Tensor,
         timestep: int,
-        sample: torch.FloatTensor,
+        sample: torch.Tensor,
         generator: Optional[torch.Generator] = None,
         return_dict: bool = True,
     ) -> Union[UFOGenSchedulerOutput, Tuple]:
@@ -386,11 +386,11 @@ class UFOGenScheduler(SchedulerMixin, ConfigMixin):
         process from the learned model outputs (most often the predicted noise).

         Args:
-            model_output (`torch.FloatTensor`):
+            model_output (`torch.Tensor`):
                 The direct output from learned diffusion model.
             timestep (`float`):
                 The current discrete timestep in the diffusion chain.
-            sample (`torch.FloatTensor`):
+            sample (`torch.Tensor`):
                 A current instance of a sample created by the diffusion process.
             generator (`torch.Generator`, *optional*):
                 A random number generator.
@@ -461,10 +461,10 @@ class UFOGenScheduler(SchedulerMixin, ConfigMixin):
     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
     def add_noise(
         self,
-        original_samples: torch.FloatTensor,
-        noise: torch.FloatTensor,
+        original_samples: torch.Tensor,
+        noise: torch.Tensor,
         timesteps: torch.IntTensor,
-    ) -> torch.FloatTensor:
+    ) -> torch.Tensor:
         # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
         alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
         timesteps = timesteps.to(original_samples.device)
@@ -483,9 +483,7 @@ class UFOGenScheduler(SchedulerMixin, ConfigMixin):
         return noisy_samples

     # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
-    def get_velocity(
-        self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
-    ) -> torch.FloatTensor:
+    def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
         # Make sure alphas_cumprod and timestep have same device and dtype as sample
         alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
         timesteps = timesteps.to(sample.device)
@@ -286,10 +286,10 @@ class StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -323,7 +323,7 @@ class StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator`, *optional*):
                 A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
                 deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
@@ -335,7 +335,7 @@ class StableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
@@ -81,12 +81,12 @@ class SeedResizeStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
-        text_embeddings: Optional[torch.FloatTensor] = None,
+        text_embeddings: Optional[torch.Tensor] = None,
         **kwargs,
     ):
         r"""
@@ -119,7 +119,7 @@ class SeedResizeStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator`, *optional*):
                 A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
                 deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
@@ -131,7 +131,7 @@ class SeedResizeStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
                 plain tuple.
             callback (`Callable`, *optional*):
                 A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
             callback_steps (`int`, *optional*, defaults to 1):
                 The frequency at which the `callback` function will be called. If not specified, the callback will be
                 called at every step.
@@ -76,10 +76,10 @@ class SpeechToImagePipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -96,10 +96,10 @@ class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -133,10 +133,10 @@ class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -170,10 +170,10 @@ class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -207,10 +207,10 @@ class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -244,10 +244,10 @@ class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[torch.Generator] = None,
-        latents: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         **kwargs,
     ):
@@ -276,7 +276,7 @@ class StableDiffusionComparisonPipeline(DiffusionPipeline, StableDiffusionMixin):
             generator (`torch.Generator`, optional):
                 A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
                 deterministic.
-            latents (`torch.FloatTensor`, optional):
+            latents (`torch.Tensor`, optional):
                 Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                 generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                 tensor will ge generated by sampling using the supplied random `generator`.
@@ -189,8 +189,8 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt,
         do_classifier_free_guidance,
         negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
     ):
         r"""
         Encodes the prompt into text encoder hidden states.
@@ -207,10 +207,10 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
             negative_prompt (`str` or `List[str]`, *optional*):
                 The prompt or prompts not to guide the image generation. If not defined, one has to pass
                 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                 provided, text embeddings will be generated from `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                 weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                 argument.
@@ -600,7 +600,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
         prompt: Union[str, List[str]] = None,
         image: Union[torch.Tensor, PIL.Image.Image] = None,
         controlnet_conditioning_image: Union[
-            torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image]
+            torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]
         ] = None,
         strength: float = 0.8,
         height: Optional[int] = None,
@@ -611,12 +611,12 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
         num_images_per_prompt: Optional[int] = 1,
         eta: float = 0.0,
         generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
         output_type: Optional[str] = "pil",
         return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
         controlnet_conditioning_scale: Union[float, List[float]] = 1.0,
@@ -633,9 +633,9 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusionMixin):
             image (`torch.Tensor` or `PIL.Image.Image`):
                 `Image`, or tensor representing an image batch which will be inpainted, *i.e.* parts of the image will
                 be masked out with `mask_image` and repainted according to `prompt`.
-            controlnet_conditioning_image (`torch.FloatTensor`, `PIL.Image.Image`, `List[torch.FloatTensor]` or `List[PIL.Image.Image]`):
+            controlnet_conditioning_image (`torch.Tensor`, `PIL.Image.Image`, `List[torch.Tensor]` or `List[PIL.Image.Image]`):
The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
the type is specified as `Torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
also be accepted as an image. The control image is automatically resized to fit the output image. also be accepted as an image. The control image is automatically resized to fit the output image.
strength (`float`, *optional*): strength (`float`, *optional*):
Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image` Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
...@@ -667,14 +667,14 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusio ...@@ -667,14 +667,14 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusio
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will be generated by sampling using the supplied random `generator`. tensor will be generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -686,7 +686,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusio ...@@ -686,7 +686,7 @@ class StableDiffusionControlNetImg2ImgPipeline(DiffusionPipeline, StableDiffusio
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
......
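The `callback` docstring above fixes the documented signature to `callback(step: int, timestep: int, latents: torch.Tensor)`. A hypothetical callback written against that contract (the commented `pipe(...)` call is an assumption for illustration):

import torch

def log_latents(step: int, timestep: int, latents: torch.Tensor) -> None:
    # The torch.Tensor hint also covers fp16/bf16 latents, which the old
    # torch.FloatTensor annotation would mislabel.
    print(f"step={step} timestep={timestep} dtype={latents.dtype} std={latents.std().item():.4f}")

# pipe(prompt, image=init_image, callback=log_latents, callback_steps=1)  # hypothetical call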
...@@ -288,8 +288,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio ...@@ -288,8 +288,8 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
): ):
r""" r"""
Encodes the prompt into text encoder hidden states. Encodes the prompt into text encoder hidden states.
...@@ -306,10 +306,10 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio ...@@ -306,10 +306,10 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
negative_prompt (`str` or `List[str]`, *optional*): negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds` instead. The prompt or prompts not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds` instead.
Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`). Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -744,7 +744,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio ...@@ -744,7 +744,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
image: Union[torch.Tensor, PIL.Image.Image] = None, image: Union[torch.Tensor, PIL.Image.Image] = None,
mask_image: Union[torch.Tensor, PIL.Image.Image] = None, mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
controlnet_conditioning_image: Union[ controlnet_conditioning_image: Union[
torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image] torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]
] = None, ] = None,
height: Optional[int] = None, height: Optional[int] = None,
width: Optional[int] = None, width: Optional[int] = None,
...@@ -754,12 +754,12 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio ...@@ -754,12 +754,12 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
controlnet_conditioning_scale: Union[float, List[float]] = 1.0, controlnet_conditioning_scale: Union[float, List[float]] = 1.0,
...@@ -779,9 +779,9 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio ...@@ -779,9 +779,9 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L) to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
instead of 3, so the expected shape would be `(B, H, W, 1)`. instead of 3, so the expected shape would be `(B, H, W, 1)`.
controlnet_conditioning_image (`torch.FloatTensor`, `PIL.Image.Image`, `List[torch.FloatTensor]` or `List[PIL.Image.Image]`): controlnet_conditioning_image (`torch.Tensor`, `PIL.Image.Image`, `List[torch.Tensor]` or `List[PIL.Image.Image]`):
The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
the type is specified as `Torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
also be accepted as an image. The control image is automatically resized to fit the output image. also be accepted as an image. The control image is automatically resized to fit the output image.
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image. The height in pixels of the generated image.
...@@ -807,14 +807,14 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio ...@@ -807,14 +807,14 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will be generated by sampling using the supplied random `generator`. tensor will be generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -826,7 +826,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio ...@@ -826,7 +826,7 @@ class StableDiffusionControlNetInpaintPipeline(DiffusionPipeline, StableDiffusio
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
......
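The `mask_image` docstring above expects a single luminance channel in `(B, H, W, 1)` layout. A hypothetical mask tensor in that layout (the size and masked region are illustrative):

import torch

mask_image = torch.zeros(1, 512, 512, 1)   # (B, H, W, 1), all pixels preserved
mask_image[:, 128:384, 128:384, :] = 1.0   # white (1.0) marks the region to repaint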
...@@ -273,8 +273,8 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD ...@@ -273,8 +273,8 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
): ):
r""" r"""
Encodes the prompt into text encoder hidden states. Encodes the prompt into text encoder hidden states.
...@@ -291,10 +291,10 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD ...@@ -291,10 +291,10 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
negative_prompt (`str` or `List[str]`, *optional*): negative_prompt (`str` or `List[str]`, *optional*):
The prompt or prompts not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds` instead. The prompt or prompts not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds` instead.
Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`). Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -731,7 +731,7 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD ...@@ -731,7 +731,7 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
image: Union[torch.Tensor, PIL.Image.Image] = None, image: Union[torch.Tensor, PIL.Image.Image] = None,
mask_image: Union[torch.Tensor, PIL.Image.Image] = None, mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
controlnet_conditioning_image: Union[ controlnet_conditioning_image: Union[
torch.FloatTensor, PIL.Image.Image, List[torch.FloatTensor], List[PIL.Image.Image] torch.Tensor, PIL.Image.Image, List[torch.Tensor], List[PIL.Image.Image]
] = None, ] = None,
strength: float = 0.8, strength: float = 0.8,
height: Optional[int] = None, height: Optional[int] = None,
...@@ -742,12 +742,12 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD ...@@ -742,12 +742,12 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
controlnet_conditioning_scale: float = 1.0, controlnet_conditioning_scale: float = 1.0,
...@@ -767,9 +767,9 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD ...@@ -767,9 +767,9 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted repainted, while black pixels will be preserved. If `mask_image` is a PIL image, it will be converted
to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L) to a single channel (luminance) before use. If it's a tensor, it should contain one color channel (L)
instead of 3, so the expected shape would be `(B, H, W, 1)`. instead of 3, so the expected shape would be `(B, H, W, 1)`.
controlnet_conditioning_image (`torch.FloatTensor`, `PIL.Image.Image`, `List[torch.FloatTensor]` or `List[PIL.Image.Image]`): controlnet_conditioning_image (`torch.Tensor`, `PIL.Image.Image`, `List[torch.Tensor]` or `List[PIL.Image.Image]`):
The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
the type is specified as `Torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
also be accepted as an image. The control image is automatically resized to fit the output image. also be accepted as an image. The control image is automatically resized to fit the output image.
strength (`float`, *optional*): strength (`float`, *optional*):
Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image` Conceptually, indicates how much to transform the reference `image`. Must be between 0 and 1. `image`
...@@ -801,14 +801,14 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD ...@@ -801,14 +801,14 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will be generated by sampling using the supplied random `generator`. tensor will be generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -820,7 +820,7 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD ...@@ -820,7 +820,7 @@ class StableDiffusionControlNetInpaintImg2ImgPipeline(DiffusionPipeline, StableD
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
......
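Per the `controlnet_conditioning_image` docstring above, both of the following satisfy the widened hint: a tensor is passed to ControlNet as is, while a PIL image is resized to the output size first (shapes and the stand-in image are assumptions):

import torch
from PIL import Image

cond_as_tensor = torch.rand(1, 3, 512, 512)   # handed to ControlNet unchanged
cond_as_pil = Image.new("L", (512, 512))      # stand-in for a real edge/pose map; auto-resized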
...@@ -100,14 +100,14 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -100,14 +100,14 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
self, self,
prompt: Union[str, List[str]] = None, prompt: Union[str, List[str]] = None,
image: Union[ image: Union[
torch.FloatTensor, torch.Tensor,
PIL.Image.Image, PIL.Image.Image,
np.ndarray, np.ndarray,
List[torch.FloatTensor], List[torch.Tensor],
List[PIL.Image.Image], List[PIL.Image.Image],
List[np.ndarray], List[np.ndarray],
] = None, ] = None,
ref_image: Union[torch.FloatTensor, PIL.Image.Image] = None, ref_image: Union[torch.Tensor, PIL.Image.Image] = None,
height: Optional[int] = None, height: Optional[int] = None,
width: Optional[int] = None, width: Optional[int] = None,
num_inference_steps: int = 50, num_inference_steps: int = 50,
...@@ -116,12 +116,12 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -116,12 +116,12 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
controlnet_conditioning_scale: Union[float, List[float]] = 1.0, controlnet_conditioning_scale: Union[float, List[float]] = 1.0,
...@@ -139,17 +139,17 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -139,17 +139,17 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
prompt (`str` or `List[str]`, *optional*): prompt (`str` or `List[str]`, *optional*):
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
instead. instead.
image (`torch.FloatTensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.FloatTensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`, image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, `List[np.ndarray]`,
`List[List[torch.FloatTensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`): `List[List[torch.Tensor]]`, `List[List[np.ndarray]]` or `List[List[PIL.Image.Image]]`):
The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If The ControlNet input condition. ControlNet uses this input condition to generate guidance to Unet. If
the type is specified as `Torch.FloatTensor`, it is passed to ControlNet as is. `PIL.Image.Image` can the type is specified as `torch.Tensor`, it is passed to ControlNet as is. `PIL.Image.Image` can
also be accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If also be accepted as an image. The dimensions of the output image defaults to `image`'s dimensions. If
height and/or width are passed, `image` is resized according to them. If multiple ControlNets are height and/or width are passed, `image` is resized according to them. If multiple ControlNets are
specified in init, images must be passed as a list such that each element of the list can be correctly specified in init, images must be passed as a list such that each element of the list can be correctly
batched for input to a single controlnet. batched for input to a single controlnet.
ref_image (`torch.FloatTensor`, `PIL.Image.Image`): ref_image (`torch.Tensor`, `PIL.Image.Image`):
The Reference Control input condition. Reference Control uses this input condition to generate guidance to Unet. If The Reference Control input condition. Reference Control uses this input condition to generate guidance to Unet. If
the type is specified as `Torch.FloatTensor`, it is passed to Reference Control as is. `PIL.Image.Image` can the type is specified as `torch.Tensor`, it is passed to Reference Control as is. `PIL.Image.Image` can
also be accepted as an image. also be accepted as an image.
height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor): height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
The height in pixels of the generated image. The height in pixels of the generated image.
...@@ -176,14 +176,14 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -176,14 +176,14 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will be generated by sampling using the supplied random `generator`. tensor will be generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -195,7 +195,7 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -195,7 +195,7 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
...@@ -374,10 +374,10 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -374,10 +374,10 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
def hacked_basic_transformer_inner_forward( def hacked_basic_transformer_inner_forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
attention_mask: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None,
timestep: Optional[torch.LongTensor] = None, timestep: Optional[torch.LongTensor] = None,
cross_attention_kwargs: Dict[str, Any] = None, cross_attention_kwargs: Dict[str, Any] = None,
class_labels: Optional[torch.LongTensor] = None, class_labels: Optional[torch.LongTensor] = None,
...@@ -492,12 +492,12 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -492,12 +492,12 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
def hack_CrossAttnDownBlock2D_forward( def hack_CrossAttnDownBlock2D_forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
temb: Optional[torch.FloatTensor] = None, temb: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None,
attention_mask: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None,
): ):
eps = 1e-6 eps = 1e-6
...@@ -588,14 +588,14 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli ...@@ -588,14 +588,14 @@ class StableDiffusionControlNetReferencePipeline(StableDiffusionControlNetPipeli
def hacked_CrossAttnUpBlock2D_forward( def hacked_CrossAttnUpBlock2D_forward(
self, self,
hidden_states: torch.FloatTensor, hidden_states: torch.Tensor,
res_hidden_states_tuple: Tuple[torch.FloatTensor, ...], res_hidden_states_tuple: Tuple[torch.Tensor, ...],
temb: Optional[torch.FloatTensor] = None, temb: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None, encoder_hidden_states: Optional[torch.Tensor] = None,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
upsample_size: Optional[int] = None, upsample_size: Optional[int] = None,
attention_mask: Optional[torch.FloatTensor] = None, attention_mask: Optional[torch.Tensor] = None,
encoder_attention_mask: Optional[torch.FloatTensor] = None, encoder_attention_mask: Optional[torch.Tensor] = None,
): ):
eps = 1e-6 eps = 1e-6
# TODO(Patrick, William) - attention mask is not used # TODO(Patrick, William) - attention mask is not used
......
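The hacked forward methods above follow one annotation pattern: floating-point activations and masks become `torch.Tensor`, while integer-valued inputs such as `timestep` keep `torch.LongTensor`. A stripped-down sketch of that signature style (the body is a placeholder):

from typing import Optional

import torch

def forward(
    hidden_states: torch.Tensor,
    attention_mask: Optional[torch.Tensor] = None,
    timestep: Optional[torch.LongTensor] = None,
) -> torch.Tensor:
    return hidden_states  # placeholder body; only the annotations matter here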
...@@ -311,8 +311,8 @@ class StableDiffusionIPEXPipeline( ...@@ -311,8 +311,8 @@ class StableDiffusionIPEXPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
): ):
r""" r"""
Encodes the prompt into text encoder hidden states. Encodes the prompt into text encoder hidden states.
...@@ -330,10 +330,10 @@ class StableDiffusionIPEXPipeline( ...@@ -330,10 +330,10 @@ class StableDiffusionIPEXPipeline(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. `negative_prompt_embeds` instead.
Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`). Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -567,12 +567,12 @@ class StableDiffusionIPEXPipeline( ...@@ -567,12 +567,12 @@ class StableDiffusionIPEXPipeline(
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
): ):
...@@ -608,14 +608,14 @@ class StableDiffusionIPEXPipeline( ...@@ -608,14 +608,14 @@ class StableDiffusionIPEXPipeline(
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html) One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
to make generation deterministic. to make generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor will be generated by sampling using the supplied random `generator`. tensor will be generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -627,7 +627,7 @@ class StableDiffusionIPEXPipeline( ...@@ -627,7 +627,7 @@ class StableDiffusionIPEXPipeline(
plain tuple. plain tuple.
callback (`Callable`, *optional*): callback (`Callable`, *optional*):
A function that will be called every `callback_steps` steps during inference. The function will be A function that will be called every `callback_steps` steps during inference. The function will be
called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`. called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
callback_steps (`int`, *optional*, defaults to 1): callback_steps (`int`, *optional*, defaults to 1):
The frequency at which the `callback` function will be called. If not specified, the callback will be The frequency at which the `callback` function will be called. If not specified, the callback will be
called at every step. called at every step.
......
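The `latents` docstring above says a pre-generated tensor can pin the noise across different prompts. A hedged sketch of that usage (`pipe` and the SD 1.x latent shape of 4 channels at 1/8 resolution are assumptions):

import torch

generator = torch.Generator().manual_seed(0)
latents = torch.randn(1, 4, 64, 64, generator=generator)  # 512x512 image -> 64x64 latents
# image_a = pipe("a cat", latents=latents).images[0]           # hypothetical pipe
# image_b = pipe("a dog", latents=latents.clone()).images[0]   # same noise, different prompt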
...@@ -99,8 +99,8 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin): ...@@ -99,8 +99,8 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin):
def inpaint( def inpaint(
self, self,
prompt: Union[str, List[str]], prompt: Union[str, List[str]],
image: Union[torch.FloatTensor, PIL.Image.Image], image: Union[torch.Tensor, PIL.Image.Image],
mask_image: Union[torch.FloatTensor, PIL.Image.Image], mask_image: Union[torch.Tensor, PIL.Image.Image],
strength: float = 0.8, strength: float = 0.8,
num_inference_steps: Optional[int] = 50, num_inference_steps: Optional[int] = 50,
guidance_scale: Optional[float] = 7.5, guidance_scale: Optional[float] = 7.5,
...@@ -110,7 +110,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin): ...@@ -110,7 +110,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin):
generator: Optional[torch.Generator] = None, generator: Optional[torch.Generator] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
): ):
# For more information on how this function works, please see: https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion#diffusers.StableDiffusionImg2ImgPipeline # For more information on how this function works, please see: https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion#diffusers.StableDiffusionImg2ImgPipeline
...@@ -134,7 +134,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin): ...@@ -134,7 +134,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin):
def img2img( def img2img(
self, self,
prompt: Union[str, List[str]], prompt: Union[str, List[str]],
image: Union[torch.FloatTensor, PIL.Image.Image], image: Union[torch.Tensor, PIL.Image.Image],
strength: float = 0.8, strength: float = 0.8,
num_inference_steps: Optional[int] = 50, num_inference_steps: Optional[int] = 50,
guidance_scale: Optional[float] = 7.5, guidance_scale: Optional[float] = 7.5,
...@@ -144,7 +144,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin): ...@@ -144,7 +144,7 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin):
generator: Optional[torch.Generator] = None, generator: Optional[torch.Generator] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
**kwargs, **kwargs,
): ):
...@@ -177,10 +177,10 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin): ...@@ -177,10 +177,10 @@ class StableDiffusionMegaPipeline(DiffusionPipeline, StableDiffusionMixin):
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
eta: float = 0.0, eta: float = 0.0,
generator: Optional[torch.Generator] = None, generator: Optional[torch.Generator] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None, callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
callback_steps: int = 1, callback_steps: int = 1,
): ):
# For more information on how this function works, please see: https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion#diffusers.StableDiffusionPipeline # For more information on how this function works, please see: https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion#diffusers.StableDiffusionPipeline
......
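The mega pipeline's `inpaint` and `img2img` entry points above now accept `Union[torch.Tensor, PIL.Image.Image]` for `image` and `mask_image`. Either of the following would satisfy that hint (sizes and the [-1, 1] tensor range are assumptions):

import torch
from PIL import Image

init_as_pil = Image.new("RGB", (512, 512), color="gray")
init_as_tensor = torch.rand(1, 3, 512, 512) * 2 - 1   # assumed [-1, 1] range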
...@@ -268,10 +268,10 @@ class StableDiffusionReferencePipeline( ...@@ -268,10 +268,10 @@ class StableDiffusionReferencePipeline(
width: int, width: int,
callback_steps: Optional[int], callback_steps: Optional[int],
negative_prompt: Optional[str] = None, negative_prompt: Optional[str] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
ip_adapter_image: Optional[torch.Tensor] = None, ip_adapter_image: Optional[torch.Tensor] = None,
ip_adapter_image_embeds: Optional[torch.FloatTensor] = None, ip_adapter_image_embeds: Optional[torch.Tensor] = None,
callback_on_step_end_tensor_inputs: Optional[List[str]] = None, callback_on_step_end_tensor_inputs: Optional[List[str]] = None,
) -> None: ) -> None:
""" """
...@@ -283,10 +283,10 @@ class StableDiffusionReferencePipeline( ...@@ -283,10 +283,10 @@ class StableDiffusionReferencePipeline(
width (int): The width of the input image. width (int): The width of the input image.
callback_steps (Optional[int]): The number of steps to perform the callback on. callback_steps (Optional[int]): The number of steps to perform the callback on.
negative_prompt (Optional[str]): The negative prompt text. negative_prompt (Optional[str]): The negative prompt text.
prompt_embeds (Optional[torch.FloatTensor]): The prompt embeddings. prompt_embeds (Optional[torch.Tensor]): The prompt embeddings.
negative_prompt_embeds (Optional[torch.FloatTensor]): The negative prompt embeddings. negative_prompt_embeds (Optional[torch.Tensor]): The negative prompt embeddings.
ip_adapter_image (Optional[torch.Tensor]): The input adapter image. ip_adapter_image (Optional[torch.Tensor]): The input adapter image.
ip_adapter_image_embeds (Optional[torch.FloatTensor]): The input adapter image embeddings. ip_adapter_image_embeds (Optional[torch.Tensor]): The input adapter image embeddings.
callback_on_step_end_tensor_inputs (Optional[List[str]]): The list of tensor inputs to perform the callback on. callback_on_step_end_tensor_inputs (Optional[List[str]]): The list of tensor inputs to perform the callback on.
Raises: Raises:
...@@ -357,11 +357,11 @@ class StableDiffusionReferencePipeline( ...@@ -357,11 +357,11 @@ class StableDiffusionReferencePipeline(
num_images_per_prompt: int, num_images_per_prompt: int,
do_classifier_free_guidance: bool, do_classifier_free_guidance: bool,
negative_prompt: Optional[Union[str, List[str]]] = None, negative_prompt: Optional[Union[str, List[str]]] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
**kwargs, **kwargs,
) -> torch.FloatTensor: ) -> torch.Tensor:
r""" r"""
Encodes the prompt into embeddings. Encodes the prompt into embeddings.
...@@ -371,13 +371,13 @@ class StableDiffusionReferencePipeline( ...@@ -371,13 +371,13 @@ class StableDiffusionReferencePipeline(
num_images_per_prompt (int): The number of images per prompt. num_images_per_prompt (int): The number of images per prompt.
do_classifier_free_guidance (bool): Whether to use classifier-free guidance. do_classifier_free_guidance (bool): Whether to use classifier-free guidance.
negative_prompt (Optional[Union[str, List[str]]], optional): The negative prompt text or a list of negative prompt texts. Defaults to None. negative_prompt (Optional[Union[str, List[str]]], optional): The negative prompt text or a list of negative prompt texts. Defaults to None.
prompt_embeds (Optional[torch.FloatTensor], optional): The prompt embeddings. Defaults to None. prompt_embeds (Optional[torch.Tensor], optional): The prompt embeddings. Defaults to None.
negative_prompt_embeds (Optional[torch.FloatTensor], optional): The negative prompt embeddings. Defaults to None. negative_prompt_embeds (Optional[torch.Tensor], optional): The negative prompt embeddings. Defaults to None.
lora_scale (Optional[float], optional): The LoRA scale. Defaults to None. lora_scale (Optional[float], optional): The LoRA scale. Defaults to None.
**kwargs: Additional keyword arguments. **kwargs: Additional keyword arguments.
Returns: Returns:
torch.FloatTensor: The encoded prompt embeddings. torch.Tensor: The encoded prompt embeddings.
""" """
deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple." deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
deprecate("_encode_prompt()", "1.0.0", deprecation_message, standard_warn=False) deprecate("_encode_prompt()", "1.0.0", deprecation_message, standard_warn=False)
...@@ -407,11 +407,11 @@ class StableDiffusionReferencePipeline( ...@@ -407,11 +407,11 @@ class StableDiffusionReferencePipeline(
num_images_per_prompt: int, num_images_per_prompt: int,
do_classifier_free_guidance: bool, do_classifier_free_guidance: bool,
negative_prompt: Optional[str] = None, negative_prompt: Optional[str] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
) -> torch.FloatTensor: ) -> torch.Tensor:
r""" r"""
Encodes the prompt into text encoder hidden states. Encodes the prompt into text encoder hidden states.
...@@ -428,10 +428,10 @@ class StableDiffusionReferencePipeline( ...@@ -428,10 +428,10 @@ class StableDiffusionReferencePipeline(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
@@ -813,7 +813,7 @@ class StableDiffusionReferencePipeline(
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
-        ref_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
+        ref_image: Union[torch.Tensor, PIL.Image.Image] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        num_inference_steps: int = 50,
@@ -822,12 +822,12 @@ class StableDiffusionReferencePipeline(
        num_images_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
        callback_steps: int = 1,
        cross_attention_kwargs: Optional[Dict[str, Any]] = None,
        guidance_rescale: float = 0.0,
@@ -844,9 +844,9 @@ class StableDiffusionReferencePipeline(
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
                instead.
-            ref_image (`torch.FloatTensor`, `PIL.Image.Image`):
+            ref_image (`torch.Tensor`, `PIL.Image.Image`):
                The Reference Control input condition. Reference Control uses this input condition to generate
-                guidance for the UNet. If the type is specified as `Torch.FloatTensor`, it is passed to Reference
+                guidance for the UNet. If the type is specified as `torch.Tensor`, it is passed to Reference
                Control as is. A `PIL.Image.Image` can also be accepted as an image.
            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated image.
@@ -873,14 +873,14 @@ class StableDiffusionReferencePipeline(
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a
                latents tensor will be generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If
                not provided, text embeddings will be generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, `negative_prompt_embeds` will be generated from the `negative_prompt`
                input argument.
@@ -892,7 +892,7 @@ class StableDiffusionReferencePipeline(
                plain tuple.
            callback (`Callable`, *optional*):
                A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
            callback_steps (`int`, *optional*, defaults to 1):
                The frequency at which the `callback` function will be called. If not specified, the callback will
                be called at every step.
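The widened callback hint is where the rename is most visible to users: under `torch.float16` the latents passed to the callback are half-precision tensors, which the old `torch.FloatTensor` annotation technically excluded even though the pipeline accepted them. A hedged usage sketch (weights ID, file name, and callback body are illustrative assumptions):

```python
import torch
from diffusers import DiffusionPipeline
from PIL import Image

# Community pipeline loaded by name; fp16 makes the old hint visibly wrong.
pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    custom_pipeline="stable_diffusion_reference",
    torch_dtype=torch.float16,
).to("cuda")

def on_step(step: int, timestep: int, latents: torch.Tensor) -> None:
    # Under fp16 this tensor is a HalfTensor, not a FloatTensor,
    # so torch.Tensor is the accurate annotation.
    print(step, latents.dtype)

result = pipe(
    prompt="a cat wearing a spacesuit",
    ref_image=Image.open("reference.png").convert("RGB"),
    num_inference_steps=30,
    callback=on_step,
    callback_steps=5,
)
```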
@@ -1017,10 +1017,10 @@ class StableDiffusionReferencePipeline(
        def hacked_basic_transformer_inner_forward(
            self,
-            hidden_states: torch.FloatTensor,
-            attention_mask: Optional[torch.FloatTensor] = None,
-            encoder_hidden_states: Optional[torch.FloatTensor] = None,
-            encoder_attention_mask: Optional[torch.FloatTensor] = None,
+            hidden_states: torch.Tensor,
+            attention_mask: Optional[torch.Tensor] = None,
+            encoder_hidden_states: Optional[torch.Tensor] = None,
+            encoder_attention_mask: Optional[torch.Tensor] = None,
            timestep: Optional[torch.LongTensor] = None,
            cross_attention_kwargs: Dict[str, Any] = None,
            class_labels: Optional[torch.LongTensor] = None,
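These `hacked_*_forward` functions are not ordinary methods; the pipeline rebinds them onto existing UNet submodules so reference features can be written during the reference pass and read during denoising. A minimal sketch of that rebinding pattern, with the hook body elided and module discovery simplified (assumes the `pipe` from the sketch above):

```python
import torch
from diffusers.models.attention import BasicTransformerBlock

def hacked_forward(self, hidden_states: torch.Tensor, *args, **kwargs) -> torch.Tensor:
    # After rebinding, "self" is the BasicTransformerBlock instance, so
    # banks of reference features can be stored on it as attributes.
    ...

for module in pipe.unet.modules():
    if isinstance(module, BasicTransformerBlock):
        # __get__ turns the plain function into a bound method of this module.
        module.forward = hacked_forward.__get__(module, BasicTransformerBlock)
```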
@@ -1135,12 +1135,12 @@ class StableDiffusionReferencePipeline(
        def hack_CrossAttnDownBlock2D_forward(
            self,
-            hidden_states: torch.FloatTensor,
-            temb: Optional[torch.FloatTensor] = None,
-            encoder_hidden_states: Optional[torch.FloatTensor] = None,
-            attention_mask: Optional[torch.FloatTensor] = None,
+            hidden_states: torch.Tensor,
+            temb: Optional[torch.Tensor] = None,
+            encoder_hidden_states: Optional[torch.Tensor] = None,
+            attention_mask: Optional[torch.Tensor] = None,
            cross_attention_kwargs: Optional[Dict[str, Any]] = None,
-            encoder_attention_mask: Optional[torch.FloatTensor] = None,
+            encoder_attention_mask: Optional[torch.Tensor] = None,
        ):
            eps = 1e-6
@@ -1191,10 +1191,10 @@ class StableDiffusionReferencePipeline(
        def hacked_DownBlock2D_forward(
            self,
-            hidden_states: torch.FloatTensor,
-            temb: Optional[torch.FloatTensor] = None,
+            hidden_states: torch.Tensor,
+            temb: Optional[torch.Tensor] = None,
            **kwargs: Any,
-        ) -> Tuple[torch.FloatTensor, ...]:
+        ) -> Tuple[torch.Tensor, ...]:
            eps = 1e-6
            output_states = ()
@@ -1236,15 +1236,15 @@ class StableDiffusionReferencePipeline(
        def hacked_CrossAttnUpBlock2D_forward(
            self,
-            hidden_states: torch.FloatTensor,
-            res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
-            temb: Optional[torch.FloatTensor] = None,
-            encoder_hidden_states: Optional[torch.FloatTensor] = None,
+            hidden_states: torch.Tensor,
+            res_hidden_states_tuple: Tuple[torch.Tensor, ...],
+            temb: Optional[torch.Tensor] = None,
+            encoder_hidden_states: Optional[torch.Tensor] = None,
            cross_attention_kwargs: Optional[Dict[str, Any]] = None,
            upsample_size: Optional[int] = None,
-            attention_mask: Optional[torch.FloatTensor] = None,
-            encoder_attention_mask: Optional[torch.FloatTensor] = None,
-        ) -> torch.FloatTensor:
+            attention_mask: Optional[torch.Tensor] = None,
+            encoder_attention_mask: Optional[torch.Tensor] = None,
+        ) -> torch.Tensor:
            eps = 1e-6
            # TODO(Patrick, William) - attention mask is not used
            for i, (resnet, attn) in enumerate(zip(self.resnets, self.attentions)):
@@ -1292,12 +1292,12 @@ class StableDiffusionReferencePipeline(
        def hacked_UpBlock2D_forward(
            self,
-            hidden_states: torch.FloatTensor,
-            res_hidden_states_tuple: Tuple[torch.FloatTensor, ...],
-            temb: Optional[torch.FloatTensor] = None,
+            hidden_states: torch.Tensor,
+            res_hidden_states_tuple: Tuple[torch.Tensor, ...],
+            temb: Optional[torch.Tensor] = None,
            upsample_size: Optional[int] = None,
            **kwargs: Any,
-        ) -> torch.FloatTensor:
+        ) -> torch.Tensor:
            eps = 1e-6
            for i, resnet in enumerate(self.resnets):
                # pop res hidden states
...
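The recurring `eps = 1e-6` in these hacked forwards belongs to the AdaIN-style "read" step of reference-only control: feature statistics banked from the reference image are re-applied to the current hidden states. A hedged sketch of that normalization (function and variable names are illustrative; `mean_ref`/`var_ref` stand for the banked reference statistics):

```python
import torch

def adain_read(hidden_states: torch.Tensor,
               mean_ref: torch.Tensor,
               var_ref: torch.Tensor,
               eps: float = 1e-6) -> torch.Tensor:
    # Normalize the current features, then re-scale and shift them with the
    # reference statistics; eps keeps the square roots and division stable.
    mean = hidden_states.mean(dim=(2, 3), keepdim=True)
    var = hidden_states.var(dim=(2, 3), keepdim=True)
    std = torch.maximum(var, torch.zeros_like(var) + eps) ** 0.5
    std_ref = torch.maximum(var_ref, torch.zeros_like(var_ref) + eps) ** 0.5
    return ((hidden_states - mean) / std) * std_ref + mean_ref
```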
@@ -285,8 +285,8 @@ class StableDiffusionRepaintPipeline(
        num_images_per_prompt,
        do_classifier_free_guidance,
        negative_prompt=None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
    ):
        r"""
        Encodes the prompt into text encoder hidden states.
@@ -303,10 +303,10 @@ class StableDiffusionRepaintPipeline(
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale`
                is less than `1`).
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If
                not provided, text embeddings will be generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, `negative_prompt_embeds` will be generated from the `negative_prompt`
                input argument.
@@ -606,8 +606,8 @@ class StableDiffusionRepaintPipeline(
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
-        image: Union[torch.FloatTensor, PIL.Image.Image] = None,
-        mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
+        image: Union[torch.Tensor, PIL.Image.Image] = None,
+        mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
        height: Optional[int] = None,
        width: Optional[int] = None,
        num_inference_steps: int = 50,
@@ -618,12 +618,12 @@ class StableDiffusionRepaintPipeline(
        num_images_per_prompt: Optional[int] = 1,
        eta: float = 0.0,
        generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-        latents: Optional[torch.FloatTensor] = None,
-        prompt_embeds: Optional[torch.FloatTensor] = None,
-        negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+        latents: Optional[torch.Tensor] = None,
+        prompt_embeds: Optional[torch.Tensor] = None,
+        negative_prompt_embeds: Optional[torch.Tensor] = None,
        output_type: Optional[str] = "pil",
        return_dict: bool = True,
-        callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+        callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
        callback_steps: int = 1,
    ):
        r"""
@@ -671,14 +671,14 @@ class StableDiffusionRepaintPipeline(
            generator (`torch.Generator`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
-            latents (`torch.FloatTensor`, *optional*):
+            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a
                latents tensor will be generated by sampling using the supplied random `generator`.
-            prompt_embeds (`torch.FloatTensor`, *optional*):
+            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If
                not provided, text embeddings will be generated from the `prompt` input argument.
-            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, `negative_prompt_embeds` will be generated from the `negative_prompt`
                input argument.
@@ -690,7 +690,7 @@ class StableDiffusionRepaintPipeline(
                plain tuple.
            callback (`Callable`, *optional*):
                A function that will be called every `callback_steps` steps during inference. The function will be
-                called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+                called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
            callback_steps (`int`, *optional*, defaults to 1):
                The frequency at which the `callback` function will be called. If not specified, the callback will
                be called at every step.
...
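For completeness, a hedged usage sketch of the repaint pipeline above, showing the `image`/`mask_image` pair that now accepts any `torch.Tensor` (weights ID, community pipeline name, and file paths are illustrative assumptions):

```python
from diffusers import DiffusionPipeline
from PIL import Image

pipe = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-inpainting",
    custom_pipeline="stable_diffusion_repaint",
).to("cuda")

image = Image.open("scene.png").convert("RGB")
mask = Image.open("scene-mask.png").convert("RGB")

# Tensors of shape (batch, channels, height, width) in [0, 1] work as well,
# including half-precision ones that the old FloatTensor hint excluded.
result = pipe(prompt="a mecha robot", image=image, mask_image=mask)
result.images[0].save("repainted.png")
```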
@@ -962,7 +962,7 @@ class TensorRTStableDiffusionImg2ImgPipeline(StableDiffusionImg2ImgPipeline):
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
-        image: Union[torch.FloatTensor, PIL.Image.Image] = None,
+        image: Union[torch.Tensor, PIL.Image.Image] = None,
        strength: float = 0.8,
        num_inference_steps: int = 50,
        guidance_scale: float = 7.5,
...
@@ -962,8 +962,8 @@ class TensorRTStableDiffusionInpaintPipeline(StableDiffusionInpaintPipeline):
    def __call__(
        self,
        prompt: Union[str, List[str]] = None,
-        image: Union[torch.FloatTensor, PIL.Image.Image] = None,
-        mask_image: Union[torch.FloatTensor, PIL.Image.Image] = None,
+        image: Union[torch.Tensor, PIL.Image.Image] = None,
+        mask_image: Union[torch.Tensor, PIL.Image.Image] = None,
        strength: float = 1.0,
        num_inference_steps: int = 50,
        guidance_scale: float = 7.5,
...
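A closing note on why the rename matters beyond style: `torch.FloatTensor` names the fp32 CPU tensor type specifically, so hints like the ones above were inaccurate whenever a pipeline ran in fp16 or on CUDA. A quick check anyone can run (pure PyTorch, no pipeline needed):

```python
import torch

x32 = torch.zeros(2, 3)                       # fp32, CPU
x16 = torch.zeros(2, 3, dtype=torch.float16)  # fp16, CPU

print(x32.type())                     # "torch.FloatTensor"
print(x16.type())                     # "torch.HalfTensor" -- not a FloatTensor
print(isinstance(x16, torch.Tensor))  # True: the widened hint covers both
```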