"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "769f0be8fb41daca9f3cbcffcfd0dbf01cc194b8"
Unverified commit be4afa0b, authored by Mark Van Aken and committed by GitHub

#7535 Update FloatTensor type hints to Tensor (#7883)

* Find & replace all FloatTensor type hints with Tensor (a sketch of this sweep follows below)

* Apply formatting

* Update torch.FloatTensor to torch.Tensor in the remaining files

* Apply formatting

* Fix the rest of the places where FloatTensor is used, as well as in the documentation

* Apply formatting

* Update the new file from FloatTensor to Tensor
parent 04f4bd54
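The sweep described in the commit message amounts to a mechanical substitution across the repository. A minimal sketch of how it could be scripted (the glob and the in-place rewrite are illustrative assumptions, not the exact tooling behind this commit):

```python
import re
from pathlib import Path

# Swap every torch.FloatTensor annotation for the more general torch.Tensor.
# The word boundaries keep longer identifiers containing the name intact.
PATTERN = re.compile(r"\btorch\.FloatTensor\b")

for path in Path(".").rglob("*.py"):
    text = path.read_text(encoding="utf-8")
    new_text = PATTERN.sub("torch.Tensor", text)
    if new_text != text:
        path.write_text(new_text, encoding="utf-8")
        print(f"rewrote {path}")
```

A second pass over `*.md` would cover the documentation hunks below; the backticked `torch.FloatTensor` in docstrings and docs matches the same bare token.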
@@ -242,10 +242,10 @@ Here's an example of a tuple return, comprising several objects:
 ```
 Returns:
-    `tuple(torch.FloatTensor)` comprising various elements depending on the configuration ([`BertConfig`]) and inputs:
-    - **loss** (*optional*, returned when `masked_lm_labels` is provided) `torch.FloatTensor` of shape `(1,)` --
+    `tuple(torch.Tensor)` comprising various elements depending on the configuration ([`BertConfig`]) and inputs:
+    - **loss** (*optional*, returned when `masked_lm_labels` is provided) `torch.Tensor` of shape `(1,)` --
       Total loss is the sum of the masked language modeling loss and the next sequence prediction (classification) loss.
-    - **prediction_scores** (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`) --
+    - **prediction_scores** (`torch.Tensor` of shape `(batch_size, sequence_length, config.vocab_size)`) --
       Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).
 ```
...
@@ -261,7 +261,7 @@ from dataclasses import dataclass
 @dataclass
 class UNet2DConditionOutput:
-    sample: torch.FloatTensor
+    sample: torch.Tensor
 pipe = StableDiffusionPipeline.from_pretrained(
...
@@ -339,7 +339,7 @@ from dataclasses import dataclass
 @dataclass
 class UNet2DConditionOutput:
-    sample: torch.FloatTensor
+    sample: torch.Tensor
 pipe = StableDiffusionPipeline.from_pretrained(
...
@@ -44,9 +44,9 @@ def bits_to_decimal(x, bits=BITS):
 # modified scheduler step functions for clamping the predicted x_0 between -bit_scale and +bit_scale
 def ddim_bit_scheduler_step(
     self,
-    model_output: torch.FloatTensor,
+    model_output: torch.Tensor,
     timestep: int,
-    sample: torch.FloatTensor,
+    sample: torch.Tensor,
     eta: float = 0.0,
     use_clipped_model_output: bool = True,
     generator=None,
@@ -56,9 +56,9 @@ def ddim_bit_scheduler_step(
     Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
     process from the learned model outputs (most often the predicted noise).
     Args:
-        model_output (`torch.FloatTensor`): direct output from learned diffusion model.
+        model_output (`torch.Tensor`): direct output from learned diffusion model.
         timestep (`int`): current discrete timestep in the diffusion chain.
-        sample (`torch.FloatTensor`):
+        sample (`torch.Tensor`):
             current instance of sample being created by diffusion process.
         eta (`float`): weight of noise for added noise in diffusion step.
         use_clipped_model_output (`bool`): TODO
@@ -134,9 +134,9 @@ def ddim_bit_scheduler_step(
 def ddpm_bit_scheduler_step(
     self,
-    model_output: torch.FloatTensor,
+    model_output: torch.Tensor,
     timestep: int,
-    sample: torch.FloatTensor,
+    sample: torch.Tensor,
     prediction_type="epsilon",
     generator=None,
     return_dict: bool = True,
@@ -145,9 +145,9 @@ def ddpm_bit_scheduler_step(
     Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion
     process from the learned model outputs (most often the predicted noise).
     Args:
-        model_output (`torch.FloatTensor`): direct output from learned diffusion model.
+        model_output (`torch.Tensor`): direct output from learned diffusion model.
         timestep (`int`): current discrete timestep in the diffusion chain.
-        sample (`torch.FloatTensor`):
+        sample (`torch.Tensor`):
             current instance of sample being created by diffusion process.
         prediction_type (`str`, default `epsilon`):
             indicates whether the model predicts the noise (epsilon), or the samples (`sample`).
...
@@ -233,8 +233,8 @@ class CLIPGuidedImagesMixingStableDiffusion(DiffusionPipeline, StableDiffusionMi
 @torch.no_grad()
 def __call__(
     self,
-    style_image: Union[torch.FloatTensor, PIL.Image.Image],
-    content_image: Union[torch.FloatTensor, PIL.Image.Image],
+    style_image: Union[torch.Tensor, PIL.Image.Image],
+    content_image: Union[torch.Tensor, PIL.Image.Image],
     style_prompt: Optional[str] = None,
     content_prompt: Optional[str] = None,
     height: Optional[int] = 512,
...
@@ -180,7 +180,7 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
     num_cutouts: Optional[int] = 4,
     use_cutouts: Optional[bool] = True,
     generator: Optional[torch.Generator] = None,
-    latents: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
 ):
...
@@ -306,7 +306,7 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
     prompt: Union[str, List[str]],
     height: Optional[int] = 512,
     width: Optional[int] = 512,
-    image: Union[torch.FloatTensor, PIL.Image.Image] = None,
+    image: Union[torch.Tensor, PIL.Image.Image] = None,
     strength: float = 0.8,
     num_inference_steps: Optional[int] = 50,
     guidance_scale: Optional[float] = 7.5,
@@ -317,7 +317,7 @@ class CLIPGuidedStableDiffusion(DiffusionPipeline, StableDiffusionMixin):
     num_cutouts: Optional[int] = 4,
     use_cutouts: Optional[bool] = True,
     generator: Optional[torch.Generator] = None,
-    latents: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
 ):
...
@@ -354,10 +354,10 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin)
     num_images_per_prompt: Optional[int] = 1,
     eta: float = 0.0,
     generator: Optional[torch.Generator] = None,
-    latents: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
-    callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+    callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
     callback_steps: int = 1,
     weights: Optional[str] = "",
 ):
@@ -391,7 +391,7 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin)
 generator (`torch.Generator`, *optional*):
     A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
     deterministic.
-latents (`torch.FloatTensor`, *optional*):
+latents (`torch.Tensor`, *optional*):
     Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
     generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
     tensor will be generated by sampling using the supplied random `generator`.
@@ -403,7 +403,7 @@ class ComposableStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin)
 plain tuple.
 callback (`Callable`, *optional*):
     A function that will be called every `callback_steps` steps during inference. The function will be
-    called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+    called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
 callback_steps (`int`, *optional*, defaults to 1):
     The frequency at which the `callback` function will be called. If not specified, the callback will be
     called at every step.
...
@@ -103,7 +103,7 @@ class DDIMNoiseComparativeAnalysisPipeline(DiffusionPipeline):
 @torch.no_grad()
 def __call__(
     self,
-    image: Union[torch.FloatTensor, PIL.Image.Image] = None,
+    image: Union[torch.Tensor, PIL.Image.Image] = None,
     strength: float = 0.8,
     batch_size: int = 1,
     generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
@@ -115,7 +115,7 @@ class DDIMNoiseComparativeAnalysisPipeline(DiffusionPipeline):
 ) -> Union[ImagePipelineOutput, Tuple]:
     r"""
     Args:
-        image (`torch.FloatTensor` or `PIL.Image.Image`):
+        image (`torch.Tensor` or `PIL.Image.Image`):
             `Image`, or tensor representing an image batch, that will be used as the starting point for the
             process.
         strength (`float`, *optional*, defaults to 0.8):
...
@@ -205,7 +205,7 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
     safety_checker: StableDiffusionSafetyChecker,
     feature_extractor: CLIPImageProcessor,
     language_adapter: TranslatorNoLN = None,
-    tensor_norm: torch.FloatTensor = None,
+    tensor_norm: torch.Tensor = None,
     requires_safety_checker: bool = True,
 ):
     super().__init__()
@@ -231,7 +231,7 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
     num_token: int,
     dim: int,
     dim_out: int,
-    tensor_norm: torch.FloatTensor,
+    tensor_norm: torch.Tensor,
     mult: int = 2,
     depth: int = 5,
 ):
@@ -242,7 +242,7 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
     )
     self.language_adapter.load_state_dict(torch.load(model_path))
-def _adapt_language(self, prompt_embeds: torch.FloatTensor):
+def _adapt_language(self, prompt_embeds: torch.Tensor):
     prompt_embeds = prompt_embeds / 3
     prompt_embeds = self.language_adapter(prompt_embeds) * (self.tensor_norm / 2)
     return prompt_embeds
@@ -254,8 +254,8 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
     num_images_per_prompt,
     do_classifier_free_guidance,
     negative_prompt=None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     lora_scale: Optional[float] = None,
     clip_skip: Optional[int] = None,
 ):
@@ -275,10 +275,10 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
 The prompt or prompts not to guide the image generation. If not defined, one has to pass
 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
 less than `1`).
-prompt_embeds (`torch.FloatTensor`, *optional*):
+prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
     provided, text embeddings will be generated from `prompt` input argument.
-negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+negative_prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
     weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
     argument.
@@ -535,7 +535,7 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
 data type of the generated embeddings
 Returns:
-    `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+    `torch.Tensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
 """
 assert len(w.shape) == 1
 w = w * 1000.0
@@ -594,9 +594,9 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
     num_images_per_prompt: Optional[int] = 1,
     eta: float = 0.0,
     generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-    latents: Optional[torch.FloatTensor] = None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
     cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -635,14 +635,14 @@ class GlueGenStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin, Lo
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
     A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
     generation deterministic.
-latents (`torch.FloatTensor`, *optional*):
+latents (`torch.Tensor`, *optional*):
     Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
     generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
     tensor is generated by sampling using the supplied random `generator`.
-prompt_embeds (`torch.FloatTensor`, *optional*):
+prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
     provided, text embeddings are generated from the `prompt` input argument.
-negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+negative_prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
     not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
 ip_adapter_image: (`PipelineImageInput`, *optional*): Optional image input to work with IP Adapters.
...
@@ -28,10 +28,10 @@ class RASGAttnProcessor:
 def __call__(
     self,
     attn: Attention,
-    hidden_states: torch.FloatTensor,
-    encoder_hidden_states: Optional[torch.FloatTensor] = None,
-    attention_mask: Optional[torch.FloatTensor] = None,
-    temb: Optional[torch.FloatTensor] = None,
+    hidden_states: torch.Tensor,
+    encoder_hidden_states: Optional[torch.Tensor] = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    temb: Optional[torch.Tensor] = None,
     scale: float = 1.0,
 ) -> torch.Tensor:
     # Same as the default AttnProcessor up until the part where the similarity matrix gets saved
@@ -111,10 +111,10 @@ class PAIntAAttnProcessor:
 def __call__(
     self,
     attn: Attention,
-    hidden_states: torch.FloatTensor,
-    encoder_hidden_states: Optional[torch.FloatTensor] = None,
-    attention_mask: Optional[torch.FloatTensor] = None,
-    temb: Optional[torch.FloatTensor] = None,
+    hidden_states: torch.Tensor,
+    encoder_hidden_states: Optional[torch.Tensor] = None,
+    attention_mask: Optional[torch.Tensor] = None,
+    temb: Optional[torch.Tensor] = None,
     scale: float = 1.0,
 ) -> torch.Tensor:
     # Automatically recognize the resolution of the current attention layer and resize the masks accordingly
@@ -454,7 +454,7 @@ class StableDiffusionHDPainterPipeline(StableDiffusionInpaintPipeline):
     prompt: Union[str, List[str]] = None,
     image: PipelineImageInput = None,
     mask_image: PipelineImageInput = None,
-    masked_image_latents: torch.FloatTensor = None,
+    masked_image_latents: torch.Tensor = None,
     height: Optional[int] = None,
     width: Optional[int] = None,
     padding_mask_crop: Optional[int] = None,
@@ -467,9 +467,9 @@ class StableDiffusionHDPainterPipeline(StableDiffusionInpaintPipeline):
     num_images_per_prompt: Optional[int] = 1,
     eta: float = 0.01,
     generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-    latents: Optional[torch.FloatTensor] = None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     ip_adapter_image: Optional[PipelineImageInput] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
...
@@ -17,21 +17,21 @@ class IADBScheduler(SchedulerMixin, ConfigMixin):
 def step(
     self,
-    model_output: torch.FloatTensor,
+    model_output: torch.Tensor,
     timestep: int,
-    x_alpha: torch.FloatTensor,
-) -> torch.FloatTensor:
+    x_alpha: torch.Tensor,
+) -> torch.Tensor:
     """
     Predict the sample at the previous timestep by reversing the ODE. Core function to propagate the diffusion
     process from the learned model outputs (most often the predicted noise).
     Args:
-        model_output (`torch.FloatTensor`): direct output from learned diffusion model. It is the direction from x0 to x1.
+        model_output (`torch.Tensor`): direct output from learned diffusion model. It is the direction from x0 to x1.
         timestep (`float`): current timestep in the diffusion chain.
-        x_alpha (`torch.FloatTensor`): x_alpha sample for the current timestep
+        x_alpha (`torch.Tensor`): x_alpha sample for the current timestep
     Returns:
-        `torch.FloatTensor`: the sample at the previous timestep
+        `torch.Tensor`: the sample at the previous timestep
     """
     if self.num_inference_steps is None:
@@ -53,10 +53,10 @@ class IADBScheduler(SchedulerMixin, ConfigMixin):
 def add_noise(
     self,
-    original_samples: torch.FloatTensor,
-    noise: torch.FloatTensor,
-    alpha: torch.FloatTensor,
-) -> torch.FloatTensor:
+    original_samples: torch.Tensor,
+    noise: torch.Tensor,
+    alpha: torch.Tensor,
+) -> torch.Tensor:
     return original_samples * alpha + noise * (1 - alpha)
 def __len__(self):
...
@@ -110,7 +110,7 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 def train(
     self,
     prompt: Union[str, List[str]],
-    image: Union[torch.FloatTensor, PIL.Image.Image],
+    image: Union[torch.Tensor, PIL.Image.Image],
     height: Optional[int] = 512,
     width: Optional[int] = 512,
     generator: Optional[torch.Generator] = None,
@@ -144,7 +144,7 @@ class ImagicStableDiffusionPipeline(DiffusionPipeline, StableDiffusionMixin):
 generator (`torch.Generator`, *optional*):
     A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
     deterministic.
-latents (`torch.FloatTensor`, *optional*):
+latents (`torch.Tensor`, *optional*):
     Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
     generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
     tensor will be generated by sampling using the supplied random `generator`.
...
@@ -133,9 +133,9 @@ class ImageToImageInpaintingPipeline(DiffusionPipeline):
 def __call__(
     self,
     prompt: Union[str, List[str]],
-    image: Union[torch.FloatTensor, PIL.Image.Image],
-    inner_image: Union[torch.FloatTensor, PIL.Image.Image],
-    mask_image: Union[torch.FloatTensor, PIL.Image.Image],
+    image: Union[torch.Tensor, PIL.Image.Image],
+    inner_image: Union[torch.Tensor, PIL.Image.Image],
+    mask_image: Union[torch.Tensor, PIL.Image.Image],
     height: int = 512,
     width: int = 512,
     num_inference_steps: int = 50,
@@ -144,10 +144,10 @@ class ImageToImageInpaintingPipeline(DiffusionPipeline):
     num_images_per_prompt: Optional[int] = 1,
     eta: float = 0.0,
     generator: Optional[torch.Generator] = None,
-    latents: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
-    callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+    callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
     callback_steps: int = 1,
     **kwargs,
 ):
@@ -194,7 +194,7 @@ class ImageToImageInpaintingPipeline(DiffusionPipeline):
 generator (`torch.Generator`, *optional*):
     A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
     deterministic.
-latents (`torch.FloatTensor`, *optional*):
+latents (`torch.Tensor`, *optional*):
     Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
     generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
     tensor will be generated by sampling using the supplied random `generator`.
@@ -206,7 +206,7 @@ class ImageToImageInpaintingPipeline(DiffusionPipeline):
 plain tuple.
 callback (`Callable`, *optional*):
     A function that will be called every `callback_steps` steps during inference. The function will be
-    called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+    called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
 callback_steps (`int`, *optional*, defaults to 1):
     The frequency at which the `callback` function will be called. If not specified, the callback will be
     called at every step.
...
@@ -189,8 +189,8 @@ class InstaFlowPipeline(
     num_images_per_prompt,
     do_classifier_free_guidance,
     negative_prompt=None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     lora_scale: Optional[float] = None,
 ):
     deprecation_message = "`_encode_prompt()` is deprecated and it will be removed in a future version. Use `encode_prompt()` instead. Also, be aware that the output format changed from a concatenated tensor to a tuple."
@@ -219,8 +219,8 @@ class InstaFlowPipeline(
     num_images_per_prompt,
     do_classifier_free_guidance,
     negative_prompt=None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     lora_scale: Optional[float] = None,
 ):
     r"""
@@ -239,10 +239,10 @@ class InstaFlowPipeline(
 The prompt or prompts not to guide the image generation. If not defined, one has to pass
 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
 less than `1`).
-prompt_embeds (`torch.FloatTensor`, *optional*):
+prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
     provided, text embeddings will be generated from `prompt` input argument.
-negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+negative_prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
     weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
     argument.
@@ -501,12 +501,12 @@ class InstaFlowPipeline(
     num_images_per_prompt: Optional[int] = 1,
     eta: float = 0.0,
     generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-    latents: Optional[torch.FloatTensor] = None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
-    callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+    callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
     callback_steps: int = 1,
     cross_attention_kwargs: Optional[Dict[str, Any]] = None,
     guidance_rescale: float = 0.0,
@@ -538,14 +538,14 @@ class InstaFlowPipeline(
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
     A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
     generation deterministic.
-latents (`torch.FloatTensor`, *optional*):
+latents (`torch.Tensor`, *optional*):
     Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
     generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
     tensor is generated by sampling using the supplied random `generator`.
-prompt_embeds (`torch.FloatTensor`, *optional*):
+prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
     provided, text embeddings are generated from the `prompt` input argument.
-negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+negative_prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
     not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
 output_type (`str`, *optional*, defaults to `"pil"`):
@@ -555,7 +555,7 @@ class InstaFlowPipeline(
 plain tuple.
 callback (`Callable`, *optional*):
     A function that calls every `callback_steps` steps during inference. The function is called with the
-    following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+    following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
 callback_steps (`int`, *optional*, defaults to 1):
     The frequency at which the `callback` function is called. If not specified, the callback is called at
     every step.
...
@@ -132,12 +132,12 @@ class StableDiffusionWalkPipeline(DiffusionPipeline, StableDiffusionMixin):
     num_images_per_prompt: Optional[int] = 1,
     eta: float = 0.0,
     generator: Optional[torch.Generator] = None,
-    latents: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
-    callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
+    callback: Optional[Callable[[int, int, torch.Tensor], None]] = None,
     callback_steps: int = 1,
-    text_embeddings: Optional[torch.FloatTensor] = None,
+    text_embeddings: Optional[torch.Tensor] = None,
     **kwargs,
 ):
     r"""
@@ -170,7 +170,7 @@ class StableDiffusionWalkPipeline(DiffusionPipeline, StableDiffusionMixin):
 generator (`torch.Generator`, *optional*):
     A [torch generator](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make generation
     deterministic.
-latents (`torch.FloatTensor`, *optional*):
+latents (`torch.Tensor`, *optional*):
     Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
     generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
     tensor will be generated by sampling using the supplied random `generator`.
@@ -182,11 +182,11 @@ class StableDiffusionWalkPipeline(DiffusionPipeline, StableDiffusionMixin):
 plain tuple.
 callback (`Callable`, *optional*):
     A function that will be called every `callback_steps` steps during inference. The function will be
-    called with the following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
+    called with the following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
 callback_steps (`int`, *optional*, defaults to 1):
     The frequency at which the `callback` function will be called. If not specified, the callback will be
     called at every step.
-text_embeddings (`torch.FloatTensor`, *optional*, defaults to `None`):
+text_embeddings (`torch.Tensor`, *optional*, defaults to `None`):
     Pre-generated text embeddings to be used as inputs for image generation. Can be used in place of
     `prompt` to avoid re-computing the embeddings. If not provided, the embeddings will be generated from
     the supplied `prompt`.
...
@@ -62,7 +62,7 @@ class IPAdapterFullImageProjection(nn.Module):
     self.ff = FeedForward(image_embed_dim, cross_attention_dim * num_tokens, mult=mult, activation_fn="gelu")
     self.norm = nn.LayerNorm(cross_attention_dim)
-def forward(self, image_embeds: torch.FloatTensor):
+def forward(self, image_embeds: torch.Tensor):
     x = self.ff(image_embeds)
     x = x.reshape(-1, self.num_tokens, self.cross_attention_dim)
     return self.norm(x)
@@ -452,8 +452,8 @@ class IPAdapterFaceIDStableDiffusionPipeline(
     num_images_per_prompt,
     do_classifier_free_guidance,
     negative_prompt=None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     lora_scale: Optional[float] = None,
     **kwargs,
 ):
@@ -484,8 +484,8 @@ class IPAdapterFaceIDStableDiffusionPipeline(
     num_images_per_prompt,
     do_classifier_free_guidance,
     negative_prompt=None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
     lora_scale: Optional[float] = None,
     clip_skip: Optional[int] = None,
 ):
@@ -505,10 +505,10 @@ class IPAdapterFaceIDStableDiffusionPipeline(
 The prompt or prompts not to guide the image generation. If not defined, one has to pass
 `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
 less than `1`).
-prompt_embeds (`torch.FloatTensor`, *optional*):
+prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
     provided, text embeddings will be generated from `prompt` input argument.
-negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+negative_prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
     weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
     argument.
@@ -788,7 +788,7 @@ class IPAdapterFaceIDStableDiffusionPipeline(
 data type of the generated embeddings
 Returns:
-    `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+    `torch.Tensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
 """
 assert len(w.shape) == 1
 w = w * 1000.0
@@ -847,10 +847,10 @@ class IPAdapterFaceIDStableDiffusionPipeline(
     num_images_per_prompt: Optional[int] = 1,
     eta: float = 0.0,
     generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
-    latents: Optional[torch.FloatTensor] = None,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
-    negative_prompt_embeds: Optional[torch.FloatTensor] = None,
-    image_embeds: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
+    negative_prompt_embeds: Optional[torch.Tensor] = None,
+    image_embeds: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
     cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -891,17 +891,17 @@ class IPAdapterFaceIDStableDiffusionPipeline(
 generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
     A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
     generation deterministic.
-latents (`torch.FloatTensor`, *optional*):
+latents (`torch.Tensor`, *optional*):
     Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
     generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
     tensor is generated by sampling using the supplied random `generator`.
-prompt_embeds (`torch.FloatTensor`, *optional*):
+prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
     provided, text embeddings are generated from the `prompt` input argument.
-negative_prompt_embeds (`torch.FloatTensor`, *optional*):
+negative_prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
     not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
-image_embeds (`torch.FloatTensor`, *optional*):
+image_embeds (`torch.Tensor`, *optional*):
     Pre-generated image embeddings.
 output_type (`str`, *optional*, defaults to `"pil"`):
     The output format of the generated image. Choose between `PIL.Image` or `np.array`.
...
@@ -88,7 +88,7 @@ class LatentConsistencyModelImg2ImgPipeline(DiffusionPipeline):
 torch device
 num_images_per_prompt (`int`):
     number of images that should be generated per prompt
-prompt_embeds (`torch.FloatTensor`, *optional*):
+prompt_embeds (`torch.Tensor`, *optional*):
     Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
     provided, text embeddings will be generated from `prompt` input argument.
 """
@@ -282,10 +282,10 @@ class LatentConsistencyModelImg2ImgPipeline(DiffusionPipeline):
     width: Optional[int] = 768,
     guidance_scale: float = 7.5,
     num_images_per_prompt: Optional[int] = 1,
-    latents: Optional[torch.FloatTensor] = None,
+    latents: Optional[torch.Tensor] = None,
     num_inference_steps: int = 4,
     lcm_origin_steps: int = 50,
-    prompt_embeds: Optional[torch.FloatTensor] = None,
+    prompt_embeds: Optional[torch.Tensor] = None,
     output_type: Optional[str] = "pil",
     return_dict: bool = True,
     cross_attention_kwargs: Optional[Dict[str, Any]] = None,
@@ -395,16 +395,16 @@ class LCMSchedulerOutput(BaseOutput):
     """
     Output class for the scheduler's `step` function output.
     Args:
-        prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
            denoising loop.
-        pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
+        pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
            The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
            `pred_original_sample` can be used to preview progress or for guidance.
     """
-    prev_sample: torch.FloatTensor
-    denoised: Optional[torch.FloatTensor] = None
+    prev_sample: torch.Tensor
+    denoised: Optional[torch.Tensor] = None
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
@@ -452,10 +452,10 @@ def rescale_zero_terminal_snr(betas):
 """
 Rescales betas to have zero terminal SNR. Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
 Args:
-    betas (`torch.FloatTensor`):
+    betas (`torch.Tensor`):
         the betas that the scheduler is being initialized with.
 Returns:
-    `torch.FloatTensor`: rescaled betas with zero terminal SNR
+    `torch.Tensor`: rescaled betas with zero terminal SNR
 """
 # Convert betas to alphas_bar_sqrt
 alphas = 1.0 - betas
@@ -587,17 +587,17 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
     self.num_inference_steps = None
     self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
-def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor:
+def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
     """
     Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
     current timestep.
     Args:
-        sample (`torch.FloatTensor`):
+        sample (`torch.Tensor`):
            The input sample.
        timestep (`int`, *optional*):
            The current timestep in the diffusion chain.
     Returns:
-        `torch.FloatTensor`:
+        `torch.Tensor`:
            A scaled input sample.
     """
     return sample
@@ -613,7 +613,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
     return variance
 # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
-def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor:
+def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
     """
     "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
     prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
...@@ -685,25 +685,25 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin): ...@@ -685,25 +685,25 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
def step( def step(
self, self,
model_output: torch.FloatTensor, model_output: torch.Tensor,
timeindex: int, timeindex: int,
timestep: int, timestep: int,
sample: torch.FloatTensor, sample: torch.Tensor,
eta: float = 0.0, eta: float = 0.0,
use_clipped_model_output: bool = False, use_clipped_model_output: bool = False,
generator=None, generator=None,
variance_noise: Optional[torch.FloatTensor] = None, variance_noise: Optional[torch.Tensor] = None,
return_dict: bool = True, return_dict: bool = True,
) -> Union[LCMSchedulerOutput, Tuple]: ) -> Union[LCMSchedulerOutput, Tuple]:
""" """
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
process from the learned model outputs (most often the predicted noise). process from the learned model outputs (most often the predicted noise).
Args: Args:
model_output (`torch.FloatTensor`): model_output (`torch.Tensor`):
The direct output from learned diffusion model. The direct output from learned diffusion model.
timestep (`float`): timestep (`float`):
The current discrete timestep in the diffusion chain. The current discrete timestep in the diffusion chain.
sample (`torch.FloatTensor`): sample (`torch.Tensor`):
A current instance of a sample created by the diffusion process. A current instance of a sample created by the diffusion process.
eta (`float`): eta (`float`):
The weight of noise for added noise in diffusion step. The weight of noise for added noise in diffusion step.
...@@ -714,7 +714,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin): ...@@ -714,7 +714,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
`use_clipped_model_output` has no effect. `use_clipped_model_output` has no effect.
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
A random number generator. A random number generator.
variance_noise (`torch.FloatTensor`): variance_noise (`torch.Tensor`):
Alternative to generating noise with `generator` by directly providing the noise for the variance Alternative to generating noise with `generator` by directly providing the noise for the variance
itself. Useful for methods such as [`CycleDiffusion`]. itself. Useful for methods such as [`CycleDiffusion`].
return_dict (`bool`, *optional*, defaults to `True`): return_dict (`bool`, *optional*, defaults to `True`):
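The body of `step` is elided between the hunks. Under the epsilon parameterization, the core of the update is a consistency-function evaluation followed by re-noising for multi-step inference. The sketch below follows the LCM community implementation (helper names such as `get_scalings_for_boundary_condition_discrete` are taken from it), but treat it as an approximation of the elided code, not a verbatim copy:

```
# Predict x_0 from the noise prediction (epsilon parameterization).
alpha_prod_t = self.alphas_cumprod[timestep]
beta_prod_t = 1 - alpha_prod_t
pred_x0 = (sample - beta_prod_t.sqrt() * model_output) / alpha_prod_t.sqrt()

# Evaluate the consistency function via its boundary-condition scalings.
c_skip, c_out = self.get_scalings_for_boundary_condition_discrete(timestep)
denoised = c_out * pred_x0 + c_skip * sample

# Multi-step LCM inference: re-noise the estimate to the next timestep;
# on the final step, return the denoised sample as-is.
if len(self.timesteps) > 1:
    noise = torch.randn(model_output.shape, device=model_output.device)
    prev_timestep = self.timesteps[timeindex + 1] if timeindex + 1 < len(self.timesteps) else timestep
    alpha_prod_prev = self.alphas_cumprod[prev_timestep]
    prev_sample = alpha_prod_prev.sqrt() * denoised + (1 - alpha_prod_prev).sqrt() * noise
else:
    prev_sample = denoised

if not return_dict:
    return (prev_sample, denoised)
return LCMSchedulerOutput(prev_sample=prev_sample, denoised=denoised)
```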
...@@ -777,10 +777,10 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin): ...@@ -777,10 +777,10 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
def add_noise( def add_noise(
self, self,
original_samples: torch.FloatTensor, original_samples: torch.Tensor,
noise: torch.FloatTensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.FloatTensor: ) -> torch.Tensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype) alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device) timesteps = timesteps.to(original_samples.device)
...@@ -799,9 +799,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin): ...@@ -799,9 +799,7 @@ class LCMSchedulerWithTimestamp(SchedulerMixin, ConfigMixin):
return noisy_samples return noisy_samples
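The hunk elides the middle of `add_noise`; what it skips is pure shape bookkeeping around the standard forward-diffusion marginal. A condensed sketch of the elided portion:

```
# Gather per-timestep factors and broadcast them over the sample dims.
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
while len(sqrt_alpha_prod.shape) < len(original_samples.shape):
    sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
    sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)

# x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise
noisy_samples = sqrt_alpha_prod * original_samples + sqrt_one_minus_alpha_prod * noise
```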
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity( def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as sample # Make sure alphas_cumprod and timestep have same device and dtype as sample
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype) alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
timesteps = timesteps.to(sample.device) timesteps = timesteps.to(sample.device)
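`get_velocity` is likewise cut short; the remainder computes the v-prediction target from Salimans & Ho's progressive distillation, mirroring the broadcasting in `add_noise`:

```
sqrt_alpha_prod = alphas_cumprod[timesteps] ** 0.5
sqrt_one_minus_alpha_prod = (1 - alphas_cumprod[timesteps]) ** 0.5
while len(sqrt_alpha_prod.shape) < len(sample.shape):
    sqrt_alpha_prod = sqrt_alpha_prod.unsqueeze(-1)
    sqrt_one_minus_alpha_prod = sqrt_one_minus_alpha_prod.unsqueeze(-1)

# v = sqrt(alpha_bar_t) * noise - sqrt(1 - alpha_bar_t) * x_0
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
return velocity
```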
......
...@@ -281,8 +281,8 @@ class LatentConsistencyModelWalkPipeline( ...@@ -281,8 +281,8 @@ class LatentConsistencyModelWalkPipeline(
num_images_per_prompt, num_images_per_prompt,
do_classifier_free_guidance, do_classifier_free_guidance,
negative_prompt=None, negative_prompt=None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
negative_prompt_embeds: Optional[torch.FloatTensor] = None, negative_prompt_embeds: Optional[torch.Tensor] = None,
lora_scale: Optional[float] = None, lora_scale: Optional[float] = None,
clip_skip: Optional[int] = None, clip_skip: Optional[int] = None,
): ):
...@@ -302,10 +302,10 @@ class LatentConsistencyModelWalkPipeline( ...@@ -302,10 +302,10 @@ class LatentConsistencyModelWalkPipeline(
The prompt or prompts not to guide the image generation. If not defined, one has to pass The prompt or prompts not to guide the image generation. If not defined, one has to pass
`negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
less than `1`). less than `1`).
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
negative_prompt_embeds (`torch.FloatTensor`, *optional*): negative_prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
argument. argument.
...@@ -506,7 +506,7 @@ class LatentConsistencyModelWalkPipeline( ...@@ -506,7 +506,7 @@ class LatentConsistencyModelWalkPipeline(
data type of the generated embeddings data type of the generated embeddings
Returns: Returns:
`torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)` `torch.Tensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
""" """
assert len(w.shape) == 1 assert len(w.shape) == 1
w = w * 1000.0 w = w * 1000.0
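The visible lines show the guidance scale `w` being stretched by 1000 before embedding; the rest of the method is a standard sinusoidal embedding. A self-contained sketch (the function name is mine; the docstring above fixes the output shape):

```
import torch


def guidance_scale_embedding(w: torch.Tensor, embedding_dim: int = 512, dtype=torch.float32) -> torch.Tensor:
    # Sinusoidal embedding of the (scaled) guidance weight, one row per w.
    assert len(w.shape) == 1
    w = w * 1000.0

    half_dim = embedding_dim // 2
    emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
    emb = w.to(dtype)[:, None] * emb[None, :]
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
    if embedding_dim % 2 == 1:
        emb = torch.nn.functional.pad(emb, (0, 1))  # zero-pad odd dims

    assert emb.shape == (w.shape[0], embedding_dim)
    return emb
```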
...@@ -546,7 +546,7 @@ class LatentConsistencyModelWalkPipeline( ...@@ -546,7 +546,7 @@ class LatentConsistencyModelWalkPipeline(
height: int, height: int,
width: int, width: int,
callback_steps: int, callback_steps: int,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
callback_on_step_end_tensor_inputs=None, callback_on_step_end_tensor_inputs=None,
): ):
if height % 8 != 0 or width % 8 != 0: if height % 8 != 0 or width % 8 != 0:
...@@ -580,11 +580,11 @@ class LatentConsistencyModelWalkPipeline( ...@@ -580,11 +580,11 @@ class LatentConsistencyModelWalkPipeline(
@torch.no_grad() @torch.no_grad()
def interpolate_embedding( def interpolate_embedding(
self, self,
start_embedding: torch.FloatTensor, start_embedding: torch.Tensor,
end_embedding: torch.FloatTensor, end_embedding: torch.Tensor,
num_interpolation_steps: Union[int, List[int]], num_interpolation_steps: Union[int, List[int]],
interpolation_type: str, interpolation_type: str,
) -> torch.FloatTensor: ) -> torch.Tensor:
if interpolation_type == "lerp": if interpolation_type == "lerp":
interpolation_fn = lerp interpolation_fn = lerp
elif interpolation_type == "slerp": elif interpolation_type == "slerp":
...@@ -611,11 +611,11 @@ class LatentConsistencyModelWalkPipeline( ...@@ -611,11 +611,11 @@ class LatentConsistencyModelWalkPipeline(
@torch.no_grad() @torch.no_grad()
def interpolate_latent( def interpolate_latent(
self, self,
start_latent: torch.FloatTensor, start_latent: torch.Tensor,
end_latent: torch.FloatTensor, end_latent: torch.Tensor,
num_interpolation_steps: Union[int, List[int]], num_interpolation_steps: Union[int, List[int]],
interpolation_type: str, interpolation_type: str,
) -> torch.FloatTensor: ) -> torch.Tensor:
if interpolation_type == "lerp": if interpolation_type == "lerp":
interpolation_fn = lerp interpolation_fn = lerp
elif interpolation_type == "slerp": elif interpolation_type == "slerp":
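Both `interpolate_embedding` and `interpolate_latent` only pick a module-level interpolation function and then walk the steps. For reference, a sketch of the two helpers with a scalar fraction `t` (the pipeline's versions also accept batched fractions):

```
import torch


def lerp(v0: torch.Tensor, v1: torch.Tensor, t: float) -> torch.Tensor:
    # Straight-line interpolation between two tensors.
    return (1 - t) * v0 + t * v1


def slerp(v0: torch.Tensor, v1: torch.Tensor, t: float, dot_threshold: float = 0.9995) -> torch.Tensor:
    # Spherical interpolation: move along the great circle between v0 and v1,
    # which better preserves the norm statistics of Gaussian latents.
    dot = torch.sum(v0 * v1) / (torch.norm(v0) * torch.norm(v1))
    if torch.abs(dot) > dot_threshold:
        # Nearly parallel vectors: fall back to lerp for numerical stability.
        return lerp(v0, v1, t)
    theta = torch.acos(dot)
    sin_theta = torch.sin(theta)
    return (torch.sin((1 - t) * theta) / sin_theta) * v0 + (torch.sin(t * theta) / sin_theta) * v1
```

Slerp is the usual choice for latents because linear interpolation between two Gaussian samples shrinks the norm toward the midpoint, which visibly washes out the middle frames.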
...@@ -663,8 +663,8 @@ class LatentConsistencyModelWalkPipeline( ...@@ -663,8 +663,8 @@ class LatentConsistencyModelWalkPipeline(
guidance_scale: float = 8.5, guidance_scale: float = 8.5,
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None, generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
...@@ -705,11 +705,11 @@ class LatentConsistencyModelWalkPipeline( ...@@ -705,11 +705,11 @@ class LatentConsistencyModelWalkPipeline(
generator (`torch.Generator` or `List[torch.Generator]`, *optional*): generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
generation deterministic. generation deterministic.
latents (`torch.FloatTensor`, *optional*): latents (`torch.Tensor`, *optional*):
Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image Pre-generated noisy latents sampled from a Gaussian distribution, to be used as inputs for image
generation. Can be used to tweak the same generation with different prompts. If not provided, a latents generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
tensor is generated by sampling using the supplied random `generator`. tensor is generated by sampling using the supplied random `generator`.
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
provided, text embeddings are generated from the `prompt` input argument. provided, text embeddings are generated from the `prompt` input argument.
output_type (`str`, *optional*, defaults to `"pil"`): output_type (`str`, *optional*, defaults to `"pil"`):
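A hypothetical end-to-end invocation of the walk pipeline. The checkpoint, the `custom_pipeline` id, and the keyword names below are assumptions for illustration (they match how the LCM community pipelines are usually loaded), not something this diff confirms:

```
import torch
from diffusers import DiffusionPipeline

# Hypothetical usage sketch: checkpoint and custom_pipeline id are assumptions.
pipe = DiffusionPipeline.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7",
    custom_pipeline="latent_consistency_interpolate",
)
pipe.to("cuda")

# Walk between two prompts; keyword names follow the signature shown above.
frames = pipe(
    prompts=["a photo of a cat", "a photo of a dog"],
    num_interpolation_steps=8,
    num_inference_steps=4,
    guidance_scale=8.5,
).images
```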
......
...@@ -86,7 +86,7 @@ class LatentConsistencyModelPipeline(DiffusionPipeline): ...@@ -86,7 +86,7 @@ class LatentConsistencyModelPipeline(DiffusionPipeline):
torch device torch device
num_images_per_prompt (`int`): num_images_per_prompt (`int`):
number of images that should be generated per prompt number of images that should be generated per prompt
prompt_embeds (`torch.FloatTensor`, *optional*): prompt_embeds (`torch.Tensor`, *optional*):
Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
provided, text embeddings will be generated from `prompt` input argument. provided, text embeddings will be generated from `prompt` input argument.
""" """
...@@ -208,10 +208,10 @@ class LatentConsistencyModelPipeline(DiffusionPipeline): ...@@ -208,10 +208,10 @@ class LatentConsistencyModelPipeline(DiffusionPipeline):
width: Optional[int] = 768, width: Optional[int] = 768,
guidance_scale: float = 7.5, guidance_scale: float = 7.5,
num_images_per_prompt: Optional[int] = 1, num_images_per_prompt: Optional[int] = 1,
latents: Optional[torch.FloatTensor] = None, latents: Optional[torch.Tensor] = None,
num_inference_steps: int = 4, num_inference_steps: int = 4,
lcm_origin_steps: int = 50, lcm_origin_steps: int = 50,
prompt_embeds: Optional[torch.FloatTensor] = None, prompt_embeds: Optional[torch.Tensor] = None,
output_type: Optional[str] = "pil", output_type: Optional[str] = "pil",
return_dict: bool = True, return_dict: bool = True,
cross_attention_kwargs: Optional[Dict[str, Any]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None,
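For contrast, the plain text-to-image pipeline is typically driven as below. `SimianLuo/LCM_Dreamshaper_v7` and the `latent_consistency_txt2img` community id are the ones the LCM authors published; treat them as assumptions of this sketch rather than facts established by the diff:

```
import torch
from diffusers import DiffusionPipeline

pipe = DiffusionPipeline.from_pretrained(
    "SimianLuo/LCM_Dreamshaper_v7",
    custom_pipeline="latent_consistency_txt2img",
)
pipe.to("cuda")

# LCM needs very few steps; lcm_origin_steps is the teacher's DDIM schedule.
images = pipe(
    prompt="Self-portrait oil painting, a beautiful cyborg with golden hair, 8k",
    num_inference_steps=4,
    lcm_origin_steps=50,
    guidance_scale=8.0,
).images
```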
...@@ -310,16 +310,16 @@ class LCMSchedulerOutput(BaseOutput): ...@@ -310,16 +310,16 @@ class LCMSchedulerOutput(BaseOutput):
""" """
Output class for the scheduler's `step` function output. Output class for the scheduler's `step` function output.
Args: Args:
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): prev_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the Computed sample `(x_{t-1})` of previous timestep. `prev_sample` should be used as next model input in the
denoising loop. denoising loop.
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images): pred_original_sample (`torch.Tensor` of shape `(batch_size, num_channels, height, width)` for images):
The predicted denoised sample `(x_{0})` based on the model output from the current timestep. The predicted denoised sample `(x_{0})` based on the model output from the current timestep.
`pred_original_sample` can be used to preview progress or for guidance. `pred_original_sample` can be used to preview progress or for guidance.
""" """
prev_sample: torch.FloatTensor prev_sample: torch.Tensor
denoised: Optional[torch.FloatTensor] = None denoised: Optional[torch.Tensor] = None
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
...@@ -367,10 +367,10 @@ def rescale_zero_terminal_snr(betas): ...@@ -367,10 +367,10 @@ def rescale_zero_terminal_snr(betas):
""" """
Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1) Rescales betas to have zero terminal SNR Based on https://arxiv.org/pdf/2305.08891.pdf (Algorithm 1)
Args: Args:
betas (`torch.FloatTensor`): betas (`torch.Tensor`):
the betas that the scheduler is being initialized with. the betas that the scheduler is being initialized with.
Returns: Returns:
`torch.FloatTensor`: rescaled betas with zero terminal SNR `torch.Tensor`: rescaled betas with zero terminal SNR
""" """
# Convert betas to alphas_bar_sqrt # Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas alphas = 1.0 - betas
...@@ -499,17 +499,17 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -499,17 +499,17 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
self.num_inference_steps = None self.num_inference_steps = None
self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64))
def scale_model_input(self, sample: torch.FloatTensor, timestep: Optional[int] = None) -> torch.FloatTensor: def scale_model_input(self, sample: torch.Tensor, timestep: Optional[int] = None) -> torch.Tensor:
""" """
Ensures interchangeability with schedulers that need to scale the denoising model input depending on the Ensures interchangeability with schedulers that need to scale the denoising model input depending on the
current timestep. current timestep.
Args: Args:
sample (`torch.FloatTensor`): sample (`torch.Tensor`):
The input sample. The input sample.
timestep (`int`, *optional*): timestep (`int`, *optional*):
The current timestep in the diffusion chain. The current timestep in the diffusion chain.
Returns: Returns:
`torch.FloatTensor`: `torch.Tensor`:
A scaled input sample. A scaled input sample.
""" """
return sample return sample
...@@ -525,7 +525,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -525,7 +525,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
return variance return variance
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.FloatTensor) -> torch.FloatTensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
...@@ -593,25 +593,25 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -593,25 +593,25 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
def step( def step(
self, self,
model_output: torch.FloatTensor, model_output: torch.Tensor,
timeindex: int, timeindex: int,
timestep: int, timestep: int,
sample: torch.FloatTensor, sample: torch.Tensor,
eta: float = 0.0, eta: float = 0.0,
use_clipped_model_output: bool = False, use_clipped_model_output: bool = False,
generator=None, generator=None,
variance_noise: Optional[torch.FloatTensor] = None, variance_noise: Optional[torch.Tensor] = None,
return_dict: bool = True, return_dict: bool = True,
) -> Union[LCMSchedulerOutput, Tuple]: ) -> Union[LCMSchedulerOutput, Tuple]:
""" """
Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion Predict the sample from the previous timestep by reversing the SDE. This function propagates the diffusion
process from the learned model outputs (most often the predicted noise). process from the learned model outputs (most often the predicted noise).
Args: Args:
model_output (`torch.FloatTensor`): model_output (`torch.Tensor`):
The direct output from learned diffusion model. The direct output from learned diffusion model.
timestep (`float`): timestep (`float`):
The current discrete timestep in the diffusion chain. The current discrete timestep in the diffusion chain.
sample (`torch.FloatTensor`): sample (`torch.Tensor`):
A current instance of a sample created by the diffusion process. A current instance of a sample created by the diffusion process.
eta (`float`): eta (`float`):
The weight of noise for added noise in diffusion step. The weight of noise for added noise in diffusion step.
...@@ -622,7 +622,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -622,7 +622,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
`use_clipped_model_output` has no effect. `use_clipped_model_output` has no effect.
generator (`torch.Generator`, *optional*): generator (`torch.Generator`, *optional*):
A random number generator. A random number generator.
variance_noise (`torch.FloatTensor`): variance_noise (`torch.Tensor`):
Alternative to generating noise with `generator` by directly providing the noise for the variance Alternative to generating noise with `generator` by directly providing the noise for the variance
itself. Useful for methods such as [`CycleDiffusion`]. itself. Useful for methods such as [`CycleDiffusion`].
return_dict (`bool`, *optional*, defaults to `True`): return_dict (`bool`, *optional*, defaults to `True`):
...@@ -685,10 +685,10 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -685,10 +685,10 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.add_noise
def add_noise( def add_noise(
self, self,
original_samples: torch.FloatTensor, original_samples: torch.Tensor,
noise: torch.FloatTensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.FloatTensor: ) -> torch.Tensor:
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype) alphas_cumprod = self.alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)
timesteps = timesteps.to(original_samples.device) timesteps = timesteps.to(original_samples.device)
...@@ -707,9 +707,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -707,9 +707,7 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
return noisy_samples return noisy_samples
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity( def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
self, sample: torch.FloatTensor, noise: torch.FloatTensor, timesteps: torch.IntTensor
) -> torch.FloatTensor:
# Make sure alphas_cumprod and timestep have same device and dtype as sample # Make sure alphas_cumprod and timestep have same device and dtype as sample
alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype) alphas_cumprod = self.alphas_cumprod.to(device=sample.device, dtype=sample.dtype)
timesteps = timesteps.to(sample.device) timesteps = timesteps.to(sample.device)
......