Unverified commit 5d848ec0, authored by M. Tolga Cangöz and committed by GitHub

[`Tests`] Update a deprecated parameter in test files and fix several typos (#7277)

* Add properties and `IPAdapterTesterMixin` tests for `StableDiffusionPanoramaPipeline`

* Fix variable name typo and update comments

* Update deprecated `output_type="numpy"` to "np" in test files

* Discard changes to src/diffusers/pipelines/stable_diffusion_panorama/pipeline_stable_diffusion_panorama.py

* Update test_stable_diffusion_panorama.py

* Update numbers in README.md

* Update get_guidance_scale_embedding method docs to use `w` instead of `timesteps`

* Update number of checkpoints in README.md

* Add type hints and fix var name

* Fix PyTorch's convention for inplace functions

* Fix a typo

* Revert "Fix PyTorch's convention for inplace functions"

This reverts commit 74350cf65b2c9aa77f08bec7937d7a8b13edb509.

* Fix typos

* Indent

* Refactor get_guidance_scale_embedding method in LEditsPPPipelineStableDiffusionXL class
parent 4974b845
@@ -77,7 +77,7 @@ Please refer to the [How to use Stable Diffusion in Apple Silicon](https://huggi
 ## Quickstart
-Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 19000+ checkpoints):
+Generating outputs is super easy with 🤗 Diffusers. To generate an image from text, use the `from_pretrained` method to load any pretrained diffusion model (browse the [Hub](https://huggingface.co/models?library=diffusers&sort=downloads) for 22000+ checkpoints):
 ```python
 from diffusers import DiffusionPipeline
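For context, the quickstart this hunk updates the checkpoint count for follows the pattern below; a minimal sketch assuming the `runwayml/stable-diffusion-v1-5` checkpoint and a CUDA device (any Hub checkpoint compatible with `DiffusionPipeline` works the same way):

```python
import torch
from diffusers import DiffusionPipeline

# Load a pretrained diffusion model from the Hub by repo ID.
pipeline = DiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
)
pipeline.to("cuda")

# Generate an image from a text prompt; `.images` is a list of PIL images.
image = pipeline("An astronaut riding a horse on Mars").images[0]
image.save("astronaut.png")
```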
@@ -219,7 +219,7 @@ Also, say 👋 in our public Discord channel <a href="https://discord.gg/G7tWnz9
 - https://github.com/deep-floyd/IF
 - https://github.com/bentoml/BentoML
 - https://github.com/bmaltais/kohya_ss
-- +8000 other amazing GitHub repositories 💪
+- +9000 other amazing GitHub repositories 💪
 Thank you for using us ❤️.
@@ -637,7 +637,7 @@ def main(args):
         generator=generator,
         batch_size=args.eval_batch_size,
         num_inference_steps=args.ddpm_num_inference_steps,
-        output_type="numpy",
+        output_type="np",
     ).images
     if args.use_ema:
@@ -648,7 +648,7 @@ def main(args):
         generator=generator,
         batch_size=args.eval_batch_size,
         num_inference_steps=args.ddpm_num_inference_steps,
-        output_type="numpy",
+        output_type="np",
     ).images
     if args.use_ema:
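Both training-script hunks are the same rename: `output_type="numpy"` is the deprecated alias of `"np"`. A minimal sketch of the evaluation call, assuming an unconditional `DDPMPipeline` as in the script and the `google/ddpm-cat-256` checkpoint:

```python
import torch
from diffusers import DDPMPipeline

pipeline = DDPMPipeline.from_pretrained("google/ddpm-cat-256")

# "np" replaces the deprecated "numpy": images come back as a
# float32 NumPy array of shape (batch, height, width, channels).
images = pipeline(
    generator=torch.Generator().manual_seed(0),
    batch_size=2,
    num_inference_steps=50,
    output_type="np",
).images
print(images.shape, images.dtype)
```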
@@ -293,7 +293,7 @@ class BasicTransformerBlock(nn.Module):
     ) -> torch.FloatTensor:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         # Notice that normalization is always applied before the real computation in the following blocks.
         # 0. Self-Attention
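The remaining hunks in this file (and the flat/motion variants further below) apply the same one-character spelling fix to an identical guard. Isolated into a hypothetical standalone helper, the pattern these blocks share reads:

```python
import logging

logger = logging.getLogger(__name__)

def warn_on_deprecated_scale(cross_attention_kwargs):
    # `scale` was historically routed through `cross_attention_kwargs`
    # to scale LoRA layers; these blocks now ignore it, so warn callers
    # who still pass it.
    if cross_attention_kwargs is not None:
        if cross_attention_kwargs.get("scale", None) is not None:
            logger.warning(
                "Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored."
            )

warn_on_deprecated_scale({"scale": 0.5})  # emits the (now correctly spelled) warning
```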
@@ -308,7 +308,7 @@ class Transformer2DModel(ModelMixin, ConfigMixin):
         """
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")
         # ensure attention_mask is a bias, and give it a singleton query_tokens dimension.
         # we may have done this conversion already, e.g. if we came here via UNet2DConditionModel#forward.
         # we can tell by counting dims; if ndim == 2: it's a mask rather than a bias.
@@ -846,7 +846,7 @@ class UNetMidBlock2DCrossAttn(nn.Module):
     ) -> torch.FloatTensor:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         hidden_states = self.resnets[0](hidden_states, temb)
         for attn, resnet in zip(self.attentions, self.resnets[1:]):
@@ -986,7 +986,7 @@ class UNetMidBlock2DSimpleCrossAttn(nn.Module):
     ) -> torch.FloatTensor:
         cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
         if cross_attention_kwargs.get("scale", None) is not None:
-            logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+            logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         if attention_mask is None:
             # if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
@@ -1116,7 +1116,7 @@ class AttnDownBlock2D(nn.Module):
     ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
         cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
         if cross_attention_kwargs.get("scale", None) is not None:
-            logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+            logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         output_states = ()
@@ -1241,7 +1241,7 @@ class CrossAttnDownBlock2D(nn.Module):
     ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         output_states = ()
@@ -1986,7 +1986,7 @@ class SimpleCrossAttnDownBlock2D(nn.Module):
     ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
         cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
         if cross_attention_kwargs.get("scale", None) is not None:
-            logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+            logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         output_states = ()
@@ -2201,7 +2201,7 @@ class KCrossAttnDownBlock2D(nn.Module):
     ) -> Tuple[torch.FloatTensor, Tuple[torch.FloatTensor, ...]]:
         cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
         if cross_attention_kwargs.get("scale", None) is not None:
-            logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+            logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         output_states = ()
@@ -2483,7 +2483,7 @@ class CrossAttnUpBlock2D(nn.Module):
     ) -> torch.FloatTensor:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         is_freeu_enabled = (
             getattr(self, "s1", None)
@@ -3312,7 +3312,7 @@ class SimpleCrossAttnUpBlock2D(nn.Module):
     ) -> torch.FloatTensor:
         cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
         if cross_attention_kwargs.get("scale", None) is not None:
-            logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+            logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         if attention_mask is None:
             # if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
@@ -3694,7 +3694,7 @@ class KAttentionBlock(nn.Module):
     ) -> torch.FloatTensor:
         cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
         if cross_attention_kwargs.get("scale", None) is not None:
-            logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+            logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         # 1. Self-Attention
         if self.add_self_attention:
@@ -1183,7 +1183,7 @@ class CrossAttnDownBlockMotion(nn.Module):
     ):
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         output_states = ()
@@ -1367,7 +1367,7 @@ class CrossAttnUpBlockMotion(nn.Module):
     ) -> torch.FloatTensor:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         is_freeu_enabled = (
             getattr(self, "s1", None)
@@ -1707,7 +1707,7 @@ class UNetMidBlockCrossAttnMotion(nn.Module):
     ) -> torch.FloatTensor:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         hidden_states = self.resnets[0](hidden_states, temb)
@@ -127,7 +127,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
                 on the amount of noise initially added. When `strength` is 1, added noise is maximum and the denoising
                 process runs for the full number of iterations specified in `num_inference_steps`. A value of 1
                 essentially ignores `image`.
-            num_inference_steps (`int`, *optional*, defaults to 16):
+            num_inference_steps (`int`, *optional*, defaults to 12):
                 The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                 expense of slower inference.
             guidance_scale (`float`, *optional*, defaults to 10.0):
@@ -191,7 +191,7 @@ class AmusedImg2ImgPipeline(DiffusionPipeline):
             negative_prompt_embeds is None and negative_encoder_hidden_states is not None
         ):
             raise ValueError(
-                "pass either both `negatve_prompt_embeds` and `negative_encoder_hidden_states` or neither"
+                "pass either both `negative_prompt_embeds` and `negative_encoder_hidden_states` or neither"
             )
         if (prompt is None and prompt_embeds is None) or (prompt is not None and prompt_embeds is not None):
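The corrected message belongs to a both-or-neither validation on the embedding pair; the hunk only shows the second half of the condition. A standalone sketch of that check, with a hypothetical helper name:

```python
def check_negative_embeds(negative_prompt_embeds, negative_encoder_hidden_states):
    # The two tensors are produced together by the text encoder, so
    # supplying exactly one of them is a caller error.
    if (negative_prompt_embeds is not None and negative_encoder_hidden_states is None) or (
        negative_prompt_embeds is None and negative_encoder_hidden_states is not None
    ):
        raise ValueError(
            "pass either both `negative_prompt_embeds` and `negative_encoder_hidden_states` or neither"
        )
```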
@@ -824,20 +824,22 @@ class StableDiffusionControlNetPipeline(
         return latents

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
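Every `get_guidance_scale_embedding` hunk in this commit carries a `# Copied from` marker pointing at the `LatentConsistencyModelPipeline` original (edited further below), so the signature and docstring change is propagated mechanically. For reference, a sketch of the full method body as a free function, following the sinusoidal-embedding scheme of the linked VDM code: `w` is scaled by 1000, then embedded with sin/cos over log-spaced frequencies.

```python
import torch

def get_guidance_scale_embedding(
    w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
) -> torch.FloatTensor:
    assert len(w.shape) == 1
    w = w * 1000.0

    half_dim = embedding_dim // 2
    emb = torch.log(torch.tensor(10000.0)) / (half_dim - 1)
    emb = torch.exp(torch.arange(half_dim, dtype=dtype) * -emb)
    emb = w.to(dtype)[:, None] * emb[None, :]  # (len(w), half_dim)
    emb = torch.cat([torch.sin(emb), torch.cos(emb)], dim=1)
    if embedding_dim % 2 == 1:  # zero-pad odd dimensions
        emb = torch.nn.functional.pad(emb, (0, 1))
    assert emb.shape == (w.shape[0], embedding_dim)
    return emb

# e.g. one embedding row per guidance scale in a batch:
emb = get_guidance_scale_embedding(torch.tensor([7.5]), embedding_dim=256)
```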
@@ -869,20 +869,22 @@ class StableDiffusionXLControlNetPipeline(
         self.vae.decoder.mid_block.to(dtype)

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -133,7 +133,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
         generator: Optional[torch.Generator] = None,
         num_inference_steps: int = 100,
         return_dict: bool = True,
-        output_type: str = "numpy",
+        output_type: str = "np",
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: int = 1,
     ) -> Union[AudioPipelineOutput, Tuple]:
@@ -157,7 +157,7 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
                 expense of slower inference.
             return_dict (`bool`, *optional*, defaults to `True`):
                 Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
-            output_type (`str`, *optional*, defaults to `"numpy"`):
+            output_type (`str`, *optional*, defaults to `"np"`):
                 The output format of the generated audio.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
@@ -249,16 +249,16 @@ class SpectrogramDiffusionPipeline(DiffusionPipeline):
             logger.info("Generated segment", i)

-        if output_type == "numpy" and not is_onnx_available():
+        if output_type == "np" and not is_onnx_available():
             raise ValueError(
                 "Cannot return output in 'np' format if ONNX is not available. Make sure to have ONNX installed or set 'output_type' to 'mel'."
             )
-        elif output_type == "numpy" and self.melgan is None:
+        elif output_type == "np" and self.melgan is None:
             raise ValueError(
                 "Cannot return output in 'np' format if melgan component is not defined. Make sure to define `self.melgan` or set 'output_type' to 'mel'."
             )

-        if output_type == "numpy":
+        if output_type == "np":
             output = self.melgan(input_features=full_pred_mel.astype(np.float32))
         else:
             output = full_pred_mel
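In the renamed branches above, `"np"` selects the MelGAN vocoder path while `"mel"` returns the raw spectrogram. The control flow, isolated into a hypothetical helper:

```python
import numpy as np

def render_output(full_pred_mel, melgan, output_type="np"):
    # "np": run the (ONNX) MelGAN vocoder to get waveform audio.
    # "mel": skip the vocoder and return the predicted mel spectrogram.
    if output_type == "np" and melgan is None:
        raise ValueError(
            "Cannot return output in 'np' format if melgan component is not "
            "defined. Make sure to define `self.melgan` or set 'output_type' to 'mel'."
        )
    if output_type == "np":
        return melgan(input_features=full_pred_mel.astype(np.float32))
    return full_pred_mel
```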
@@ -2004,7 +2004,7 @@ class CrossAttnUpBlockFlat(nn.Module):
     ) -> torch.FloatTensor:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         is_freeu_enabled = (
             getattr(self, "s1", None)
@@ -2338,7 +2338,7 @@ class UNetMidBlockFlatCrossAttn(nn.Module):
     ) -> torch.FloatTensor:
         if cross_attention_kwargs is not None:
             if cross_attention_kwargs.get("scale", None) is not None:
-                logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+                logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         hidden_states = self.resnets[0](hidden_states, temb)
         for attn, resnet in zip(self.attentions, self.resnets[1:]):
@@ -2479,7 +2479,7 @@ class UNetMidBlockFlatSimpleCrossAttn(nn.Module):
     ) -> torch.FloatTensor:
         cross_attention_kwargs = cross_attention_kwargs if cross_attention_kwargs is not None else {}
         if cross_attention_kwargs.get("scale", None) is not None:
-            logger.warning("Passing `scale` to `cross_attention_kwargs` is depcrecated. `scale` will be ignored.")
+            logger.warning("Passing `scale` to `cross_attention_kwargs` is deprecated. `scale` will be ignored.")

         if attention_mask is None:
             # if encoder_hidden_states is defined: we are doing cross-attn, so we should use cross-attn mask.
@@ -548,20 +548,22 @@ class LatentConsistencyModelImg2ImgPipeline(
         return latents

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -490,20 +490,22 @@ class LatentConsistencyModelPipeline(
         latents = latents * self.scheduler.init_noise_sigma
         return latents

-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -713,20 +713,22 @@ class LEditsPPPipelineStableDiffusionXL(
         self.vae.decoder.mid_block.to(dtype)

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -669,20 +669,22 @@ class StableDiffusionPipeline(
         return latents

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -767,20 +767,22 @@ class StableDiffusionImg2ImgPipeline(
         return latents

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -909,20 +909,22 @@ class StableDiffusionInpaintPipeline(
         return timesteps, num_inference_steps - t_start

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0
@@ -1304,7 +1304,7 @@ class StableDiffusionDiffEditPipeline(
         callback: Optional[Callable[[int, int, torch.FloatTensor], None]] = None,
         callback_steps: int = 1,
         cross_attention_kwargs: Optional[Dict[str, Any]] = None,
-        clip_ckip: int = None,
+        clip_skip: int = None,
     ):
         r"""
         The call function to the pipeline for generation.
@@ -1426,7 +1426,7 @@ class StableDiffusionDiffEditPipeline(
             prompt_embeds=prompt_embeds,
             negative_prompt_embeds=negative_prompt_embeds,
             lora_scale=text_encoder_lora_scale,
-            clip_skip=clip_ckip,
+            clip_skip=clip_skip,
         )

         # For classifier free guidance, we need to do two forward passes.
         # Here we concatenate the unconditional and text embeddings into a single batch
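`clip_skip` is the standard diffusers argument the typo was shadowing: it skips final layers of the CLIP text encoder when computing prompt embeddings. A minimal usage sketch with a plain `StableDiffusionPipeline` (DiffEdit itself additionally needs a mask and inverted latents, omitted here):

```python
import torch
from diffusers import StableDiffusionPipeline

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# clip_skip=1 drops the last CLIP text-encoder layer; some checkpoints
# are trained against these earlier, less specialized text features.
image = pipe("a watercolor fox", clip_skip=1).images[0]
```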
@@ -644,20 +644,22 @@ class StableDiffusionLDM3DPipeline(
         return latents

     # Copied from diffusers.pipelines.latent_consistency_models.pipeline_latent_consistency_text2img.LatentConsistencyModelPipeline.get_guidance_scale_embedding
-    def get_guidance_scale_embedding(self, w, embedding_dim=512, dtype=torch.float32):
+    def get_guidance_scale_embedding(
+        self, w: torch.Tensor, embedding_dim: int = 512, dtype: torch.dtype = torch.float32
+    ) -> torch.FloatTensor:
         """
         See https://github.com/google-research/vdm/blob/dc27b98a554f65cdc654b800da5aa1846545d41b/model_vdm.py#L298

         Args:
-            timesteps (`torch.Tensor`):
-                generate embedding vectors at these timesteps
+            w (`torch.Tensor`):
+                Generate embedding vectors with a specified guidance scale to subsequently enrich timestep embeddings.
             embedding_dim (`int`, *optional*, defaults to 512):
-                dimension of the embeddings to generate
-            dtype:
-                data type of the generated embeddings
+                Dimension of the embeddings to generate.
+            dtype (`torch.dtype`, *optional*, defaults to `torch.float32`):
+                Data type of the generated embeddings.

         Returns:
-            `torch.FloatTensor`: Embedding vectors with shape `(len(timesteps), embedding_dim)`
+            `torch.FloatTensor`: Embedding vectors with shape `(len(w), embedding_dim)`.
         """
         assert len(w.shape) == 1
         w = w * 1000.0