Unverified Commit 3c1ca869 authored by David El Malih, committed by GitHub

Improve docstrings and type hints in scheduling_ddpm.py (#12651)

* Enhance type hints and docstrings in scheduling_ddpm.py

- Added type hints for function parameters and return types across the DDPMScheduler class and related functions.
- Improved docstrings for clarity, including detailed descriptions of parameters and return values.
- Updated the alpha_transform_type and beta_schedule parameters to use Literal types for better type safety.
- Refined the _get_variance and previous_timestep methods with comprehensive documentation.

* Refactor docstrings and type hints in scheduling_ddpm.py

- Cleaned up whitespace in the rescale_zero_terminal_snr function.
- Enhanced the variance_type parameter in the DDPMScheduler class with improved formatting for better readability.
- Updated the docstring for the compute_variance method to maintain consistency and clarity in parameter descriptions and return values.

* Apply `make fix-copies`

* Refactor type hints across multiple scheduler files

- Updated type hints to include `Literal` for improved type safety in various scheduling files.
- Ensured consistency in type hinting for parameters and return types across the affected modules.
- This change enhances code clarity and maintainability.
parent 6fe4a6ff
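
Note: the core pattern this commit introduces is annotating string-option parameters with `typing.Literal` so static type checkers can flag unsupported values. Below is a minimal, self-contained sketch of that pattern; `make_betas` is an illustrative stand-in that mirrors the `betas_for_alpha_bar` signature shown in the diff, not code from the commit itself.

    import math
    from typing import Literal

    import torch


    def make_betas(
        num_steps: int,
        max_beta: float = 0.999,
        alpha_transform_type: Literal["cosine", "exp"] = "cosine",
    ) -> torch.Tensor:
        # A type checker (mypy/pyright) rejects e.g. alpha_transform_type="linear" at analysis time.
        if alpha_transform_type == "cosine":
            def alpha_bar(t: float) -> float:
                return math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2
        elif alpha_transform_type == "exp":
            def alpha_bar(t: float) -> float:
                return math.exp(t * -12.0)
        else:  # unreachable for well-typed callers, kept as a runtime guard
            raise ValueError(f"Unsupported alpha_transform_type: {alpha_transform_type}")
        betas = [
            min(1 - alpha_bar((i + 1) / num_steps) / alpha_bar(i / num_steps), max_beta)
            for i in range(num_steps)
        ]
        return torch.tensor(betas, dtype=torch.float32)
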
import math
from dataclasses import dataclass
-from typing import Optional, Tuple, Union
+from typing import Literal, Optional, Tuple, Union
import torch

@@ -12,10 +12,10 @@ from .scheduling_utils import SchedulerMixin
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -23,16 +23,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
...
@@ -49,10 +49,10 @@ class DDIMSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -60,16 +60,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":

@@ -281,6 +282,8 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
+Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing

@@ -288,6 +291,14 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
+Args:
+sample (`torch.Tensor`):
+The predicted sample to be thresholded.
+Returns:
+`torch.Tensor`:
+The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape

@@ -501,6 +512,22 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
+"""
+Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+diffusion process).
+Args:
+original_samples (`torch.Tensor`):
+The original samples to which noise will be added.
+noise (`torch.Tensor`):
+The noise to add to the samples.
+timesteps (`torch.IntTensor`):
+The timesteps indicating the noise level for each sample.
+Returns:
+`torch.Tensor`:
+The noisy samples.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls

@@ -523,6 +550,21 @@ class DDIMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+"""
+Compute the velocity prediction from the sample and noise according to the velocity formula.
+Args:
+sample (`torch.Tensor`):
+The input sample.
+noise (`torch.Tensor`):
+The noise tensor.
+timesteps (`torch.IntTensor`):
+The timesteps for velocity computation.
+Returns:
+`torch.Tensor`:
+The computed velocity.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
...
@@ -18,7 +18,7 @@
import math
from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch

@@ -49,10 +49,10 @@ class DDIMSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -60,16 +60,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":

@@ -408,6 +409,22 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
+"""
+Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+diffusion process).
+Args:
+original_samples (`torch.Tensor`):
+The original samples to which noise will be added.
+noise (`torch.Tensor`):
+The noise to add to the samples.
+timesteps (`torch.IntTensor`):
+The timesteps indicating the noise level for each sample.
+Returns:
+`torch.Tensor`:
+The noisy samples.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls

@@ -430,6 +447,21 @@ class CogVideoXDDIMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+"""
+Compute the velocity prediction from the sample and noise according to the velocity formula.
+Args:
+sample (`torch.Tensor`):
+The input sample.
+noise (`torch.Tensor`):
+The noise tensor.
+timesteps (`torch.IntTensor`):
+The timesteps for velocity computation.
+Returns:
+`torch.Tensor`:
+The computed velocity.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
...
@@ -16,7 +16,7 @@
# and https://github.com/hojonathanho/diffusion
import math
from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch

@@ -47,10 +47,10 @@ class DDIMSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -58,16 +58,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
...
@@ -49,10 +49,10 @@ class DDIMParallelSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -60,16 +60,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":

@@ -284,6 +285,8 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
+Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing

@@ -291,6 +294,14 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
+Args:
+sample (`torch.Tensor`):
+The predicted sample to be thresholded.
+Returns:
+`torch.Tensor`:
+The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape

@@ -606,6 +617,22 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
+"""
+Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+diffusion process).
+Args:
+original_samples (`torch.Tensor`):
+The original samples to which noise will be added.
+noise (`torch.Tensor`):
+The noise to add to the samples.
+timesteps (`torch.IntTensor`):
+The timesteps indicating the noise level for each sample.
+Returns:
+`torch.Tensor`:
+The noisy samples.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls

@@ -628,6 +655,21 @@ class DDIMParallelScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+"""
+Compute the velocity prediction from the sample and noise according to the velocity formula.
+Args:
+sample (`torch.Tensor`):
+The input sample.
+noise (`torch.Tensor`):
+The noise tensor.
+timesteps (`torch.IntTensor`):
+The timesteps for velocity computation.
+Returns:
+`torch.Tensor`:
+The computed velocity.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
...
@@ -16,7 +16,7 @@
import math
from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch

@@ -46,10 +46,10 @@ class DDPMSchedulerOutput(BaseOutput):
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -57,16 +57,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":

@@ -90,7 +91,7 @@ def betas_for_alpha_bar(
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
-def rescale_zero_terminal_snr(betas):
+def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
"""
Rescales betas to have zero terminal SNR Based on https://huggingface.co/papers/2305.08891 (Algorithm 1)

@@ -133,39 +134,37 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
methods the library implements for all schedulers such as loading and saving.
Args:
-num_train_timesteps (`int`, defaults to 1000):
+num_train_timesteps (`int`, defaults to `1000`):
The number of diffusion steps to train the model.
-beta_start (`float`, defaults to 0.0001):
+beta_start (`float`, defaults to `0.0001`):
The starting `beta` value of inference.
-beta_end (`float`, defaults to 0.02):
+beta_end (`float`, defaults to `0.02`):
The final `beta` value.
-beta_schedule (`str`, defaults to `"linear"`):
-The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
-`linear`, `scaled_linear`, `squaredcos_cap_v2`, or `sigmoid`.
+beta_schedule (`"linear"`, `"scaled_linear"`, `"squaredcos_cap_v2"`, or `"sigmoid"`, defaults to `"linear"`):
+The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model.
trained_betas (`np.ndarray`, *optional*):
An array of betas to pass directly to the constructor without using `beta_start` and `beta_end`.
-variance_type (`str`, defaults to `"fixed_small"`):
-Clip the variance when adding noise to the denoised sample. Choose from `fixed_small`, `fixed_small_log`,
-`fixed_large`, `fixed_large_log`, `learned` or `learned_range`.
+variance_type (`"fixed_small"`, `"fixed_small_log"`, `"fixed_large"`, `"fixed_large_log"`, `"learned"`, or `"learned_range"`, defaults to `"fixed_small"`):
+Clip the variance when adding noise to the denoised sample.
clip_sample (`bool`, defaults to `True`):
Clip the predicted sample for numerical stability.
-clip_sample_range (`float`, defaults to 1.0):
+clip_sample_range (`float`, defaults to `1.0`):
The maximum magnitude for sample clipping. Valid only when `clip_sample=True`.
-prediction_type (`str`, defaults to `epsilon`, *optional*):
+prediction_type (`"epsilon"`, `"sample"`, or `"v_prediction"`, defaults to `"epsilon"`):
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
Video](https://imagen.research.google/video/paper.pdf) paper).
thresholding (`bool`, defaults to `False`):
Whether to use the "dynamic thresholding" method. This is unsuitable for latent-space diffusion models such
as Stable Diffusion.
-dynamic_thresholding_ratio (`float`, defaults to 0.995):
+dynamic_thresholding_ratio (`float`, defaults to `0.995`):
The ratio for the dynamic thresholding method. Valid only when `thresholding=True`.
-sample_max_value (`float`, defaults to 1.0):
+sample_max_value (`float`, defaults to `1.0`):
The threshold value for dynamic thresholding. Valid only when `thresholding=True`.
-timestep_spacing (`str`, defaults to `"leading"`):
+timestep_spacing (`"linspace"`, `"leading"`, or `"trailing"`, defaults to `"leading"`):
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
-steps_offset (`int`, defaults to 0):
+steps_offset (`int`, defaults to `0`):
An offset added to the inference steps, as required by some model families.
rescale_betas_zero_snr (`bool`, defaults to `False`):
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and

@@ -182,16 +181,18 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
num_train_timesteps: int = 1000,
beta_start: float = 0.0001,
beta_end: float = 0.02,
-beta_schedule: str = "linear",
+beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
-variance_type: str = "fixed_small",
+variance_type: Literal[
+"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
+] = "fixed_small",
clip_sample: bool = True,
-prediction_type: str = "epsilon",
+prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
thresholding: bool = False,
dynamic_thresholding_ratio: float = 0.995,
clip_sample_range: float = 1.0,
sample_max_value: float = 1.0,
-timestep_spacing: str = "leading",
+timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading",
steps_offset: int = 0,
rescale_betas_zero_snr: bool = False,
):
@@ -321,7 +322,31 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
self.timesteps = torch.from_numpy(timesteps).to(device)
-def _get_variance(self, t, predicted_variance=None, variance_type=None):
+def _get_variance(
+self,
+t: int,
+predicted_variance: Optional[torch.Tensor] = None,
+variance_type: Optional[
+Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
+] = None,
+) -> torch.Tensor:
+"""
+Compute the variance for a given timestep according to the specified variance type.
+Args:
+t (`int`):
+The current timestep.
+predicted_variance (`torch.Tensor`, *optional*):
+The predicted variance from the model. Used only when `variance_type` is `"learned"` or
+`"learned_range"`.
+variance_type (`"fixed_small"`, `"fixed_small_log"`, `"fixed_large"`, `"fixed_large_log"`, `"learned"`, or `"learned_range"`, *optional*):
+The type of variance to compute. If `None`, uses the variance type specified in the scheduler
+configuration.
+Returns:
+`torch.Tensor`:
+The computed variance.
+"""
prev_t = self.previous_timestep(t)
alpha_prod_t = self.alphas_cumprod[t]

@@ -363,6 +388,8 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
+Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing

@@ -370,6 +397,14 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
+Args:
+sample (`torch.Tensor`):
+The predicted sample to be thresholded.
+Returns:
+`torch.Tensor`:
+The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape

@@ -399,7 +434,7 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
model_output: torch.Tensor,
timestep: int,
sample: torch.Tensor,
-generator=None,
+generator: Optional[torch.Generator] = None,
return_dict: bool = True,
) -> Union[DDPMSchedulerOutput, Tuple]:
"""

@@ -409,20 +444,19 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.Tensor`):
The direct output from learned diffusion model.
-timestep (`float`):
+timestep (`int`):
The current discrete timestep in the diffusion chain.
sample (`torch.Tensor`):
A current instance of a sample created by the diffusion process.
generator (`torch.Generator`, *optional*):
A random number generator.
-return_dict (`bool`, *optional*, defaults to `True`):
+return_dict (`bool`, defaults to `True`):
Whether or not to return a [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`.
Returns:
[`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] or `tuple`:
If return_dict is `True`, [`~schedulers.scheduling_ddpm.DDPMSchedulerOutput`] is returned, otherwise a
tuple is returned where the first element is the sample tensor.
"""
t = timestep

@@ -503,6 +537,22 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
+"""
+Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+diffusion process).
+Args:
+original_samples (`torch.Tensor`):
+The original samples to which noise will be added.
+noise (`torch.Tensor`):
+The noise to add to the samples.
+timesteps (`torch.IntTensor`):
+The timesteps indicating the noise level for each sample.
+Returns:
+`torch.Tensor`:
+The noisy samples.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls

@@ -524,6 +574,21 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
return noisy_samples
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+"""
+Compute the velocity prediction from the sample and noise according to the velocity formula.
+Args:
+sample (`torch.Tensor`):
+The input sample.
+noise (`torch.Tensor`):
+The noise tensor.
+timesteps (`torch.IntTensor`):
+The timesteps for velocity computation.
+Returns:
+`torch.Tensor`:
+The computed velocity.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)

@@ -542,10 +607,21 @@ class DDPMScheduler(SchedulerMixin, ConfigMixin):
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
return velocity
-def __len__(self):
+def __len__(self) -> int:
return self.config.num_train_timesteps
-def previous_timestep(self, timestep):
+def previous_timestep(self, timestep: int) -> int:
+"""
+Compute the previous timestep in the diffusion chain.
+Args:
+timestep (`int`):
+The current timestep.
+Returns:
+`int`:
+The previous timestep.
+"""
if self.custom_timesteps or self.num_inference_steps:
index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
if index == self.timesteps.shape[0] - 1:
...
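
As context for the `add_noise` and `get_velocity` docstrings added above, here is a minimal usage sketch of those entry points on `DDPMScheduler`. Shapes and the chosen option values are illustrative, and the snippet assumes the public diffusers API as documented in this diff.

    import torch
    from diffusers import DDPMScheduler

    # Illustrative configuration; the string options now carry Literal type hints.
    scheduler = DDPMScheduler(
        num_train_timesteps=1000,
        beta_schedule="linear",
        variance_type="fixed_small",
        prediction_type="v_prediction",
    )

    clean = torch.randn(4, 3, 64, 64)         # x_0, e.g. images or latents
    noise = torch.randn_like(clean)           # epsilon
    timesteps = torch.randint(0, 1000, (4,))  # one noise level per sample

    # Forward diffusion: sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * epsilon
    noisy = scheduler.add_noise(clean, noise, timesteps)

    # Training target for v-prediction: sqrt(alpha_bar_t) * epsilon - sqrt(1 - alpha_bar_t) * x_0
    target = scheduler.get_velocity(clean, noise, timesteps)
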
@@ -16,7 +16,7 @@
import math
from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch

@@ -48,10 +48,10 @@ class DDPMParallelSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -59,16 +59,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":

@@ -190,16 +191,18 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
num_train_timesteps: int = 1000,
beta_start: float = 0.0001,
beta_end: float = 0.02,
-beta_schedule: str = "linear",
+beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2", "sigmoid"] = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
-variance_type: str = "fixed_small",
+variance_type: Literal[
+"fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"
+] = "fixed_small",
clip_sample: bool = True,
-prediction_type: str = "epsilon",
+prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
thresholding: bool = False,
dynamic_thresholding_ratio: float = 0.995,
clip_sample_range: float = 1.0,
sample_max_value: float = 1.0,
-timestep_spacing: str = "leading",
+timestep_spacing: Literal["linspace", "leading", "trailing"] = "leading",
steps_offset: int = 0,
rescale_betas_zero_snr: bool = False,
):

@@ -332,7 +335,31 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
self.timesteps = torch.from_numpy(timesteps).to(device)
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._get_variance
-def _get_variance(self, t, predicted_variance=None, variance_type=None):
+def _get_variance(
+self,
+t: int,
+predicted_variance: Optional[torch.Tensor] = None,
+variance_type: Optional[
+Literal["fixed_small", "fixed_small_log", "fixed_large", "fixed_large_log", "learned", "learned_range"]
+] = None,
+) -> torch.Tensor:
+"""
+Compute the variance for a given timestep according to the specified variance type.
+Args:
+t (`int`):
+The current timestep.
+predicted_variance (`torch.Tensor`, *optional*):
+The predicted variance from the model. Used only when `variance_type` is `"learned"` or
+`"learned_range"`.
+variance_type (`"fixed_small"`, `"fixed_small_log"`, `"fixed_large"`, `"fixed_large_log"`, `"learned"`, or `"learned_range"`, *optional*):
+The type of variance to compute. If `None`, uses the variance type specified in the scheduler
+configuration.
+Returns:
+`torch.Tensor`:
+The computed variance.
+"""
prev_t = self.previous_timestep(t)
alpha_prod_t = self.alphas_cumprod[t]
@@ -375,6 +402,8 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
+Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing

@@ -382,6 +411,14 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
+Args:
+sample (`torch.Tensor`):
+The predicted sample to be thresholded.
+Returns:
+`torch.Tensor`:
+The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape

@@ -592,6 +629,22 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
+"""
+Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+diffusion process).
+Args:
+original_samples (`torch.Tensor`):
+The original samples to which noise will be added.
+noise (`torch.Tensor`):
+The noise to add to the samples.
+timesteps (`torch.IntTensor`):
+The timesteps indicating the noise level for each sample.
+Returns:
+`torch.Tensor`:
+The noisy samples.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls

@@ -614,6 +667,21 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+"""
+Compute the velocity prediction from the sample and noise according to the velocity formula.
+Args:
+sample (`torch.Tensor`):
+The input sample.
+noise (`torch.Tensor`):
+The noise tensor.
+timesteps (`torch.IntTensor`):
+The timesteps for velocity computation.
+Returns:
+`torch.Tensor`:
+The computed velocity.
+"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)

@@ -637,6 +705,17 @@ class DDPMParallelScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
def previous_timestep(self, timestep):
+"""
+Compute the previous timestep in the diffusion chain.
+Args:
+timestep (`int`):
+The current timestep.
+Returns:
+`int`:
+The previous timestep.
+"""
if self.custom_timesteps or self.num_inference_steps:
index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
if index == self.timesteps.shape[0] - 1:
...
@@ -16,7 +16,7 @@
# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
import math
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch

@@ -32,10 +32,10 @@ if is_scipy_available():
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
-num_diffusion_timesteps,
-max_beta=0.999,
-alpha_transform_type="cosine",
-):
+num_diffusion_timesteps: int,
+max_beta: float = 0.999,
+alpha_transform_type: Literal["cosine", "exp"] = "cosine",
+) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].

@@ -43,16 +43,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
-num_diffusion_timesteps (`int`): the number of betas to produce.
-max_beta (`float`): the maximum beta to use; use values lower than 1 to
-prevent singularities.
-alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-Choose from `cosine` or `exp`
+num_diffusion_timesteps (`int`):
+The number of betas to produce.
+max_beta (`float`, defaults to `0.999`):
+The maximum beta to use; use values lower than 1 to avoid numerical instability.
+alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
-betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+`torch.Tensor`:
+The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":

@@ -320,6 +321,8 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
+Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing

@@ -327,6 +330,14 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
+Args:
+sample (`torch.Tensor`):
+The predicted sample to be thresholded.
+Returns:
+`torch.Tensor`:
+The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape
...
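
The `_threshold_sample` docstrings touched in several hunks above describe the dynamic thresholding procedure from the Imagen paper (https://huggingface.co/papers/2205.11487). The standalone sketch below illustrates that procedure under the same per-sample quantile/clip/rescale steps; the function name and argument names are illustrative, not the library's API.

    import torch


    def dynamic_threshold(sample: torch.Tensor, ratio: float = 0.995, max_value: float = 1.0) -> torch.Tensor:
        # Per sample: take the `ratio` quantile of |x0_hat|, keep it at least `max_value`,
        # clip the prediction to [-s, s], then divide by s so values stay within [-1, 1].
        batch_size, channels, *remaining_dims = sample.shape
        flat = sample.reshape(batch_size, -1)
        s = torch.quantile(flat.abs(), ratio, dim=1)
        s = torch.clamp(s, min=max_value).unsqueeze(1)  # never shrink samples already inside [-1, 1]
        flat = torch.clamp(flat, -s, s) / s
        return flat.reshape(batch_size, channels, *remaining_dims)
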
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -50,10 +50,10 @@ class DDIMSchedulerOutput(BaseOutput): ...@@ -50,10 +50,10 @@ class DDIMSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -61,16 +61,17 @@ def betas_for_alpha_bar( ...@@ -61,16 +61,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -445,6 +446,22 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin): ...@@ -445,6 +446,22 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls # for the subsequent add_noise calls
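The `add_noise` docstring above corresponds to the standard forward-diffusion formula x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise. A minimal sketch under the assumption that a precomputed `alphas_cumprod` tensor is passed in explicitly rather than read from the scheduler:

```python
import torch


def add_noise(original_samples: torch.Tensor,
              noise: torch.Tensor,
              timesteps: torch.IntTensor,
              alphas_cumprod: torch.Tensor) -> torch.Tensor:
    # x_t = sqrt(alpha_bar_t) * x_0 + sqrt(1 - alpha_bar_t) * noise
    alpha_bar = alphas_cumprod.to(device=original_samples.device, dtype=original_samples.dtype)[timesteps]
    while alpha_bar.dim() < original_samples.dim():
        alpha_bar = alpha_bar.unsqueeze(-1)  # broadcast over channel/spatial dims
    return alpha_bar.sqrt() * original_samples + (1 - alpha_bar).sqrt() * noise


betas = torch.linspace(1e-4, 0.02, 1000)          # illustrative linear schedule
alphas_cumprod = torch.cumprod(1.0 - betas, dim=0)
x0 = torch.randn(4, 3, 16, 16)
noisy = add_noise(x0, torch.randn_like(x0), torch.randint(0, 1000, (4,)), alphas_cumprod)
assert noisy.shape == x0.shape
```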
...@@ -467,6 +484,21 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin): ...@@ -467,6 +484,21 @@ class CogVideoXDPMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
"""
Compute the velocity prediction from the sample and noise according to the velocity formula.
Args:
sample (`torch.Tensor`):
The input sample.
noise (`torch.Tensor`):
The noise tensor.
timesteps (`torch.IntTensor`):
The timesteps for velocity computation.
Returns:
`torch.Tensor`:
The computed velocity.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample # Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
......
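Similarly, the quantity documented for `get_velocity` is the usual v-prediction target, v_t = sqrt(alpha_bar_t) * noise - sqrt(1 - alpha_bar_t) * x_0. A sketch using the same explicit-`alphas_cumprod` convention as the `add_noise` example above (again an assumption for illustration, not the scheduler's exact code):

```python
import torch


def get_velocity(sample: torch.Tensor,
                 noise: torch.Tensor,
                 timesteps: torch.IntTensor,
                 alphas_cumprod: torch.Tensor) -> torch.Tensor:
    # v_t = sqrt(alpha_bar_t) * noise - sqrt(1 - alpha_bar_t) * x_0
    alpha_bar = alphas_cumprod.to(device=sample.device, dtype=sample.dtype)[timesteps]
    while alpha_bar.dim() < sample.dim():
        alpha_bar = alpha_bar.unsqueeze(-1)  # broadcast over channel/spatial dims
    return alpha_bar.sqrt() * noise - (1 - alpha_bar).sqrt() * sample


alphas_cumprod = torch.cumprod(1.0 - torch.linspace(1e-4, 0.02, 1000), dim=0)
x0, eps = torch.randn(2, 3, 16, 16), torch.randn(2, 3, 16, 16)
v = get_velocity(x0, eps, torch.randint(0, 1000, (2,)), alphas_cumprod)
assert v.shape == x0.shape
```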
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver # DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver
import math import math
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -32,10 +32,10 @@ if is_scipy_available(): ...@@ -32,10 +32,10 @@ if is_scipy_available():
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -43,16 +43,17 @@ def betas_for_alpha_bar( ...@@ -43,16 +43,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -459,6 +460,8 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -459,6 +460,8 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -466,6 +469,14 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -466,6 +469,14 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver # DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver
import math import math
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -32,10 +32,10 @@ if is_scipy_available(): ...@@ -32,10 +32,10 @@ if is_scipy_available():
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -43,16 +43,17 @@ def betas_for_alpha_bar( ...@@ -43,16 +43,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -332,6 +333,8 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin): ...@@ -332,6 +333,8 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -339,6 +342,14 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin): ...@@ -339,6 +342,14 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -115,10 +115,10 @@ class BrownianTreeNoiseSampler: ...@@ -115,10 +115,10 @@ class BrownianTreeNoiseSampler:
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -126,16 +126,17 @@ def betas_for_alpha_bar( ...@@ -126,16 +126,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver # DISCLAIMER: This file is strongly influenced by https://github.com/LuChengTHU/dpm-solver
import math import math
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -34,10 +34,10 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name ...@@ -34,10 +34,10 @@ logger = logging.get_logger(__name__) # pylint: disable=invalid-name
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -45,16 +45,17 @@ def betas_for_alpha_bar( ...@@ -45,16 +45,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -410,6 +411,8 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin): ...@@ -410,6 +411,8 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -417,6 +420,14 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin): ...@@ -417,6 +420,14 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
......
...@@ -299,6 +299,8 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -299,6 +299,8 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -306,6 +308,14 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -306,6 +308,14 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -49,10 +49,10 @@ class EulerAncestralDiscreteSchedulerOutput(BaseOutput): ...@@ -49,10 +49,10 @@ class EulerAncestralDiscreteSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -60,16 +60,17 @@ def betas_for_alpha_bar( ...@@ -60,16 +60,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -52,10 +52,10 @@ class EulerDiscreteSchedulerOutput(BaseOutput): ...@@ -52,10 +52,10 @@ class EulerDiscreteSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -63,16 +63,17 @@ def betas_for_alpha_bar( ...@@ -63,16 +63,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -49,10 +49,10 @@ class HeunDiscreteSchedulerOutput(BaseOutput): ...@@ -49,10 +49,10 @@ class HeunDiscreteSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -60,16 +60,17 @@ def betas_for_alpha_bar( ...@@ -60,16 +60,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -50,10 +50,10 @@ class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput): ...@@ -50,10 +50,10 @@ class KDPM2AncestralDiscreteSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -61,16 +61,17 @@ def betas_for_alpha_bar( ...@@ -61,16 +61,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -49,10 +49,10 @@ class KDPM2DiscreteSchedulerOutput(BaseOutput): ...@@ -49,10 +49,10 @@ class KDPM2DiscreteSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -60,16 +60,17 @@ def betas_for_alpha_bar( ...@@ -60,16 +60,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -51,10 +51,10 @@ class LCMSchedulerOutput(BaseOutput): ...@@ -51,10 +51,10 @@ class LCMSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -62,16 +62,17 @@ def betas_for_alpha_bar( ...@@ -62,16 +62,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -314,6 +315,8 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -314,6 +315,8 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -321,6 +324,14 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -321,6 +324,14 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
...@@ -596,6 +607,22 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -596,6 +607,22 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls # for the subsequent add_noise calls
...@@ -618,6 +645,21 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -618,6 +645,21 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
"""
Compute the velocity prediction from the sample and noise according to the velocity formula.
Args:
sample (`torch.Tensor`):
The input sample.
noise (`torch.Tensor`):
The noise tensor.
timesteps (`torch.IntTensor`):
The timesteps for velocity computation.
Returns:
`torch.Tensor`:
The computed velocity.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample # Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
...@@ -641,6 +683,17 @@ class LCMScheduler(SchedulerMixin, ConfigMixin): ...@@ -641,6 +683,17 @@ class LCMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
def previous_timestep(self, timestep): def previous_timestep(self, timestep):
"""
Compute the previous timestep in the diffusion chain.
Args:
timestep (`int`):
The current timestep.
Returns:
`int`:
The previous timestep.
"""
if self.custom_timesteps or self.num_inference_steps: if self.custom_timesteps or self.num_inference_steps:
index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
if index == self.timesteps.shape[0] - 1: if index == self.timesteps.shape[0] - 1:
......
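The `previous_timestep` docstring above amounts to a lookup in the (descending) inference schedule, as the visible body shows. A standalone sketch, with the schedule passed in explicitly for illustration:

```python
import torch


def previous_timestep(timestep: int, timesteps: torch.Tensor) -> int:
    # timesteps is the descending inference schedule, e.g. [999, 799, 599, 399, 199]
    index = (timesteps == timestep).nonzero(as_tuple=True)[0][0]
    # the next entry in the schedule, or -1 once the current timestep is the last one
    return int(timesteps[index + 1]) if index < timesteps.shape[0] - 1 else -1


timesteps = torch.arange(999, -1, -200)  # tensor([999, 799, 599, 399, 199])
assert previous_timestep(599, timesteps) == 399
assert previous_timestep(199, timesteps) == -1
```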