Unverified commit 3c1ca869, authored by David El Malih and committed by GitHub
Browse files

Improve docstrings and type hints in scheduling_ddpm.py (#12651)

* Enhance type hints and docstrings in scheduling_ddpm.py

- Added type hints for function parameters and return types across the DDPMScheduler class and related functions.
- Improved docstrings for clarity, including detailed descriptions of parameters and return values.
- Updated the alpha_transform_type and beta_schedule parameters to use Literal types for better type safety.
- Refined the _get_variance and previous_timestep methods with comprehensive documentation.

* Refactor docstrings and type hints in scheduling_ddpm.py

- Cleaned up whitespace in the rescale_zero_terminal_snr function.
- Enhanced the variance_type parameter in the DDPMScheduler class with improved formatting for better readability.
- Updated the docstring for the compute_variance method to maintain consistency and clarity in parameter descriptions and return values.

* Apply `make fix-copies`

* Refactor type hints across multiple scheduler files

- Updated type hints to include `Literal` for improved type safety in various scheduling files.
- Ensured consistency in type hinting for parameters and return types across the affected modules.
- This change enhances code clarity and maintainability.
parent 6fe4a6ff
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
import warnings import warnings
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import scipy.stats import scipy.stats
...@@ -47,10 +47,10 @@ class LMSDiscreteSchedulerOutput(BaseOutput): ...@@ -47,10 +47,10 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -58,16 +58,17 @@ def betas_for_alpha_bar( ...@@ -58,16 +58,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim # DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim
import math import math
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -26,10 +26,10 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul ...@@ -26,10 +26,10 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -37,16 +37,17 @@ def betas_for_alpha_bar( ...@@ -37,16 +37,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -452,6 +453,22 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin): ...@@ -452,6 +453,22 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls # for the subsequent add_noise calls
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple, Union from typing import Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -45,10 +45,10 @@ class RePaintSchedulerOutput(BaseOutput): ...@@ -45,10 +45,10 @@ class RePaintSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -56,16 +56,17 @@ def betas_for_alpha_bar( ...@@ -56,16 +56,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py # The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
import math import math
from typing import Callable, List, Optional, Tuple, Union from typing import Callable, List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -33,10 +33,10 @@ if is_scipy_available(): ...@@ -33,10 +33,10 @@ if is_scipy_available():
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -44,16 +44,17 @@ def betas_for_alpha_bar( ...@@ -44,16 +44,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -342,6 +343,8 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin): ...@@ -342,6 +343,8 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -349,6 +352,14 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin): ...@@ -349,6 +352,14 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
...@@ -1193,6 +1204,22 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin): ...@@ -1193,6 +1204,22 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls # for the subsequent add_noise calls
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -50,10 +50,10 @@ class TCDSchedulerOutput(BaseOutput): ...@@ -50,10 +50,10 @@ class TCDSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -61,16 +61,17 @@ def betas_for_alpha_bar( ...@@ -61,16 +61,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -345,6 +346,8 @@ class TCDScheduler(SchedulerMixin, ConfigMixin): ...@@ -345,6 +346,8 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -352,6 +355,14 @@ class TCDScheduler(SchedulerMixin, ConfigMixin): ...@@ -352,6 +355,14 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
...@@ -651,6 +662,22 @@ class TCDScheduler(SchedulerMixin, ConfigMixin): ...@@ -651,6 +662,22 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls # for the subsequent add_noise calls
...@@ -673,6 +700,21 @@ class TCDScheduler(SchedulerMixin, ConfigMixin): ...@@ -673,6 +700,21 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor: def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
"""
Compute the velocity prediction from the sample and noise according to the velocity formula.
Args:
sample (`torch.Tensor`):
The input sample.
noise (`torch.Tensor`):
The noise tensor.
timesteps (`torch.IntTensor`):
The timesteps for velocity computation.
Returns:
`torch.Tensor`:
The computed velocity.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample # Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device) self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype) alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
...@@ -696,6 +738,17 @@ class TCDScheduler(SchedulerMixin, ConfigMixin): ...@@ -696,6 +738,17 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
def previous_timestep(self, timestep): def previous_timestep(self, timestep):
"""
Compute the previous timestep in the diffusion chain.
Args:
timestep (`int`):
The current timestep.
Returns:
`int`:
The previous timestep.
"""
if self.custom_timesteps or self.num_inference_steps: if self.custom_timesteps or self.num_inference_steps:
index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0] index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
if index == self.timesteps.shape[0] - 1: if index == self.timesteps.shape[0] - 1:
......
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
import math import math
from dataclasses import dataclass from dataclasses import dataclass
from typing import Optional, Tuple, Union from typing import Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -46,10 +46,10 @@ class UnCLIPSchedulerOutput(BaseOutput): ...@@ -46,10 +46,10 @@ class UnCLIPSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -57,16 +57,17 @@ def betas_for_alpha_bar( ...@@ -57,16 +57,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -334,6 +335,22 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin): ...@@ -334,6 +335,22 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor, noise: torch.Tensor,
timesteps: torch.IntTensor, timesteps: torch.IntTensor,
) -> torch.Tensor: ) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls # for the subsequent add_noise calls
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py # The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
import math import math
from typing import List, Optional, Tuple, Union from typing import List, Literal, Optional, Tuple, Union
import numpy as np import numpy as np
import torch import torch
...@@ -32,10 +32,10 @@ if is_scipy_available(): ...@@ -32,10 +32,10 @@ if is_scipy_available():
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar( def betas_for_alpha_bar(
num_diffusion_timesteps, num_diffusion_timesteps: int,
max_beta=0.999, max_beta: float = 0.999,
alpha_transform_type="cosine", alpha_transform_type: Literal["cosine", "exp"] = "cosine",
): ) -> torch.Tensor:
""" """
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1]. (1-beta) over time from t = [0,1].
...@@ -43,16 +43,17 @@ def betas_for_alpha_bar( ...@@ -43,16 +43,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process. to that part of the diffusion process.
Args: Args:
num_diffusion_timesteps (`int`): the number of betas to produce. num_diffusion_timesteps (`int`):
max_beta (`float`): the maximum beta to use; use values lower than 1 to The number of betas to produce.
prevent singularities. max_beta (`float`, defaults to `0.999`):
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar. The maximum beta to use; use values lower than 1 to avoid numerical instability.
Choose from `cosine` or `exp` alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns: Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs `torch.Tensor`:
The betas used by the scheduler to step the model outputs.
""" """
if alpha_transform_type == "cosine": if alpha_transform_type == "cosine":
...@@ -431,6 +432,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -431,6 +432,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor: def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
""" """
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
...@@ -438,6 +441,14 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin): ...@@ -438,6 +441,14 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights." photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487 https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
""" """
dtype = sample.dtype dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape batch_size, channels, *remaining_dims = sample.shape
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment