Unverified Commit 3c1ca869 authored by David El Malih's avatar David El Malih Committed by GitHub
Browse files

Improve docstrings and type hints in scheduling_ddpm.py (#12651)

* Enhance type hints and docstrings in scheduling_ddpm.py

- Added type hints for function parameters and return types across the DDPMScheduler class and related functions.
- Improved docstrings for clarity, including detailed descriptions of parameters and return values.
- Updated the alpha_transform_type and beta_schedule parameters to use Literal types for better type safety.
- Refined the _get_variance and previous_timestep methods with comprehensive documentation.

* Refactor docstrings and type hints in scheduling_ddpm.py

- Cleaned up whitespace in the rescale_zero_terminal_snr function.
- Enhanced the variance_type parameter in the DDPMScheduler class with improved formatting for better readability.
- Updated the docstring for the compute_variance method to maintain consistency and clarity in parameter descriptions and return values.

* Apply `make fix-copies`

* Refactor type hints across multiple scheduler files

- Updated type hints to include `Literal` for improved type safety in various scheduling files.
- Ensured consistency in type hinting for parameters and return types across the affected modules.
- This change enhances code clarity and maintainability.
parent 6fe4a6ff
......@@ -14,7 +14,7 @@
import math
import warnings
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union
from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import scipy.stats
......@@ -47,10 +47,10 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
num_diffusion_timesteps,
max_beta=0.999,
alpha_transform_type="cosine",
):
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].
......@@ -58,16 +58,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
num_diffusion_timesteps (`int`): the number of betas to produce.
max_beta (`float`): the maximum beta to use; use values lower than 1 to
prevent singularities.
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
Choose from `cosine` or `exp`
num_diffusion_timesteps (`int`):
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
`torch.Tensor`:
The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
......
......@@ -15,7 +15,7 @@
# DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim
import math
from typing import List, Optional, Tuple, Union
from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch
......@@ -26,10 +26,10 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
num_diffusion_timesteps,
max_beta=0.999,
alpha_transform_type="cosine",
):
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].
......@@ -37,16 +37,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
num_diffusion_timesteps (`int`): the number of betas to produce.
max_beta (`float`): the maximum beta to use; use values lower than 1 to
prevent singularities.
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
Choose from `cosine` or `exp`
num_diffusion_timesteps (`int`):
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
`torch.Tensor`:
The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
......@@ -452,6 +453,22 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
......
......@@ -14,7 +14,7 @@
import math
from dataclasses import dataclass
from typing import Optional, Tuple, Union
from typing import Literal, Optional, Tuple, Union
import numpy as np
import torch
......@@ -45,10 +45,10 @@ class RePaintSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
num_diffusion_timesteps,
max_beta=0.999,
alpha_transform_type="cosine",
):
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].
......@@ -56,16 +56,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
num_diffusion_timesteps (`int`): the number of betas to produce.
max_beta (`float`): the maximum beta to use; use values lower than 1 to
prevent singularities.
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
Choose from `cosine` or `exp`
num_diffusion_timesteps (`int`):
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
`torch.Tensor`:
The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
......
......@@ -16,7 +16,7 @@
# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
import math
from typing import Callable, List, Optional, Tuple, Union
from typing import Callable, List, Literal, Optional, Tuple, Union
import numpy as np
import torch
......@@ -33,10 +33,10 @@ if is_scipy_available():
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
num_diffusion_timesteps,
max_beta=0.999,
alpha_transform_type="cosine",
):
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].
......@@ -44,16 +44,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
num_diffusion_timesteps (`int`): the number of betas to produce.
max_beta (`float`): the maximum beta to use; use values lower than 1 to
prevent singularities.
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
Choose from `cosine` or `exp`
num_diffusion_timesteps (`int`):
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
`torch.Tensor`:
The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
......@@ -342,6 +343,8 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
......@@ -349,6 +352,14 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape
......@@ -1193,6 +1204,22 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
......
......@@ -17,7 +17,7 @@
import math
from dataclasses import dataclass
from typing import List, Optional, Tuple, Union
from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch
......@@ -50,10 +50,10 @@ class TCDSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
num_diffusion_timesteps,
max_beta=0.999,
alpha_transform_type="cosine",
):
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].
......@@ -61,16 +61,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
num_diffusion_timesteps (`int`): the number of betas to produce.
max_beta (`float`): the maximum beta to use; use values lower than 1 to
prevent singularities.
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
Choose from `cosine` or `exp`
num_diffusion_timesteps (`int`):
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
`torch.Tensor`:
The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
......@@ -345,6 +346,8 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
......@@ -352,6 +355,14 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape
......@@ -651,6 +662,22 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
......@@ -673,6 +700,21 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
"""
Compute the velocity prediction from the sample and noise according to the velocity formula.
Args:
sample (`torch.Tensor`):
The input sample.
noise (`torch.Tensor`):
The noise tensor.
timesteps (`torch.IntTensor`):
The timesteps for velocity computation.
Returns:
`torch.Tensor`:
The computed velocity.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as sample
self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
......@@ -696,6 +738,17 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
def previous_timestep(self, timestep):
"""
Compute the previous timestep in the diffusion chain.
Args:
timestep (`int`):
The current timestep.
Returns:
`int`:
The previous timestep.
"""
if self.custom_timesteps or self.num_inference_steps:
index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
if index == self.timesteps.shape[0] - 1:
......
......@@ -14,7 +14,7 @@
import math
from dataclasses import dataclass
from typing import Optional, Tuple, Union
from typing import Literal, Optional, Tuple, Union
import numpy as np
import torch
......@@ -46,10 +46,10 @@ class UnCLIPSchedulerOutput(BaseOutput):
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
num_diffusion_timesteps,
max_beta=0.999,
alpha_transform_type="cosine",
):
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].
......@@ -57,16 +57,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
num_diffusion_timesteps (`int`): the number of betas to produce.
max_beta (`float`): the maximum beta to use; use values lower than 1 to
prevent singularities.
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
Choose from `cosine` or `exp`
num_diffusion_timesteps (`int`):
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
`torch.Tensor`:
The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
......@@ -334,6 +335,22 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.IntTensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
diffusion process).
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise to add to the samples.
timesteps (`torch.IntTensor`):
The timesteps indicating the noise level for each sample.
Returns:
`torch.Tensor`:
The noisy samples.
"""
# Make sure alphas_cumprod and timestep have same device and dtype as original_samples
# Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
# for the subsequent add_noise calls
......
......@@ -16,7 +16,7 @@
# The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
import math
from typing import List, Optional, Tuple, Union
from typing import List, Literal, Optional, Tuple, Union
import numpy as np
import torch
......@@ -32,10 +32,10 @@ if is_scipy_available():
# Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
def betas_for_alpha_bar(
num_diffusion_timesteps,
max_beta=0.999,
alpha_transform_type="cosine",
):
num_diffusion_timesteps: int,
max_beta: float = 0.999,
alpha_transform_type: Literal["cosine", "exp"] = "cosine",
) -> torch.Tensor:
"""
Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
(1-beta) over time from t = [0,1].
......@@ -43,16 +43,17 @@ def betas_for_alpha_bar(
Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
to that part of the diffusion process.
Args:
num_diffusion_timesteps (`int`): the number of betas to produce.
max_beta (`float`): the maximum beta to use; use values lower than 1 to
prevent singularities.
alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
Choose from `cosine` or `exp`
num_diffusion_timesteps (`int`):
The number of betas to produce.
max_beta (`float`, defaults to `0.999`):
The maximum beta to use; use values lower than 1 to avoid numerical instability.
alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
Returns:
betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
`torch.Tensor`:
The betas used by the scheduler to step the model outputs.
"""
if alpha_transform_type == "cosine":
......@@ -431,6 +432,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
"""
Apply dynamic thresholding to the predicted sample.
"Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
......@@ -438,6 +441,14 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
photorealism as well as better image-text alignment, especially when using very large guidance weights."
https://huggingface.co/papers/2205.11487
Args:
sample (`torch.Tensor`):
The predicted sample to be thresholded.
Returns:
`torch.Tensor`:
The thresholded sample.
"""
dtype = sample.dtype
batch_size, channels, *remaining_dims = sample.shape
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment