Improve docstrings and type hints in scheduling_ddpm.py (#12651)

* Enhance type hints and docstrings in scheduling_ddpm.py
  - Added type hints for function parameters and return types across the DDPMScheduler class and related functions.
  - Improved docstrings for clarity, including detailed descriptions of parameters and return values.
  - Updated the alpha_transform_type and beta_schedule parameters to use Literal types for better type safety.
  - Refined the _get_variance and previous_timestep methods with comprehensive documentation.
* Refactor docstrings and type hints in scheduling_ddpm.py
  - Cleaned up whitespace in the rescale_zero_terminal_snr function.
  - Enhanced the variance_type parameter in the DDPMScheduler class with improved formatting for better readability.
  - Updated the docstring for the compute_variance method to maintain consistency and clarity in parameter descriptions and return values.
* Apply `make fix-copies`
* Refactor type hints across multiple scheduler files
  - Updated type hints to include `Literal` for improved type safety in various scheduling files.
  - Ensured consistency in type hinting for parameters and return types across the affected modules.
  - This change enhances code clarity and maintainability.

Improve docstrings and type hints in scheduling_ddpm.py (#12651)
* Enhance type hints and docstrings in scheduling_ddpm.py
  - Added type hints for function parameters and return types across the DDPMScheduler class and related functions.
  - Improved docstrings for clarity, including detailed descriptions of parameters and return values.
  - Updated the alpha_transform_type and beta_schedule parameters to use Literal types for better type safety.
  - Refined the _get_variance and previous_timestep methods with comprehensive documentation.
* Refactor docstrings and type hints in scheduling_ddpm.py
  - Cleaned up whitespace in the rescale_zero_terminal_snr function.
  - Enhanced the variance_type parameter in the DDPMScheduler class with improved formatting for better readability.
  - Updated the docstring for the compute_variance method to maintain consistency and clarity in parameter descriptions and return values.
* Apply `make fix-copies`
* Refactor type hints across multiple scheduler files
  - Updated type hints to include `Literal` for improved type safety in various scheduling files.
  - Ensured consistency in type hinting for parameters and return types across the affected modules.
  - This change enhances code clarity and maintainability.
3c1ca869 · David El Malih · GitHub · 6fe4a6ff · 3c1ca869 · 3c1ca869
Unverified Commit 3c1ca869 authored Nov 13, 2025 by David El Malih Committed by GitHub Nov 13, 2025
7 changed files
--- a/src/diffusers/schedulers/scheduling_lms_discrete.py
+++ b/src/diffusers/schedulers/scheduling_lms_discrete.py
@@ -14,7 +14,7 @@
 import math
 import warnings
 from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
 import numpy as np
 import scipy.stats
@@ -47,10 +47,10 @@ class LMSDiscreteSchedulerOutput(BaseOutput):
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps,
+    num_diffusion_timesteps: int,
-    max_beta=0.999,
+    max_beta: float = 0.999,
-    alpha_transform_type="cosine",
+    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-):
+) -> torch.Tensor:
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
@@ -58,16 +58,17 @@ def betas_for_alpha_bar(
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-                     prevent singularities.
-        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-                     Choose from `cosine` or `exp`
+        num_diffusion_timesteps (`int`):
+            The number of betas to produce.
+        max_beta (`float`, defaults to `0.999`):
+            The maximum beta to use; use values lower than 1 to avoid numerical instability.
+        alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+            The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+        `torch.Tensor`:
+            The betas used by the scheduler to step the model outputs.
    """
    if alpha_transform_type == "cosine":

--- a/src/diffusers/schedulers/scheduling_pndm.py
+++ b/src/diffusers/schedulers/scheduling_pndm.py
@@ -15,7 +15,7 @@
 # DISCLAIMER: This file is strongly influenced by https://github.com/ermongroup/ddim
 import math
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
 import numpy as np
 import torch
@@ -26,10 +26,10 @@ from .scheduling_utils import KarrasDiffusionSchedulers, SchedulerMixin, Schedul
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps,
+    num_diffusion_timesteps: int,
-    max_beta=0.999,
+    max_beta: float = 0.999,
-    alpha_transform_type="cosine",
+    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-):
+) -> torch.Tensor:
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
@@ -37,16 +37,17 @@ def betas_for_alpha_bar(
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-                     prevent singularities.
-        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-                     Choose from `cosine` or `exp`
+        num_diffusion_timesteps (`int`):
+            The number of betas to produce.
+        max_beta (`float`, defaults to `0.999`):
+            The maximum beta to use; use values lower than 1 to avoid numerical instability.
+        alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+            The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+        `torch.Tensor`:
+            The betas used by the scheduler to step the model outputs.
    """
    if alpha_transform_type == "cosine":
@@ -452,6 +453,22 @@ class PNDMScheduler(SchedulerMixin, ConfigMixin):
        noise: torch.Tensor,
        timesteps: torch.IntTensor,
    ) -> torch.Tensor:
+        """
+        Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+        diffusion process).
+        Args:
+            original_samples (`torch.Tensor`):
+                The original samples to which noise will be added.
+            noise (`torch.Tensor`):
+                The noise to add to the samples.
+            timesteps (`torch.IntTensor`):
+                The timesteps indicating the noise level for each sample.
+        Returns:
+            `torch.Tensor`:
+                The noisy samples.
+        """
        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
        # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
        # for the subsequent add_noise calls

--- a/src/diffusers/schedulers/scheduling_repaint.py
+++ b/src/diffusers/schedulers/scheduling_repaint.py
@@ -14,7 +14,7 @@
 import math
 from dataclasses import dataclass
-from typing import Optional, Tuple, Union
+from typing import Literal, Optional, Tuple, Union
 import numpy as np
 import torch
@@ -45,10 +45,10 @@ class RePaintSchedulerOutput(BaseOutput):
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps,
+    num_diffusion_timesteps: int,
-    max_beta=0.999,
+    max_beta: float = 0.999,
-    alpha_transform_type="cosine",
+    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-):
+) -> torch.Tensor:
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
@@ -56,16 +56,17 @@ def betas_for_alpha_bar(
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-                     prevent singularities.
-        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-                     Choose from `cosine` or `exp`
+        num_diffusion_timesteps (`int`):
+            The number of betas to produce.
+        max_beta (`float`, defaults to `0.999`):
+            The maximum beta to use; use values lower than 1 to avoid numerical instability.
+        alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+            The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+        `torch.Tensor`:
+            The betas used by the scheduler to step the model outputs.
    """
    if alpha_transform_type == "cosine":

--- a/src/diffusers/schedulers/scheduling_sasolver.py
+++ b/src/diffusers/schedulers/scheduling_sasolver.py
@@ -16,7 +16,7 @@
 # The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
 import math
-from typing import Callable, List, Optional, Tuple, Union
+from typing import Callable, List, Literal, Optional, Tuple, Union
 import numpy as np
 import torch
@@ -33,10 +33,10 @@ if is_scipy_available():
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps,
+    num_diffusion_timesteps: int,
-    max_beta=0.999,
+    max_beta: float = 0.999,
-    alpha_transform_type="cosine",
+    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-):
+) -> torch.Tensor:
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
@@ -44,16 +44,17 @@ def betas_for_alpha_bar(
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-                     prevent singularities.
-        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-                     Choose from `cosine` or `exp`
+        num_diffusion_timesteps (`int`):
+            The number of betas to produce.
+        max_beta (`float`, defaults to `0.999`):
+            The maximum beta to use; use values lower than 1 to avoid numerical instability.
+        alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+            The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+        `torch.Tensor`:
+            The betas used by the scheduler to step the model outputs.
    """
    if alpha_transform_type == "cosine":
@@ -342,6 +343,8 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
    def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
        """
+        Apply dynamic thresholding to the predicted sample.
        "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
        prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
        s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
@@ -349,6 +352,14 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
        photorealism as well as better image-text alignment, especially when using very large guidance weights."
        https://huggingface.co/papers/2205.11487
+        Args:
+            sample (`torch.Tensor`):
+                The predicted sample to be thresholded.
+        Returns:
+            `torch.Tensor`:
+                The thresholded sample.
        """
        dtype = sample.dtype
        batch_size, channels, *remaining_dims = sample.shape
@@ -1193,6 +1204,22 @@ class SASolverScheduler(SchedulerMixin, ConfigMixin):
        noise: torch.Tensor,
        timesteps: torch.IntTensor,
    ) -> torch.Tensor:
+        """
+        Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+        diffusion process).
+        Args:
+            original_samples (`torch.Tensor`):
+                The original samples to which noise will be added.
+            noise (`torch.Tensor`):
+                The noise to add to the samples.
+            timesteps (`torch.IntTensor`):
+                The timesteps indicating the noise level for each sample.
+        Returns:
+            `torch.Tensor`:
+                The noisy samples.
+        """
        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
        # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
        # for the subsequent add_noise calls

--- a/src/diffusers/schedulers/scheduling_tcd.py
+++ b/src/diffusers/schedulers/scheduling_tcd.py
@@ -17,7 +17,7 @@
 import math
 from dataclasses import dataclass
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
 import numpy as np
 import torch
@@ -50,10 +50,10 @@ class TCDSchedulerOutput(BaseOutput):
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps,
+    num_diffusion_timesteps: int,
-    max_beta=0.999,
+    max_beta: float = 0.999,
-    alpha_transform_type="cosine",
+    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-):
+) -> torch.Tensor:
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
@@ -61,16 +61,17 @@ def betas_for_alpha_bar(
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-                     prevent singularities.
-        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-                     Choose from `cosine` or `exp`
+        num_diffusion_timesteps (`int`):
+            The number of betas to produce.
+        max_beta (`float`, defaults to `0.999`):
+            The maximum beta to use; use values lower than 1 to avoid numerical instability.
+        alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+            The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+        `torch.Tensor`:
+            The betas used by the scheduler to step the model outputs.
    """
    if alpha_transform_type == "cosine":
@@ -345,6 +346,8 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
    def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
        """
+        Apply dynamic thresholding to the predicted sample.
        "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
        prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
        s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
@@ -352,6 +355,14 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
        photorealism as well as better image-text alignment, especially when using very large guidance weights."
        https://huggingface.co/papers/2205.11487
+        Args:
+            sample (`torch.Tensor`):
+                The predicted sample to be thresholded.
+        Returns:
+            `torch.Tensor`:
+                The thresholded sample.
        """
        dtype = sample.dtype
        batch_size, channels, *remaining_dims = sample.shape
@@ -651,6 +662,22 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
        noise: torch.Tensor,
        timesteps: torch.IntTensor,
    ) -> torch.Tensor:
+        """
+        Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+        diffusion process).
+        Args:
+            original_samples (`torch.Tensor`):
+                The original samples to which noise will be added.
+            noise (`torch.Tensor`):
+                The noise to add to the samples.
+            timesteps (`torch.IntTensor`):
+                The timesteps indicating the noise level for each sample.
+        Returns:
+            `torch.Tensor`:
+                The noisy samples.
+        """
        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
        # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
        # for the subsequent add_noise calls
@@ -673,6 +700,21 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.get_velocity
    def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.IntTensor) -> torch.Tensor:
+        """
+        Compute the velocity prediction from the sample and noise according to the velocity formula.
+        Args:
+            sample (`torch.Tensor`):
+                The input sample.
+            noise (`torch.Tensor`):
+                The noise tensor.
+            timesteps (`torch.IntTensor`):
+                The timesteps for velocity computation.
+        Returns:
+            `torch.Tensor`:
+                The computed velocity.
+        """
        # Make sure alphas_cumprod and timestep have same device and dtype as sample
        self.alphas_cumprod = self.alphas_cumprod.to(device=sample.device)
        alphas_cumprod = self.alphas_cumprod.to(dtype=sample.dtype)
@@ -696,6 +738,17 @@ class TCDScheduler(SchedulerMixin, ConfigMixin):
    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler.previous_timestep
    def previous_timestep(self, timestep):
+        """
+        Compute the previous timestep in the diffusion chain.
+        Args:
+            timestep (`int`):
+                The current timestep.
+        Returns:
+            `int`:
+                The previous timestep.
+        """
        if self.custom_timesteps or self.num_inference_steps:
            index = (self.timesteps == timestep).nonzero(as_tuple=True)[0][0]
            if index == self.timesteps.shape[0] - 1:

--- a/src/diffusers/schedulers/scheduling_unclip.py
+++ b/src/diffusers/schedulers/scheduling_unclip.py
@@ -14,7 +14,7 @@
 import math
 from dataclasses import dataclass
-from typing import Optional, Tuple, Union
+from typing import Literal, Optional, Tuple, Union
 import numpy as np
 import torch
@@ -46,10 +46,10 @@ class UnCLIPSchedulerOutput(BaseOutput):
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps,
+    num_diffusion_timesteps: int,
-    max_beta=0.999,
+    max_beta: float = 0.999,
-    alpha_transform_type="cosine",
+    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-):
+) -> torch.Tensor:
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
@@ -57,16 +57,17 @@ def betas_for_alpha_bar(
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-                     prevent singularities.
-        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-                     Choose from `cosine` or `exp`
+        num_diffusion_timesteps (`int`):
+            The number of betas to produce.
+        max_beta (`float`, defaults to `0.999`):
+            The maximum beta to use; use values lower than 1 to avoid numerical instability.
+        alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+            The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+        `torch.Tensor`:
+            The betas used by the scheduler to step the model outputs.
    """
    if alpha_transform_type == "cosine":
@@ -334,6 +335,22 @@ class UnCLIPScheduler(SchedulerMixin, ConfigMixin):
        noise: torch.Tensor,
        timesteps: torch.IntTensor,
    ) -> torch.Tensor:
+        """
+        Add noise to the original samples according to the noise magnitude at each timestep (this is the forward
+        diffusion process).
+        Args:
+            original_samples (`torch.Tensor`):
+                The original samples to which noise will be added.
+            noise (`torch.Tensor`):
+                The noise to add to the samples.
+            timesteps (`torch.IntTensor`):
+                The timesteps indicating the noise level for each sample.
+        Returns:
+            `torch.Tensor`:
+                The noisy samples.
+        """
        # Make sure alphas_cumprod and timestep have same device and dtype as original_samples
        # Move the self.alphas_cumprod to device to avoid redundant CPU to GPU data movement
        # for the subsequent add_noise calls

--- a/src/diffusers/schedulers/scheduling_unipc_multistep.py
+++ b/src/diffusers/schedulers/scheduling_unipc_multistep.py
@@ -16,7 +16,7 @@
 # The codebase is modified based on https://github.com/huggingface/diffusers/blob/main/src/diffusers/schedulers/scheduling_dpmsolver_multistep.py
 import math
-from typing import List, Optional, Tuple, Union
+from typing import List, Literal, Optional, Tuple, Union
 import numpy as np
 import torch
@@ -32,10 +32,10 @@ if is_scipy_available():
 # Copied from diffusers.schedulers.scheduling_ddpm.betas_for_alpha_bar
 def betas_for_alpha_bar(
-    num_diffusion_timesteps,
+    num_diffusion_timesteps: int,
-    max_beta=0.999,
+    max_beta: float = 0.999,
-    alpha_transform_type="cosine",
+    alpha_transform_type: Literal["cosine", "exp"] = "cosine",
-):
+) -> torch.Tensor:
    """
    Create a beta schedule that discretizes the given alpha_t_bar function, which defines the cumulative product of
    (1-beta) over time from t = [0,1].
@@ -43,16 +43,17 @@ def betas_for_alpha_bar(
    Contains a function alpha_bar that takes an argument t and transforms it to the cumulative product of (1-beta) up
    to that part of the diffusion process.
    Args:
-        num_diffusion_timesteps (`int`): the number of betas to produce.
-        max_beta (`float`): the maximum beta to use; use values lower than 1 to
-                     prevent singularities.
-        alpha_transform_type (`str`, *optional*, default to `cosine`): the type of noise schedule for alpha_bar.
-                     Choose from `cosine` or `exp`
+        num_diffusion_timesteps (`int`):
+            The number of betas to produce.
+        max_beta (`float`, defaults to `0.999`):
+            The maximum beta to use; use values lower than 1 to avoid numerical instability.
+        alpha_transform_type (`"cosine"` or `"exp"`, defaults to `"cosine"`):
+            The type of noise schedule for `alpha_bar`. Choose from `cosine` or `exp`.
    Returns:
-        betas (`np.ndarray`): the betas used by the scheduler to step the model outputs
+        `torch.Tensor`:
+            The betas used by the scheduler to step the model outputs.
    """
    if alpha_transform_type == "cosine":
@@ -431,6 +432,8 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
    # Copied from diffusers.schedulers.scheduling_ddpm.DDPMScheduler._threshold_sample
    def _threshold_sample(self, sample: torch.Tensor) -> torch.Tensor:
        """
+        Apply dynamic thresholding to the predicted sample.
        "Dynamic thresholding: At each sampling step we set s to a certain percentile absolute pixel value in xt0 (the
        prediction of x_0 at timestep t), and if s > 1, then we threshold xt0 to the range [-s, s] and then divide by
        s. Dynamic thresholding pushes saturated pixels (those near -1 and 1) inwards, thereby actively preventing
@@ -438,6 +441,14 @@ class UniPCMultistepScheduler(SchedulerMixin, ConfigMixin):
        photorealism as well as better image-text alignment, especially when using very large guidance weights."
        https://huggingface.co/papers/2205.11487
+        Args:
+            sample (`torch.Tensor`):
+                The predicted sample to be thresholded.
+        Returns:
+            `torch.Tensor`:
+                The thresholded sample.
        """
        dtype = sample.dtype
        batch_size, channels, *remaining_dims = sample.shape