Unverified Commit 63dd6017 authored by David El Malih, committed by GitHub

Improve docstrings and type hints in scheduling_euler_discrete.py (#12654)

* refactor: enhance type hints and documentation in EulerDiscreteScheduler

Updated type hints for function parameters and return types in the EulerDiscreteScheduler class to improve code clarity and maintainability. Enhanced docstrings for several methods to provide clearer descriptions of their functionality and expected arguments. This includes specifying Literal types for certain parameters and ensuring consistent return type annotations across the class.

* refactor: enhance type hints and documentation across multiple schedulers

Updated type hints and improved docstrings in various scheduler classes, including CMStochasticIterativeScheduler, CosineDPMSolverMultistepScheduler, and others. This includes specifying parameter types and return types, and providing clearer descriptions of method functionality. Notable changes include documenting the default value of the begin_index argument and expanding the explanations of the noise addition methods. These improvements aim to enhance code clarity and maintainability across the scheduling module.

* refactor: update docstrings to clarify noise schedule construction

Revised docstrings across multiple scheduler classes to enhance clarity regarding the construction of noise schedules. Updated references to relevant papers, ensuring accurate citations for the methodologies used. This includes changes in DEISMultistepScheduler, DPMSolverMultistepInverseScheduler, and others, improving documentation consistency and readability.
parent eeae0338
......@@ -121,7 +121,7 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -287,7 +287,23 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
return c_skip, c_out
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
def index_for_timestep(self, timestep, schedule_timesteps=None):
def index_for_timestep(
self, timestep: Union[float, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
) -> int:
"""
Find the index of a given timestep in the timestep schedule.
Args:
timestep (`float` or `torch.Tensor`):
The timestep value to find in the schedule.
schedule_timesteps (`torch.Tensor`, *optional*):
The timestep schedule to search in. If `None`, uses `self.timesteps`.
Returns:
`int`:
The index of the timestep in the schedule. For the very first step, returns the second index if
multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image).
"""
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
......@@ -302,7 +318,14 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
return indices[pos].item()
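For illustration, the lookup described above reduces to a few tensor operations. A minimal sketch, assuming `schedule_timesteps` is a 1-D tensor; the helper name is hypothetical, not library code:

import torch

def index_for_timestep_sketch(timestep, schedule_timesteps):
    # All positions where this timestep occurs in the schedule.
    indices = (schedule_timesteps == timestep).nonzero()
    # Prefer the second match when duplicates exist, so that starting
    # mid-schedule (e.g. image-to-image) does not skip a sigma.
    pos = 1 if len(indices) > 1 else 0
    return indices[pos].item()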
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[float, torch.Tensor]) -> None:
"""
Initialize the step index for the scheduler based on the given timestep.
Args:
timestep (`float` or `torch.Tensor`):
The current timestep to initialize the step index from.
"""
if self.begin_index is None:
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
......@@ -410,6 +433,21 @@ class CMStochasticIterativeScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......
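The noise addition documented in `add_noise` above boils down to scaling the noise by the per-timestep sigma. A minimal sketch, assuming the sigma values for the requested timesteps have already been gathered; the helper name is hypothetical:

import torch

def add_noise_sketch(original_samples, noise, sigma):
    # Broadcast the per-timestep sigma over the trailing sample dimensions.
    sigma = sigma.flatten()
    while len(sigma.shape) < len(original_samples.shape):
        sigma = sigma.unsqueeze(-1)
    # The sigma of the matching schedule entry sets the noise level.
    return original_samples + noise * sigma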
......@@ -137,7 +137,7 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -266,6 +266,19 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
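The interpolation happens entirely in log-sigma space; the following is an illustrative sketch of the assumed behaviour with a hypothetical name, not the file's exact code:

import numpy as np

def sigma_to_t_sketch(sigma, log_sigmas):
    log_sigma = np.log(np.maximum(sigma, 1e-10))
    # Distance from each query sigma to every schedule entry, in log space.
    dists = log_sigma - log_sigmas[:, np.newaxis]
    # Indices of the two schedule entries bracketing each query sigma.
    low_idx = np.cumsum((dists >= 0), axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2)
    high_idx = low_idx + 1
    low, high = log_sigmas[low_idx], log_sigmas[high_idx]
    # Linear interpolation weight between the two bracketing indices.
    w = np.clip((low - log_sigma) / (low - high), 0, 1)
    return ((1 - w) * low_idx + w * high_idx).reshape(sigma.shape)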
......@@ -537,6 +550,21 @@ class CosineDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......
......@@ -99,10 +99,11 @@ def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......
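Algorithm 1 of the cited paper is short enough to sketch in full. An illustrative reimplementation under the same assumptions as the docstring above (hypothetical function name):

import torch

def rescale_zero_terminal_snr_sketch(betas):
    # Convert betas to the square root of the cumulative alpha product.
    alphas_bar_sqrt = torch.cumprod(1.0 - betas, dim=0).sqrt()
    first, last = alphas_bar_sqrt[0].clone(), alphas_bar_sqrt[-1].clone()
    # Shift so the terminal value is zero, rescale so the first value is preserved.
    alphas_bar_sqrt = (alphas_bar_sqrt - last) * first / (first - last)
    alphas_bar = alphas_bar_sqrt**2
    # Recover per-step alphas from the cumulative product, then betas.
    alphas = torch.cat([alphas_bar[0:1], alphas_bar[1:] / alphas_bar[:-1]])
    return 1.0 - alphas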
......@@ -98,10 +98,11 @@ def rescale_zero_terminal_snr(betas):
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......
......@@ -100,10 +100,11 @@ def rescale_zero_terminal_snr(betas):
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......
......@@ -97,10 +97,11 @@ def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......
......@@ -99,10 +99,11 @@ def rescale_zero_terminal_snr(betas):
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......
......@@ -230,7 +230,7 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -364,6 +364,19 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
......@@ -399,7 +412,20 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
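The Karras schedule interpolates in sigma^(1/rho) space, with rho = 7 in the reference implementation. A minimal NumPy sketch of the assumed construction (hypothetical name, endpoint handling simplified):

import numpy as np

def karras_sigmas_sketch(sigma_min, sigma_max, num_inference_steps, rho=7.0):
    ramp = np.linspace(0, 1, num_inference_steps)
    min_inv_rho = sigma_min ** (1 / rho)
    max_inv_rho = sigma_max ** (1 / rho)
    # Interpolate linearly in sigma^(1/rho) space, then raise back to the rho power.
    return (max_inv_rho + ramp * (min_inv_rho - max_inv_rho)) ** rho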
......@@ -425,7 +451,19 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
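An exponential schedule is simply uniform spacing in log-sigma space; a short sketch of the assumed construction:

import numpy as np

def exponential_sigmas_sketch(sigma_min, sigma_max, num_inference_steps):
    # Evenly spaced in log space, decreasing from sigma_max to sigma_min.
    return np.exp(np.linspace(np.log(sigma_max), np.log(sigma_min), num_inference_steps))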
......@@ -449,7 +487,24 @@ class DEISMultistepScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......
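The beta schedule maps uniformly spaced quantiles through the inverse beta CDF and rescales them into the sigma range. An illustrative sketch (hypothetical name; requires scipy, which these schedulers check for at construction):

import numpy as np
import scipy.stats

def beta_sigmas_sketch(sigma_min, sigma_max, num_inference_steps, alpha=0.6, beta=0.6):
    # Uniform quantiles, reversed so the resulting sigmas decrease.
    quantiles = 1 - np.linspace(0, 1, num_inference_steps)
    ppf = scipy.stats.beta.ppf(quantiles, alpha, beta)
    # Rescale the quantile values into [sigma_min, sigma_max].
    return sigma_min + ppf * (sigma_max - sigma_min)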
......@@ -83,10 +83,11 @@ def rescale_zero_terminal_snr(betas):
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......@@ -323,7 +324,7 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -503,6 +504,19 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
......@@ -537,7 +551,20 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -576,7 +603,19 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -600,7 +639,24 @@ class DPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......
......@@ -376,6 +376,19 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
......@@ -411,7 +424,20 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -437,7 +463,19 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -461,7 +499,24 @@ class DPMSolverMultistepInverseScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......
......@@ -251,7 +251,23 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
def index_for_timestep(self, timestep, schedule_timesteps=None):
def index_for_timestep(
self, timestep: Union[float, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
) -> int:
"""
Find the index of a given timestep in the timestep schedule.
Args:
timestep (`float` or `torch.Tensor`):
The timestep value to find in the schedule.
schedule_timesteps (`torch.Tensor`, *optional*):
The timestep schedule to search in. If `None`, uses `self.timesteps`.
Returns:
`int`:
The index of the timestep in the schedule. For the very first step, returns the second index if
multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image).
"""
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
......@@ -266,7 +282,14 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
return indices[pos].item()
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[float, torch.Tensor]) -> None:
"""
Initialize the step index for the scheduler based on the given timestep.
Args:
timestep (`float` or `torch.Tensor`):
The current timestep to initialize the step index from.
"""
if self.begin_index is None:
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
......@@ -302,7 +325,7 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -430,6 +453,19 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
......@@ -468,7 +504,19 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -492,7 +540,24 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -646,6 +711,21 @@ class DPMSolverSDEScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......
......@@ -295,7 +295,7 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -454,6 +454,19 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
......@@ -489,7 +502,20 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -515,7 +541,19 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -539,7 +577,24 @@ class DPMSolverSinglestepScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......
......@@ -169,7 +169,7 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -342,6 +342,19 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
......@@ -682,6 +695,21 @@ class EDMDPMSolverMultistepScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......
......@@ -155,7 +155,7 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -284,7 +284,23 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
return sigmas
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
def index_for_timestep(self, timestep, schedule_timesteps=None):
def index_for_timestep(
self, timestep: Union[float, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
) -> int:
"""
Find the index of a given timestep in the timestep schedule.
Args:
timestep (`float` or `torch.Tensor`):
The timestep value to find in the schedule.
schedule_timesteps (`torch.Tensor`, *optional*):
The timestep schedule to search in. If `None`, uses `self.timesteps`.
Returns:
`int`:
The index of the timestep in the schedule. For the very first step, returns the second index if
multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image).
"""
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
......@@ -299,7 +315,14 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
return indices[pos].item()
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[float, torch.Tensor]) -> None:
"""
Initialize the step index for the scheduler based on the given timestep.
Args:
timestep (`float` or `torch.Tensor`):
The current timestep to initialize the step index from.
"""
if self.begin_index is None:
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
......@@ -413,6 +436,21 @@ class EDMEulerScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......
......@@ -100,10 +100,11 @@ def rescale_zero_terminal_snr(betas):
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......@@ -245,7 +246,7 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -319,7 +320,23 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
def index_for_timestep(self, timestep, schedule_timesteps=None):
def index_for_timestep(
self, timestep: Union[float, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
) -> int:
"""
Find the index of a given timestep in the timestep schedule.
Args:
timestep (`float` or `torch.Tensor`):
The timestep value to find in the schedule.
schedule_timesteps (`torch.Tensor`, *optional*):
The timestep schedule to search in. If `None`, uses `self.timesteps`.
Returns:
`int`:
The index of the timestep in the schedule. For the very first step, returns the second index if
multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image).
"""
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
......@@ -334,7 +351,14 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
return indices[pos].item()
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[float, torch.Tensor]) -> None:
"""
Initialize the step index for the scheduler based on the given timestep.
Args:
timestep (`float` or `torch.Tensor`):
The current timestep to initialize the step index from.
"""
if self.begin_index is None:
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
......@@ -451,6 +475,21 @@ class EulerAncestralDiscreteScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......
......@@ -97,16 +97,17 @@ def betas_for_alpha_bar(
# Copied from diffusers.schedulers.scheduling_ddim.rescale_zero_terminal_snr
def rescale_zero_terminal_snr(betas):
def rescale_zero_terminal_snr(betas: torch.Tensor) -> torch.Tensor:
"""
Rescales betas to have zero terminal SNR, based on https://huggingface.co/papers/2305.08891 (Algorithm 1).
Args:
betas (`torch.Tensor`):
the betas that the scheduler is being initialized with.
The betas that the scheduler is being initialized with.
Returns:
`torch.Tensor`: rescaled betas with zero terminal SNR
`torch.Tensor`:
Rescaled betas with zero terminal SNR.
"""
# Convert betas to alphas_bar_sqrt
alphas = 1.0 - betas
......@@ -146,17 +147,17 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
The starting `beta` value of inference.
beta_end (`float`, defaults to 0.02):
The final `beta` value.
beta_schedule (`str`, defaults to `"linear"`):
beta_schedule (`Literal["linear", "scaled_linear", "squaredcos_cap_v2"]`, defaults to `"linear"`):
The beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from
`linear` or `scaled_linear`.
`"linear"`, `"scaled_linear"`, or `"squaredcos_cap_v2"`.
trained_betas (`np.ndarray`, *optional*):
Pass an array of betas directly to the constructor to bypass `beta_start` and `beta_end`.
prediction_type (`str`, defaults to `epsilon`, *optional*):
Prediction type of the scheduler function; can be `epsilon` (predicts the noise of the diffusion process),
`sample` (directly predicts the noisy sample`) or `v_prediction` (see section 2.4 of [Imagen
prediction_type (`Literal["epsilon", "sample", "v_prediction"]`, defaults to `"epsilon"`, *optional*):
Prediction type of the scheduler function; can be `"epsilon"` (predicts the noise of the diffusion
process), `"sample"` (directly predicts the noisy sample`) or `"v_prediction"` (see section 2.4 of [Imagen
Video](https://imagen.research.google/video/paper.pdf) paper).
interpolation_type(`str`, defaults to `"linear"`, *optional*):
The interpolation type to compute intermediate sigmas for the scheduler denoising steps. Should be on of
interpolation_type (`Literal["linear", "log_linear"]`, defaults to `"linear"`, *optional*):
The interpolation type to compute intermediate sigmas for the scheduler denoising steps. Should be one of
`"linear"` or `"log_linear"`.
use_karras_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use Karras sigmas for step sizes in the noise schedule during the sampling process. If `True`,
......@@ -166,18 +167,26 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
use_beta_sigmas (`bool`, *optional*, defaults to `False`):
Whether to use beta sigmas for step sizes in the noise schedule during the sampling process. Refer to [Beta
Sampling is All You Need](https://huggingface.co/papers/2407.12173) for more information.
timestep_spacing (`str`, defaults to `"linspace"`):
sigma_min (`float`, *optional*):
The minimum sigma value for the noise schedule. If not provided, defaults to the last sigma in the
schedule.
sigma_max (`float`, *optional*):
The maximum sigma value for the noise schedule. If not provided, defaults to the first sigma in the
schedule.
timestep_spacing (`Literal["linspace", "leading", "trailing"]`, defaults to `"linspace"`):
The way the timesteps should be scaled. Refer to Table 2 of the [Common Diffusion Noise Schedules and
Sample Steps are Flawed](https://huggingface.co/papers/2305.08891) for more information.
timestep_type (`Literal["discrete", "continuous"]`, defaults to `"discrete"`):
The type of timesteps to use. Can be `"discrete"` or `"continuous"`.
steps_offset (`int`, defaults to 0):
An offset added to the inference steps, as required by some model families.
rescale_betas_zero_snr (`bool`, defaults to `False`):
Whether to rescale the betas to have zero terminal SNR. This enables the model to generate very bright and
dark samples instead of limiting it to samples with medium brightness. Loosely related to
[`--offset_noise`](https://github.com/huggingface/diffusers/blob/74fd735eb073eb1d774b1ab4154a0876eb82f055/examples/dreambooth/train_dreambooth.py#L506).
final_sigmas_type (`str`, defaults to `"zero"`):
final_sigmas_type (`Literal["zero", "sigma_min"]`, defaults to `"zero"`):
The final `sigma` value for the noise schedule during the sampling process. If `"sigma_min"`, the final
sigma is the same as the last sigma in the training schedule. If `zero`, the final sigma is set to 0.
sigma is the same as the last sigma in the training schedule. If `"zero"`, the final sigma is set to 0.
"""
_compatibles = [e.name for e in KarrasDiffusionSchedulers]
......@@ -189,20 +198,20 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
num_train_timesteps: int = 1000,
beta_start: float = 0.0001,
beta_end: float = 0.02,
beta_schedule: str = "linear",
beta_schedule: Literal["linear", "scaled_linear", "squaredcos_cap_v2"] = "linear",
trained_betas: Optional[Union[np.ndarray, List[float]]] = None,
prediction_type: str = "epsilon",
interpolation_type: str = "linear",
prediction_type: Literal["epsilon", "sample", "v_prediction"] = "epsilon",
interpolation_type: Literal["linear", "log_linear"] = "linear",
use_karras_sigmas: Optional[bool] = False,
use_exponential_sigmas: Optional[bool] = False,
use_beta_sigmas: Optional[bool] = False,
sigma_min: Optional[float] = None,
sigma_max: Optional[float] = None,
timestep_spacing: str = "linspace",
timestep_type: str = "discrete", # can be "discrete" or "continuous"
timestep_spacing: Literal["linspace", "leading", "trailing"] = "linspace",
timestep_type: Literal["discrete", "continuous"] = "discrete",
steps_offset: int = 0,
rescale_betas_zero_snr: bool = False,
final_sigmas_type: str = "zero", # can be "zero" or "sigma_min"
final_sigmas_type: Literal["zero", "sigma_min"] = "zero",
):
if self.config.use_beta_sigmas and not is_scipy_available():
raise ImportError("Make sure to install scipy if you want to use beta sigmas.")
......@@ -259,8 +268,15 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
@property
def init_noise_sigma(self):
# standard deviation of the initial noise distribution
def init_noise_sigma(self) -> Union[float, torch.Tensor]:
"""
The standard deviation of the initial noise distribution.
Returns:
`float` or `torch.Tensor`:
The standard deviation of the initial noise distribution, computed based on the maximum sigma value and
the timestep spacing configuration.
"""
max_sigma = max(self.sigmas) if isinstance(self.sigmas, list) else self.sigmas.max()
if self.config.timestep_spacing in ["linspace", "trailing"]:
return max_sigma
......@@ -268,26 +284,34 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
return (max_sigma**2 + 1) ** 0.5
@property
def step_index(self):
def step_index(self) -> Optional[int]:
"""
The index counter for current timestep. It will increase 1 after each scheduler step.
The index counter for current timestep. It will increase by 1 after each scheduler step.
Returns:
`int` or `None`:
The current step index, or `None` if not initialized.
"""
return self._step_index
@property
def begin_index(self):
def begin_index(self) -> Optional[int]:
"""
The index for the first timestep. It should be set from pipeline with `set_begin_index` method.
Returns:
`int` or `None`:
The begin index for the scheduler, or `None` if not set.
"""
return self._begin_index
# Copied from diffusers.schedulers.scheduling_dpmsolver_multistep.DPMSolverMultistepScheduler.set_begin_index
def set_begin_index(self, begin_index: int = 0):
def set_begin_index(self, begin_index: int = 0) -> None:
"""
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -299,13 +323,13 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
Args:
sample (`torch.Tensor`):
The input sample.
timestep (`int`, *optional*):
The input sample to be scaled.
timestep (`float` or `torch.Tensor`):
The current timestep in the diffusion chain.
Returns:
`torch.Tensor`:
A scaled input sample.
A scaled input sample, divided by `(sigma**2 + 1) ** 0.5`.
"""
if self.step_index is None:
self._init_step_index(timestep)
......@@ -318,17 +342,18 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
def set_timesteps(
self,
num_inference_steps: int = None,
device: Union[str, torch.device] = None,
num_inference_steps: Optional[int] = None,
device: Optional[Union[str, torch.device]] = None,
timesteps: Optional[List[int]] = None,
sigmas: Optional[List[float]] = None,
):
) -> None:
"""
Sets the discrete timesteps used for the diffusion chain (to be run before inference).
Args:
num_inference_steps (`int`):
The number of diffusion steps used when generating samples with a pre-trained model.
num_inference_steps (`int`, *optional*):
The number of diffusion steps used when generating samples with a pre-trained model. If `None`,
`timesteps` or `sigmas` must be provided.
device (`str` or `torch.device`, *optional*):
The device to which the timesteps should be moved. If `None`, the timesteps are not moved.
timesteps (`List[int]`, *optional*):
......@@ -336,10 +361,9 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
based on the `timestep_spacing` attribute. If `timesteps` is passed, `num_inference_steps` and `sigmas`
must be `None`, and the `timestep_spacing` attribute will be ignored.
sigmas (`List[float]`, *optional*):
Custom sigmas used to support arbitrary timesteps schedule schedule. If `None`, timesteps and sigmas
will be generated based on the relevant scheduler attributes. If `sigmas` is passed,
`num_inference_steps` and `timesteps` must be `None`, and the timesteps will be generated based on the
custom sigmas schedule.
Custom sigmas used to support arbitrary timesteps schedule. If `None`, timesteps and sigmas will be
generated based on the relevant scheduler attributes. If `sigmas` is passed, `num_inference_steps` and
`timesteps` must be `None`, and the timesteps will be generated based on the custom sigmas schedule.
"""
if timesteps is not None and sigmas is not None:
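Since exactly one of `num_inference_steps`, `timesteps`, or `sigmas` may be supplied, a call from a pipeline might look like the following sketch (illustrative values):

from diffusers import EulerDiscreteScheduler

scheduler = EulerDiscreteScheduler()
scheduler.set_timesteps(num_inference_steps=30)  # standard spacing from the config
# scheduler.set_timesteps(sigmas=[14.6, 7.0, 3.5, 1.7, 0.0])  # or a custom sigma schedule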
......@@ -449,7 +473,20 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
self._begin_index = None
self.sigmas = sigmas.to("cpu") # to avoid too much CPU/GPU communication
def _sigma_to_t(self, sigma, log_sigmas):
def _sigma_to_t(self, sigma: np.ndarray, log_sigmas: np.ndarray) -> np.ndarray:
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
......@@ -473,8 +510,21 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
return t
# Copied from https://github.com/crowsonkb/k-diffusion/blob/686dbad0f39640ea25c8a8c6a6e56bb40eacefa2/k_diffusion/sampling.py#L17
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -500,7 +550,19 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
# Copied from https://github.com/crowsonkb/k-diffusion/blob/686dbad0f39640ea25c8a8c6a6e56bb40eacefa2/k_diffusion/sampling.py#L26
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -523,7 +585,24 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -551,7 +630,23 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
)
return sigmas
def index_for_timestep(self, timestep, schedule_timesteps=None):
def index_for_timestep(
self, timestep: Union[float, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
) -> int:
"""
Find the index of a given timestep in the timestep schedule.
Args:
timestep (`float` or `torch.Tensor`):
The timestep value to find in the schedule.
schedule_timesteps (`torch.Tensor`, *optional*):
The timestep schedule to search in. If `None`, uses `self.timesteps`.
Returns:
`int`:
The index of the timestep in the schedule. For the very first step, returns the second index if
multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image).
"""
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
......@@ -565,7 +660,14 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
return indices[pos].item()
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[float, torch.Tensor]) -> None:
"""
Initialize the step index for the scheduler based on the given timestep.
Args:
timestep (`float` or `torch.Tensor`):
The current timestep to initialize the step index from.
"""
if self.begin_index is None:
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
......@@ -591,26 +693,33 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
Args:
model_output (`torch.Tensor`):
The direct output from learned diffusion model.
timestep (`float`):
The direct output from the learned diffusion model.
timestep (`float` or `torch.Tensor`):
The current discrete timestep in the diffusion chain.
sample (`torch.Tensor`):
A current instance of a sample created by the diffusion process.
s_churn (`float`):
s_tmin (`float`):
s_tmax (`float`):
s_noise (`float`, defaults to 1.0):
s_churn (`float`, *optional*, defaults to `0.0`):
Stochasticity parameter that controls the amount of noise added during sampling. Higher values increase
randomness.
s_tmin (`float`, *optional*, defaults to `0.0`):
Minimum timestep threshold for applying stochasticity. Only timesteps above this value will have noise
added.
s_tmax (`float`, *optional*, defaults to `inf`):
Maximum timestep threshold for applying stochasticity. Only timesteps below this value will have noise
added.
s_noise (`float`, *optional*, defaults to `1.0`):
Scaling factor for noise added to the sample.
generator (`torch.Generator`, *optional*):
A random number generator.
return_dict (`bool`):
A random number generator for reproducible sampling.
return_dict (`bool`, *optional*, defaults to `True`):
Whether or not to return a [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or
tuple.
Returns:
[`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] or `tuple`:
If return_dict is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
returned, otherwise a tuple is returned where the first element is the sample tensor.
If `return_dict` is `True`, [`~schedulers.scheduling_euler_discrete.EulerDiscreteSchedulerOutput`] is
returned, otherwise a tuple is returned where the first element is the sample tensor and the second
element is the predicted original sample.
"""
if isinstance(timestep, (int, torch.IntTensor, torch.LongTensor)):
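The four `s_*` parameters interact as in the stochastic sampler of Karras et al. (2022): sigma is briefly raised to a higher `sigma_hat`, matching noise is injected, and a plain Euler step follows. A minimal sketch of the assumed update, with scalar sigmas and hypothetical names:

import torch

def euler_step_sketch(sample, pred_original_sample, sigma, sigma_next, num_sigmas,
                      s_churn=0.0, s_tmin=0.0, s_tmax=float("inf"), s_noise=1.0,
                      generator=None):
    # Churn only applies when sigma lies inside [s_tmin, s_tmax].
    gamma = min(s_churn / (num_sigmas - 1), 2**0.5 - 1) if s_tmin <= sigma <= s_tmax else 0.0
    sigma_hat = sigma * (gamma + 1)
    if gamma > 0:
        # Inject noise (scaled by s_noise) to compensate for the raised sigma.
        eps = torch.randn(sample.shape, generator=generator, device=sample.device) * s_noise
        sample = sample + eps * (sigma_hat**2 - sigma**2) ** 0.5
    # Plain Euler step from sigma_hat down to the next sigma in the schedule.
    derivative = (sample - pred_original_sample) / sigma_hat
    return sample + derivative * (sigma_next - sigma_hat)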
......@@ -689,6 +798,21 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......@@ -717,6 +841,24 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
return noisy_samples
def get_velocity(self, sample: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor) -> torch.Tensor:
"""
Compute the velocity prediction for the given sample and noise at the specified timesteps.
This method implements the velocity prediction used in v-prediction models, which predicts a linear combination
of the sample and noise.
Args:
sample (`torch.Tensor`):
The input sample for which to compute the velocity.
noise (`torch.Tensor`):
The noise tensor corresponding to the sample.
timesteps (`torch.Tensor`):
The timesteps at which to compute the velocity.
Returns:
`torch.Tensor`:
The velocity prediction computed as `sqrt(alpha_prod) * noise - sqrt(1 - alpha_prod) * sample`.
"""
if (
isinstance(timesteps, int)
or isinstance(timesteps, torch.IntTensor)
......@@ -753,5 +895,5 @@ class EulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
velocity = sqrt_alpha_prod * noise - sqrt_one_minus_alpha_prod * sample
return velocity
def __len__(self):
def __len__(self) -> int:
return self.config.num_train_timesteps
......@@ -160,7 +160,7 @@ class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -473,7 +473,20 @@ class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -499,7 +512,19 @@ class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
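The exponential variant simply spaces sigmas evenly in log-space; a sketch under assumed endpoint values:

```python
import numpy as np

def exponential_sigmas(sigma_min: float, sigma_max: float, n: int) -> np.ndarray:
    # Geometric progression from sigma_max down to sigma_min.
    return np.exp(np.linspace(np.log(sigma_max), np.log(sigma_min), n))

print(exponential_sigmas(0.03, 14.6, 10))
```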
......@@ -523,7 +548,24 @@ class FlowMatchEulerDiscreteScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
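The beta schedule maps uniform steps through the inverse CDF (percent-point function) of a `Beta(alpha, beta)` distribution onto `[sigma_min, sigma_max]`; with `alpha = beta = 0.6` this clusters sigma values toward both ends of the range. A sketch, with endpoint values assumed:

```python
import numpy as np
import scipy.stats

def beta_sigmas(sigma_min: float, sigma_max: float, n: int,
                alpha: float = 0.6, beta: float = 0.6) -> np.ndarray:
    # Inverse CDF of Beta(alpha, beta) evaluated on a reversed uniform grid.
    ppf = scipy.stats.beta.ppf(1 - np.linspace(0, 1, n), alpha, beta)
    return sigma_min + ppf * (sigma_max - sigma_min)

print(beta_sigmas(0.03, 14.6, 10))  # decreasing, denser near both ends of the range
```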
......
......@@ -102,7 +102,7 @@ class FlowMatchHeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
        Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......
......@@ -168,7 +168,7 @@ class FlowMatchLCMScheduler(SchedulerMixin, ConfigMixin):
        Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -473,7 +473,20 @@ class FlowMatchLCMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
    def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -499,7 +512,19 @@ class FlowMatchLCMScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -523,7 +548,24 @@ class FlowMatchLCMScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......
......@@ -188,7 +188,23 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
self.sigmas = self.sigmas.to("cpu") # to avoid too much CPU/GPU communication
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler.index_for_timestep
def index_for_timestep(self, timestep, schedule_timesteps=None):
def index_for_timestep(
self, timestep: Union[float, torch.Tensor], schedule_timesteps: Optional[torch.Tensor] = None
) -> int:
"""
Find the index of a given timestep in the timestep schedule.
Args:
timestep (`float` or `torch.Tensor`):
The timestep value to find in the schedule.
schedule_timesteps (`torch.Tensor`, *optional*):
The timestep schedule to search in. If `None`, uses `self.timesteps`.
Returns:
`int`:
The index of the timestep in the schedule. For the very first step, returns the second index if
multiple matches exist to avoid skipping a sigma when starting mid-schedule (e.g., for image-to-image).
"""
if schedule_timesteps is None:
schedule_timesteps = self.timesteps
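The docstring's "second index" rule is easiest to see in isolation; this sketch assumes a toy schedule with a repeated timestep, as Heun-style schedules produce.

```python
import torch

schedule_timesteps = torch.tensor([999, 749, 499, 499, 249, 0])  # assumed toy schedule
timestep = 499

indices = (schedule_timesteps == timestep).nonzero()
# Take the second match when one exists so a sigma is not skipped when
# starting mid-schedule (e.g., image-to-image); otherwise take the only match.
pos = 1 if len(indices) > 1 else 0
print(indices[pos].item())  # -> 3
```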
......@@ -230,7 +246,7 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
        Sets the begin index for the scheduler. This function should be run from the pipeline before inference.
Args:
begin_index (`int`):
begin_index (`int`, defaults to `0`):
The begin index for the scheduler.
"""
self._begin_index = begin_index
......@@ -355,6 +371,19 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._sigma_to_t
def _sigma_to_t(self, sigma, log_sigmas):
"""
Convert sigma values to corresponding timestep values through interpolation.
Args:
sigma (`np.ndarray`):
The sigma value(s) to convert to timestep(s).
log_sigmas (`np.ndarray`):
The logarithm of the sigma schedule used for interpolation.
Returns:
`np.ndarray`:
The interpolated timestep value(s) corresponding to the input sigma(s).
"""
# get log sigma
log_sigma = np.log(np.maximum(sigma, 1e-10))
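The body of `_sigma_to_t` is elided above; as a standalone sketch of the log-space interpolation the docstring describes (schedule values assumed):

```python
import numpy as np

log_sigmas = np.log(np.array([0.3, 1.2, 3.1, 7.0, 14.6]))  # assumed training schedule
sigma = np.array([5.0])

log_sigma = np.log(np.maximum(sigma, 1e-10))
dists = log_sigma - log_sigmas[:, np.newaxis]

# Index of the largest schedule sigma <= sigma, clipped so a right neighbour exists.
low_idx = np.cumsum(dists >= 0, axis=0).argmax(axis=0).clip(max=log_sigmas.shape[0] - 2)
high_idx = low_idx + 1

low, high = log_sigmas[low_idx], log_sigmas[high_idx]
w = np.clip((low - log_sigma) / (low - high), 0, 1)
t = ((1 - w) * low_idx + w * high_idx).reshape(sigma.shape)
print(t)  # fractional timestep index between neighbouring schedule entries
```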
......@@ -379,7 +408,20 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_karras
    def _convert_to_karras(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs the noise schedule of Karras et al. (2022)."""
"""
Construct the noise schedule as proposed in [Elucidating the Design Space of Diffusion-Based Generative
Models](https://huggingface.co/papers/2206.00364).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following the Karras noise schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -405,7 +447,19 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._convert_to_exponential
def _convert_to_exponential(self, in_sigmas: torch.Tensor, num_inference_steps: int) -> torch.Tensor:
"""Constructs an exponential noise schedule."""
"""
Construct an exponential noise schedule.
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
Returns:
`torch.Tensor`:
The converted sigma values following an exponential schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -429,7 +483,24 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
def _convert_to_beta(
self, in_sigmas: torch.Tensor, num_inference_steps: int, alpha: float = 0.6, beta: float = 0.6
) -> torch.Tensor:
"""From "Beta Sampling is All You Need" [arXiv:2407.12173] (Lee et. al, 2024)"""
"""
Construct a beta noise schedule as proposed in [Beta Sampling is All You
Need](https://huggingface.co/papers/2407.12173).
Args:
in_sigmas (`torch.Tensor`):
The input sigma values to be converted.
num_inference_steps (`int`):
The number of inference steps to generate the noise schedule for.
alpha (`float`, *optional*, defaults to `0.6`):
The alpha parameter for the beta distribution.
beta (`float`, *optional*, defaults to `0.6`):
The beta parameter for the beta distribution.
Returns:
`torch.Tensor`:
The converted sigma values following a beta distribution schedule.
"""
# Hack to make sure that other schedulers which copy this function don't break
# TODO: Add this logic to the other schedulers
......@@ -462,7 +533,14 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
return self.dt is None
# Copied from diffusers.schedulers.scheduling_euler_discrete.EulerDiscreteScheduler._init_step_index
def _init_step_index(self, timestep):
def _init_step_index(self, timestep: Union[float, torch.Tensor]) -> None:
"""
Initialize the step index for the scheduler based on the given timestep.
Args:
timestep (`float` or `torch.Tensor`):
The current timestep to initialize the step index from.
"""
if self.begin_index is None:
if isinstance(timestep, torch.Tensor):
timestep = timestep.to(self.timesteps.device)
......@@ -580,6 +658,21 @@ class HeunDiscreteScheduler(SchedulerMixin, ConfigMixin):
noise: torch.Tensor,
timesteps: torch.Tensor,
) -> torch.Tensor:
"""
Add noise to the original samples according to the noise schedule at the specified timesteps.
Args:
original_samples (`torch.Tensor`):
The original samples to which noise will be added.
noise (`torch.Tensor`):
The noise tensor to add to the original samples.
timesteps (`torch.Tensor`):
The timesteps at which to add noise, determining the noise level from the schedule.
Returns:
`torch.Tensor`:
The noisy samples with added noise scaled according to the timestep schedule.
"""
# Make sure sigmas and timesteps have the same device and dtype as original_samples
sigmas = self.sigmas.to(device=original_samples.device, dtype=original_samples.dtype)
if original_samples.device.type == "mps" and torch.is_floating_point(timesteps):
......