Unverified commit a840c39a authored by Aryan, committed by GitHub

[refactor] Make guiders return their inputs (#12213)

* update

* update

* apply review suggestions

* remove guider inputs

* fix tests
parent 9a7ae77a
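
For orientation: this commit replaces the guiders' old `(pred, scheduler_step_kwargs)` tuple return with a `GuiderOutput` that also carries the conditional and unconditional predictions the guider received. Below is a minimal, self-contained sketch of the two contracts; `ToyGuiderOutput` and `run_guidance` are hypothetical stand-ins written only to illustrate the pattern (the real `GuiderOutput` subclasses diffusers' `BaseOutput`).

from dataclasses import dataclass
from typing import Optional
import torch

@dataclass
class ToyGuiderOutput:
    # Hypothetical stand-in for diffusers' GuiderOutput(BaseOutput):
    # the guidance result plus the inputs that produced it.
    pred: torch.Tensor
    pred_cond: Optional[torch.Tensor] = None
    pred_uncond: Optional[torch.Tensor] = None

    def __getitem__(self, idx: int):
        # BaseOutput subclasses support tuple-style indexing; index 0 is `pred`,
        # which is why callers can write `components.guider(state)[0]` after this refactor.
        return (self.pred, self.pred_cond, self.pred_uncond)[idx]

def run_guidance(pred_cond: torch.Tensor, pred_uncond: torch.Tensor, scale: float = 7.5):
    # Classic classifier-free guidance combination (illustrative only).
    pred = pred_uncond + scale * (pred_cond - pred_uncond)
    # Old contract: `return pred, {}` (a prediction plus empty scheduler kwargs).
    # New contract: return the prediction together with the guider's inputs.
    return ToyGuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)

out = run_guidance(torch.randn(1, 4, 8, 8), torch.randn(1, 4, 8, 8))
noise_pred = out[0]                       # tuple-style access, as the modular blocks now do
assert torch.equal(noise_pred, out.pred)  # attribute access works too

As the hunks below show, the second tuple element was always an empty dict (`return pred, {}`), which is why the modular blocks' `scheduler_step_kwargs` plumbing is removed in the same commit.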
@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 import torch
 from ..configuration_utils import register_to_config
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -92,7 +92,7 @@ class AdaptiveProjectedGuidance(BaseGuidance):
             data_batches.append(data_batch)
         return data_batches
-    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> GuiderOutput:
         pred = None
         if not self._is_apg_enabled():
@@ -111,7 +111,7 @@ class AdaptiveProjectedGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
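
The `rescale_noise_cfg` call in the hunk above is untouched by this refactor. For context, here is a hedged sketch of the guidance-rescale computation as it is commonly implemented (treat the exact details as an assumption about the library's helper; only its signature appears in the hunks below):

import torch

def rescale_noise_cfg_sketch(noise_cfg: torch.Tensor, noise_pred_text: torch.Tensor,
                             guidance_rescale: float = 0.0) -> torch.Tensor:
    # Match the standard deviation of the guided prediction to that of the
    # conditional prediction, then blend the two by `guidance_rescale`.
    std_text = noise_pred_text.std(dim=list(range(1, noise_pred_text.ndim)), keepdim=True)
    std_cfg = noise_cfg.std(dim=list(range(1, noise_cfg.ndim)), keepdim=True)
    rescaled = noise_cfg * (std_text / std_cfg)
    return guidance_rescale * rescaled + (1.0 - guidance_rescale) * noise_cfg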
@@ -20,7 +20,7 @@ import torch
 from ..configuration_utils import register_to_config
 from ..hooks import HookRegistry, LayerSkipConfig
 from ..hooks.layer_skip import _apply_layer_skip_hook
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -145,7 +145,7 @@ class AutoGuidance(BaseGuidance):
             data_batches.append(data_batch)
         return data_batches
-    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> GuiderOutput:
         pred = None
         if not self._is_ag_enabled():
@@ -158,7 +158,7 @@ class AutoGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 import torch
 from ..configuration_utils import register_to_config
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -96,7 +96,7 @@ class ClassifierFreeGuidance(BaseGuidance):
             data_batches.append(data_batch)
         return data_batches
-    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> GuiderOutput:
         pred = None
         if not self._is_cfg_enabled():
@@ -109,7 +109,7 @@ class ClassifierFreeGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 import torch
 from ..configuration_utils import register_to_config
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -89,7 +89,7 @@ class ClassifierFreeZeroStarGuidance(BaseGuidance):
             data_batches.append(data_batch)
         return data_batches
-    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> GuiderOutput:
         pred = None
         if self._step < self.zero_init_steps:
@@ -109,7 +109,7 @@ class ClassifierFreeZeroStarGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
@@ -19,7 +19,7 @@ import torch
 from ..configuration_utils import register_to_config
 from ..utils import is_kornia_available
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -230,7 +230,7 @@ class FrequencyDecoupledGuidance(BaseGuidance):
             data_batches.append(data_batch)
         return data_batches
-    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> GuiderOutput:
         pred = None
         if not self._is_fdg_enabled():
@@ -277,7 +277,7 @@ class FrequencyDecoupledGuidance(BaseGuidance):
         if self.guidance_rescale_space == "data" and self.guidance_rescale[0] > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale[0])
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
@@ -20,7 +20,7 @@ from huggingface_hub.utils import validate_hf_hub_args
 from typing_extensions import Self
 from ..configuration_utils import ConfigMixin
-from ..utils import PushToHubMixin, get_logger
+from ..utils import BaseOutput, PushToHubMixin, get_logger
 if TYPE_CHECKING:
@@ -284,6 +284,12 @@ class BaseGuidance(ConfigMixin, PushToHubMixin):
         self.save_config(save_directory=save_directory, push_to_hub=push_to_hub, **kwargs)
+class GuiderOutput(BaseOutput):
+    pred: torch.Tensor
+    pred_cond: Optional[torch.Tensor]
+    pred_uncond: Optional[torch.Tensor]
 def rescale_noise_cfg(noise_cfg, noise_pred_text, guidance_rescale=0.0):
     r"""
     Rescales `noise_cfg` tensor based on `guidance_rescale` to improve image quality and fix overexposure. Based on
...
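
Since `GuiderOutput` subclasses `BaseOutput`, it behaves both like a dataclass and like a tuple of its non-None fields. A short, hedged usage sketch, assuming the usual `BaseOutput` semantics and that the class is importable as `diffusers.guiders.guider_utils.GuiderOutput` (the relative import in the hunks suggests this path):

import torch
from diffusers.guiders.guider_utils import GuiderOutput  # assumed import path, per the hunk above

pred_cond = torch.randn(1, 4, 8, 8)
pred_uncond = torch.randn(1, 4, 8, 8)
pred = pred_uncond + 7.5 * (pred_cond - pred_uncond)

out = GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)

out.pred            # attribute access
out["pred_cond"]    # dict-style access (BaseOutput behaves like an ordered mapping)
out[0]              # tuple-style access: the first non-None field, i.e. `pred`
out.to_tuple()      # all non-None fields as a plain tuple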
@@ -21,7 +21,7 @@ from ..configuration_utils import register_to_config
 from ..hooks import HookRegistry, LayerSkipConfig
 from ..hooks.layer_skip import _apply_layer_skip_hook
 from ..utils import get_logger
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -197,7 +197,7 @@ class PerturbedAttentionGuidance(BaseGuidance):
         pred_cond: torch.Tensor,
         pred_uncond: Optional[torch.Tensor] = None,
         pred_cond_skip: Optional[torch.Tensor] = None,
-    ) -> torch.Tensor:
+    ) -> GuiderOutput:
         pred = None
         if not self._is_cfg_enabled() and not self._is_slg_enabled():
@@ -219,7 +219,7 @@ class PerturbedAttentionGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     # Copied from diffusers.guiders.skip_layer_guidance.SkipLayerGuidance.is_conditional
...
@@ -20,7 +20,7 @@ import torch
 from ..configuration_utils import register_to_config
 from ..hooks import HookRegistry, LayerSkipConfig
 from ..hooks.layer_skip import _apply_layer_skip_hook
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -192,7 +192,7 @@ class SkipLayerGuidance(BaseGuidance):
         pred_cond: torch.Tensor,
         pred_uncond: Optional[torch.Tensor] = None,
         pred_cond_skip: Optional[torch.Tensor] = None,
-    ) -> torch.Tensor:
+    ) -> GuiderOutput:
         pred = None
         if not self._is_cfg_enabled() and not self._is_slg_enabled():
@@ -214,7 +214,7 @@ class SkipLayerGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
@@ -20,7 +20,7 @@ import torch
 from ..configuration_utils import register_to_config
 from ..hooks import HookRegistry
 from ..hooks.smoothed_energy_guidance_utils import SmoothedEnergyGuidanceConfig, _apply_smoothed_energy_guidance_hook
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -181,7 +181,7 @@ class SmoothedEnergyGuidance(BaseGuidance):
         pred_cond: torch.Tensor,
         pred_uncond: Optional[torch.Tensor] = None,
         pred_cond_seg: Optional[torch.Tensor] = None,
-    ) -> torch.Tensor:
+    ) -> GuiderOutput:
         pred = None
         if not self._is_cfg_enabled() and not self._is_seg_enabled():
@@ -203,7 +203,7 @@ class SmoothedEnergyGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
@@ -18,7 +18,7 @@ from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
 import torch
 from ..configuration_utils import register_to_config
-from .guider_utils import BaseGuidance, rescale_noise_cfg
+from .guider_utils import BaseGuidance, GuiderOutput, rescale_noise_cfg
 if TYPE_CHECKING:
@@ -78,7 +78,7 @@ class TangentialClassifierFreeGuidance(BaseGuidance):
             data_batches.append(data_batch)
         return data_batches
-    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def forward(self, pred_cond: torch.Tensor, pred_uncond: Optional[torch.Tensor] = None) -> GuiderOutput:
         pred = None
         if not self._is_tcfg_enabled():
@@ -89,7 +89,7 @@ class TangentialClassifierFreeGuidance(BaseGuidance):
         if self.guidance_rescale > 0.0:
             pred = rescale_noise_cfg(pred, pred_cond, self.guidance_rescale)
-        return pred, {}
+        return GuiderOutput(pred=pred, pred_cond=pred_cond, pred_uncond=pred_uncond)
     @property
     def is_conditional(self) -> bool:
...
@@ -238,7 +238,7 @@ class StableDiffusionXLLoopDenoiser(ModularPipelineBlocks):
             components.guider.cleanup_models(components.unet)
         # Perform guidance
-        block_state.noise_pred, block_state.scheduler_step_kwargs = components.guider(guider_state)
+        block_state.noise_pred = components.guider(guider_state)[0]
         return components, block_state
@@ -433,7 +433,7 @@ class StableDiffusionXLControlNetLoopDenoiser(ModularPipelineBlocks):
             components.guider.cleanup_models(components.unet)
         # Perform guidance
-        block_state.noise_pred, block_state.scheduler_step_kwargs = components.guider(guider_state)
+        block_state.noise_pred = components.guider(guider_state)[0]
         return components, block_state
@@ -492,7 +492,6 @@ class StableDiffusionXLLoopAfterDenoiser(ModularPipelineBlocks):
             t,
             block_state.latents,
             **block_state.extra_step_kwargs,
-            **block_state.scheduler_step_kwargs,
             return_dict=False,
         )[0]
@@ -590,7 +589,6 @@ class StableDiffusionXLInpaintLoopAfterDenoiser(ModularPipelineBlocks):
            t,
            block_state.latents,
            **block_state.extra_step_kwargs,
-            **block_state.scheduler_step_kwargs,
            return_dict=False,
        )[0]
...
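
On the pipeline side the change is symmetric: the denoiser blocks keep only the first element of the guider output, and the scheduler step no longer receives `scheduler_step_kwargs`. A hedged, self-contained sketch of the resulting step call; `ToyScheduler` is a hypothetical stand-in, not a diffusers scheduler:

import torch

class ToyScheduler:
    # Hypothetical stand-in for a scheduler's `step` API surface.
    def step(self, model_output, t, sample, return_dict=False, **kwargs):
        prev_sample = sample - 0.1 * model_output  # placeholder update rule
        return (prev_sample,) if not return_dict else {"prev_sample": prev_sample}

scheduler = ToyScheduler()
noise_pred = torch.randn(1, 4, 8, 8)
latents = torch.randn(1, 4, 8, 8)

# Before: the block forwarded whatever kwargs the guider returned, e.g.
#   latents = scheduler.step(noise_pred, t, latents, **scheduler_step_kwargs, return_dict=False)[0]
# After: the guider no longer supplies step kwargs, so the call is plain.
latents = scheduler.step(noise_pred, 0, latents, return_dict=False)[0]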
@@ -127,7 +127,7 @@ class WanLoopDenoiser(ModularPipelineBlocks):
             components.guider.cleanup_models(components.transformer)
         # Perform guidance
-        block_state.noise_pred, block_state.scheduler_step_kwargs = components.guider(guider_state)
+        block_state.noise_pred = components.guider(guider_state)[0]
         return components, block_state
@@ -171,7 +171,6 @@
            block_state.noise_pred.float(),
            t,
            block_state.latents.float(),
-            **block_state.scheduler_step_kwargs,
            return_dict=False,
        )[0]
...