Unverified commit 0a0fe69a authored by Sanchit Gandhi, committed by GitHub

[AudioLDM Docs] Update docstring (#4744)

parent 124e76dd
```diff
@@ -418,8 +418,7 @@ class AudioLDMPipeline(DiffusionPipeline):
                 Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                 not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
             return_dict (`bool`, *optional*, defaults to `True`):
-                Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
-                plain tuple.
+                Whether or not to return a [`~pipelines.AudioPipelineOutput`] instead of a plain tuple.
             callback (`Callable`, *optional*):
                 A function that calls every `callback_steps` steps during inference. The function is called with the
                 following arguments: `callback(step: int, timestep: int, latents: torch.FloatTensor)`.
@@ -436,9 +435,9 @@ class AudioLDMPipeline(DiffusionPipeline):
         Examples:

         Returns:
-            [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] or `tuple`:
-                If `return_dict` is `True`, [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] is returned,
-                otherwise a `tuple` is returned where the first element is a list with the generated audio.
+            [`~pipelines.AudioPipelineOutput`] or `tuple`:
+                If `return_dict` is `True`, [`~pipelines.AudioPipelineOutput`] is returned, otherwise a `tuple` is
+                returned where the first element is a list with the generated audio.
         """
         # 0. Convert audio input length from seconds to spectrogram height
         vocoder_upsample_factor = np.prod(self.vocoder.config.upsample_rates) / self.vocoder.config.sampling_rate
...
```
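For reference, a minimal usage sketch of the behaviour the corrected docstring describes: `return_dict=True` (the default) yields an `AudioPipelineOutput`, `return_dict=False` yields a plain tuple, and `callback` follows the signature documented above. It assumes the `cvssp/audioldm-s-full-v2` checkpoint, a CUDA device, and `scipy` for saving; none of these are part of this commit.

```python
import torch
import scipy.io.wavfile
from diffusers import AudioLDMPipeline

pipe = AudioLDMPipeline.from_pretrained("cvssp/audioldm-s-full-v2", torch_dtype=torch.float16)
pipe = pipe.to("cuda")

prompt = "Techno music with a strong, upbeat tempo and high melodic riffs"

# Matches the documented callback signature; invoked every `callback_steps` steps.
def log_progress(step: int, timestep: int, latents: torch.FloatTensor):
    print(f"step {step}, timestep {timestep}, latents {tuple(latents.shape)}")

# return_dict=True (the default): an AudioPipelineOutput with an `.audios` field.
output = pipe(
    prompt,
    num_inference_steps=10,
    audio_length_in_s=5.0,
    callback=log_progress,
    callback_steps=5,
)
audio = output.audios[0]

# return_dict=False: a plain tuple whose first element, per the docstring,
# is the list with the generated audio.
audios = pipe(prompt, num_inference_steps=10, audio_length_in_s=5.0, return_dict=False)[0]

# AudioLDM generates mono waveforms at 16 kHz.
scipy.io.wavfile.write("techno.wav", rate=16000, data=audio)
```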
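The last context line in the hunk hints at how the pipeline maps a requested duration to a spectrogram height. A rough sketch of that arithmetic, using hypothetical vocoder settings (the pipeline reads the real values from the loaded checkpoint's `self.vocoder.config`):

```python
import numpy as np

upsample_rates = [5, 4, 2, 2, 2]  # hypothetical HiFi-GAN upsampling stages, for illustration only
sampling_rate = 16000             # AudioLDM's vocoder operates at 16 kHz

# Each spectrogram frame expands to prod(upsample_rates) waveform samples, so this
# factor is the audio duration (in seconds) covered by one spectrogram frame.
vocoder_upsample_factor = np.prod(upsample_rates) / sampling_rate  # 160 / 16000 = 0.01 s

# A requested duration therefore corresponds to roughly this many frames
# (the "spectrogram height" the comment refers to); here, 5 s -> 500 frames.
audio_length_in_s = 5.0
height = int(audio_length_in_s / vocoder_upsample_factor)
```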