renzhc / diffusers_dcu · Commits

Commit fa3c86be (unverified)
Authored Dec 18, 2023 by Patrick von Platen; committed by GitHub on Dec 18, 2023
Parent: 7d0a47f3

[SVD] Fix guidance scale (#6002)

* [SVD] Fix guidance scale
* make style
Showing 1 changed file with 9 additions and 7 deletions.

src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py (+9, -7)
src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py
@@ -291,7 +291,9 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
     # corresponds to doing no classifier free guidance.
     @property
     def do_classifier_free_guidance(self):
-        return self._guidance_scale > 1 and self.unet.config.time_cond_proj_dim is None
+        if isinstance(self.guidance_scale, (int, float)):
+            return self.guidance_scale
+        return self.guidance_scale.max() > 1

     @property
     def num_timesteps(self):
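Note: `guidance_scale` in this pipeline can be either a plain float or a per-frame tensor (SVD interpolates between a minimum and a maximum scale across frames), which is why the property now branches on the type and only calls `.max()` on tensors. A minimal sketch of the new behaviour, using illustrative values that are not part of the commit:

import torch

# Illustrative values only, not taken from the commit.
scalar_scale = 3.0                                     # plain float
framewise_scale = torch.linspace(1.0, 3.0, steps=25)   # one scale per frame

def do_classifier_free_guidance(guidance_scale):
    # Mirrors the new property: ints/floats are returned as-is (truthy when > 0),
    # tensors are reduced with .max() and compared against 1.
    if isinstance(guidance_scale, (int, float)):
        return guidance_scale
    return guidance_scale.max() > 1

print(do_classifier_free_guidance(scalar_scale))     # 3.0
print(do_classifier_free_guidance(framewise_scale))  # tensor(True)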
@@ -416,10 +418,10 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         # here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
         # of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
         # corresponds to doing no classifier free guidance.
-        do_classifier_free_guidance = max_guidance_scale > 1.0
+        self._guidance_scale = max_guidance_scale

         # 3. Encode input image
-        image_embeddings = self._encode_image(image, device, num_videos_per_prompt, do_classifier_free_guidance)
+        image_embeddings = self._encode_image(image, device, num_videos_per_prompt, self.do_classifier_free_guidance)

         # NOTE: Stable Diffusion Video was conditioned on fps - 1, which
         # is why it is reduced here.
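Note: rather than computing a throwaway local boolean, the call now stores `max_guidance_scale` on the instance, so the `guidance_scale` and `do_classifier_free_guidance` properties shown above can derive the flag wherever it is needed later in `__call__`. A stripped-down sketch of that backing-attribute pattern, with names simplified and nothing beyond what the hunks above show:

class PipelineSketch:
    # Illustrative only: the scale is stashed per call and read back through properties.
    def __call__(self, max_guidance_scale=3.0):
        self._guidance_scale = max_guidance_scale
        return self.do_classifier_free_guidance

    @property
    def guidance_scale(self):
        return self._guidance_scale

    @property
    def do_classifier_free_guidance(self):
        if isinstance(self.guidance_scale, (int, float)):
            return self.guidance_scale
        return self.guidance_scale.max() > 1

print(PipelineSketch()(max_guidance_scale=3.0))  # 3.0, truthy, so the CFG branches run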
@@ -435,7 +437,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         if needs_upcasting:
             self.vae.to(dtype=torch.float32)

-        image_latents = self._encode_vae_image(image, device, num_videos_per_prompt, do_classifier_free_guidance)
+        image_latents = self._encode_vae_image(image, device, num_videos_per_prompt, self.do_classifier_free_guidance)
         image_latents = image_latents.to(image_embeddings.dtype)

         # cast back to fp16 if needed
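Note: the surrounding context encodes the conditioning image with the VAE, temporarily upcast to float32 when it was loaded in fp16 (`needs_upcasting`) and cast back afterwards, as the trailing comment indicates. A rough sketch of that upcast-and-restore pattern, assuming a diffusers-style VAE with an `encode(...).latent_dist` interface; the helper name is made up:

import torch

def encode_with_upcast(vae, image, needs_upcasting, original_dtype=torch.float16):
    # Hypothetical helper, not part of the pipeline: encode in fp32 for stability,
    # then restore the original dtype ("cast back to fp16 if needed").
    if needs_upcasting:
        vae.to(dtype=torch.float32)
        image = image.to(torch.float32)
    latents = vae.encode(image).latent_dist.sample()
    if needs_upcasting:
        vae.to(dtype=original_dtype)
    return latents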
@@ -454,7 +456,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
             image_embeddings.dtype,
             batch_size,
             num_videos_per_prompt,
-            do_classifier_free_guidance,
+            self.do_classifier_free_guidance,
         )
         added_time_ids = added_time_ids.to(device)

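Note: the flag is also forwarded to `_get_add_time_ids` because the extra conditioning values (fps, motion bucket id, noise augmentation strength) must match the doubled batch that classifier-free guidance sends through the UNet. A rough sketch of that duplication, under the assumption that the helper simply repeats the ids for the unconditional half (simplified, not the pipeline's exact code):

import torch

def get_add_time_ids_sketch(fps, motion_bucket_id, noise_aug_strength,
                            batch_size, num_videos_per_prompt,
                            do_classifier_free_guidance):
    # Illustrative sketch: one row of conditioning values per generated video.
    add_time_ids = torch.tensor([[fps, motion_bucket_id, noise_aug_strength]])
    add_time_ids = add_time_ids.repeat(batch_size * num_videos_per_prompt, 1)
    if do_classifier_free_guidance:
        # Duplicate so the unconditional half of the CFG batch is covered too.
        add_time_ids = torch.cat([add_time_ids, add_time_ids])
    return add_time_ids

print(get_add_time_ids_sketch(6, 127, 0.02, 1, 1, True).shape)  # torch.Size([2, 3])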
@@ -490,7 +492,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
         with self.progress_bar(total=num_inference_steps) as progress_bar:
             for i, t in enumerate(timesteps):
                 # expand the latents if we are doing classifier free guidance
-                latent_model_input = torch.cat([latents] * 2) if do_classifier_free_guidance else latents
+                latent_model_input = torch.cat([latents] * 2) if self.do_classifier_free_guidance else latents
                 latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)

                 # Concatenate image_latents over channels dimention
@@ -506,7 +508,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
                 )[0]

                 # perform guidance
-                if do_classifier_free_guidance:
+                if self.do_classifier_free_guidance:
                     noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)
                     noise_pred = noise_pred_uncond + self.guidance_scale * (noise_pred_cond - noise_pred_uncond)

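Note: when CFG is active the latents are duplicated (the `torch.cat([latents] * 2)` above), the UNet output is split back into unconditional and conditional halves, and the halves are blended with the guidance weight `w` of the Imagen paper's equation (2): noise_pred = noise_pred_uncond + w * (noise_pred_cond - noise_pred_uncond). A small numeric sketch with made-up shapes, including a per-frame guidance scale of the kind this commit makes usable:

import torch

# Made-up shapes: (2 * batch, frames, channels, height, width) after batch doubling.
noise_pred = torch.randn(2, 4, 8, 16, 16)
noise_pred_uncond, noise_pred_cond = noise_pred.chunk(2)

# Per-frame guidance weights, broadcast over the frame dimension (illustrative).
guidance_scale = torch.linspace(1.0, 3.0, steps=4).view(1, 4, 1, 1, 1)

noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_cond - noise_pred_uncond)
print(noise_pred.shape)  # torch.Size([1, 4, 8, 16, 16])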