Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
diffusers
Commits
fa3c86be
Unverified
Commit
fa3c86be
authored
Dec 18, 2023
by
Patrick von Platen
Committed by
GitHub
Dec 18, 2023
Browse files
[SVD] Fix guidance scale (#6002)
* [SVD] Fix guidance scale * make style
parent
7d0a47f3
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
7 deletions
+9
-7
src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py
...stable_video_diffusion/pipeline_stable_video_diffusion.py
+9
-7
No files found.
src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py
View file @
fa3c86be
...
@@ -291,7 +291,9 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
...
@@ -291,7 +291,9 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
# corresponds to doing no classifier free guidance.
# corresponds to doing no classifier free guidance.
@
property
@
property
def
do_classifier_free_guidance
(
self
):
def
do_classifier_free_guidance
(
self
):
return
self
.
_guidance_scale
>
1
and
self
.
unet
.
config
.
time_cond_proj_dim
is
None
if
isinstance
(
self
.
guidance_scale
,
(
int
,
float
)):
return
self
.
guidance_scale
return
self
.
guidance_scale
.
max
()
>
1
@
property
@
property
def
num_timesteps
(
self
):
def
num_timesteps
(
self
):
...
@@ -416,10 +418,10 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
...
@@ -416,10 +418,10 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
# here `guidance_scale` is defined analog to the guidance weight `w` of equation (2)
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
# of the Imagen paper: https://arxiv.org/pdf/2205.11487.pdf . `guidance_scale = 1`
# corresponds to doing no classifier free guidance.
# corresponds to doing no classifier free guidance.
do_classifier_free
_guidance
=
max_guidance_scale
>
1.0
self
.
_guidance
_scale
=
max_guidance_scale
# 3. Encode input image
# 3. Encode input image
image_embeddings
=
self
.
_encode_image
(
image
,
device
,
num_videos_per_prompt
,
do_classifier_free_guidance
)
image_embeddings
=
self
.
_encode_image
(
image
,
device
,
num_videos_per_prompt
,
self
.
do_classifier_free_guidance
)
# NOTE: Stable Diffusion Video was conditioned on fps - 1, which
# NOTE: Stable Diffusion Video was conditioned on fps - 1, which
# is why it is reduced here.
# is why it is reduced here.
...
@@ -435,7 +437,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
...
@@ -435,7 +437,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
if
needs_upcasting
:
if
needs_upcasting
:
self
.
vae
.
to
(
dtype
=
torch
.
float32
)
self
.
vae
.
to
(
dtype
=
torch
.
float32
)
image_latents
=
self
.
_encode_vae_image
(
image
,
device
,
num_videos_per_prompt
,
do_classifier_free_guidance
)
image_latents
=
self
.
_encode_vae_image
(
image
,
device
,
num_videos_per_prompt
,
self
.
do_classifier_free_guidance
)
image_latents
=
image_latents
.
to
(
image_embeddings
.
dtype
)
image_latents
=
image_latents
.
to
(
image_embeddings
.
dtype
)
# cast back to fp16 if needed
# cast back to fp16 if needed
...
@@ -454,7 +456,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
...
@@ -454,7 +456,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
image_embeddings
.
dtype
,
image_embeddings
.
dtype
,
batch_size
,
batch_size
,
num_videos_per_prompt
,
num_videos_per_prompt
,
do_classifier_free_guidance
,
self
.
do_classifier_free_guidance
,
)
)
added_time_ids
=
added_time_ids
.
to
(
device
)
added_time_ids
=
added_time_ids
.
to
(
device
)
...
@@ -490,7 +492,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
...
@@ -490,7 +492,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
with
self
.
progress_bar
(
total
=
num_inference_steps
)
as
progress_bar
:
with
self
.
progress_bar
(
total
=
num_inference_steps
)
as
progress_bar
:
for
i
,
t
in
enumerate
(
timesteps
):
for
i
,
t
in
enumerate
(
timesteps
):
# expand the latents if we are doing classifier free guidance
# expand the latents if we are doing classifier free guidance
latent_model_input
=
torch
.
cat
([
latents
]
*
2
)
if
do_classifier_free_guidance
else
latents
latent_model_input
=
torch
.
cat
([
latents
]
*
2
)
if
self
.
do_classifier_free_guidance
else
latents
latent_model_input
=
self
.
scheduler
.
scale_model_input
(
latent_model_input
,
t
)
latent_model_input
=
self
.
scheduler
.
scale_model_input
(
latent_model_input
,
t
)
# Concatenate image_latents over channels dimention
# Concatenate image_latents over channels dimention
...
@@ -506,7 +508,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
...
@@ -506,7 +508,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
)[
0
]
)[
0
]
# perform guidance
# perform guidance
if
do_classifier_free_guidance
:
if
self
.
do_classifier_free_guidance
:
noise_pred_uncond
,
noise_pred_cond
=
noise_pred
.
chunk
(
2
)
noise_pred_uncond
,
noise_pred_cond
=
noise_pred
.
chunk
(
2
)
noise_pred
=
noise_pred_uncond
+
self
.
guidance_scale
*
(
noise_pred_cond
-
noise_pred_uncond
)
noise_pred
=
noise_pred_uncond
+
self
.
guidance_scale
*
(
noise_pred_cond
-
noise_pred_uncond
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment