Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
diffusers
Commits
e3568d14
Unverified
Commit
e3568d14
authored
Aug 07, 2024
by
Dhruv Nair
Committed by
GitHub
Aug 07, 2024
Browse files
Freenoise change `vae_batch_size` to `decode_chunk_size` (#9110)
* update * update
parent
f6df2244
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
29 deletions
+30
-29
src/diffusers/pipelines/animatediff/pipeline_animatediff.py
src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+6
-6
src/diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py
.../pipelines/animatediff/pipeline_animatediff_controlnet.py
+5
-5
src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
...pipelines/animatediff/pipeline_animatediff_video2video.py
+14
-13
src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py
src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py
+5
-5
No files found.
src/diffusers/pipelines/animatediff/pipeline_animatediff.py
View file @
e3568d14
...
...
@@ -396,15 +396,15 @@ class AnimateDiffPipeline(
return
ip_adapter_image_embeds
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
...
...
@@ -582,7 +582,7 @@ class AnimateDiffPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
**
kwargs
,
):
r
"""
...
...
@@ -651,7 +651,7 @@ class AnimateDiffPipeline(
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
`._callback_tensor_inputs` attribute of your pipeline class.
vae_batch
_size (`int`, defaults to `16`):
decode_chunk
_size (`int`, defaults to `16`):
The number of frames to decode at a time when calling the `decode_latents` method.
Examples:
...
...
@@ -824,7 +824,7 @@ class AnimateDiffPipeline(
if
output_type
==
"latent"
:
video
=
latents
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
...
...
src/diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py
View file @
e3568d14
...
...
@@ -435,15 +435,15 @@ class AnimateDiffControlNetPipeline(
return
ip_adapter_image_embeds
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
...
...
@@ -728,7 +728,7 @@ class AnimateDiffControlNetPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
):
r
"""
The call function to the pipeline for generation.
...
...
@@ -1064,7 +1064,7 @@ class AnimateDiffControlNetPipeline(
if
output_type
==
"latent"
:
video
=
latents
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
...
...
src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
View file @
e3568d14
...
...
@@ -500,24 +500,24 @@ class AnimateDiffVideoToVideoPipeline(
return
ip_adapter_image_embeds
def
encode_video
(
self
,
video
,
generator
,
vae_batch
_size
:
int
=
16
)
->
torch
.
Tensor
:
def
encode_video
(
self
,
video
,
generator
,
decode_chunk
_size
:
int
=
16
)
->
torch
.
Tensor
:
latents
=
[]
for
i
in
range
(
0
,
len
(
video
),
vae_batch
_size
):
batch_video
=
video
[
i
:
i
+
vae_batch
_size
]
for
i
in
range
(
0
,
len
(
video
),
decode_chunk
_size
):
batch_video
=
video
[
i
:
i
+
decode_chunk
_size
]
batch_video
=
retrieve_latents
(
self
.
vae
.
encode
(
batch_video
),
generator
=
generator
)
latents
.
append
(
batch_video
)
return
torch
.
cat
(
latents
)
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
...
...
@@ -638,7 +638,7 @@ class AnimateDiffVideoToVideoPipeline(
device
,
generator
,
latents
=
None
,
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
):
if
latents
is
None
:
num_frames
=
video
.
shape
[
1
]
...
...
@@ -673,10 +673,11 @@ class AnimateDiffVideoToVideoPipeline(
)
init_latents
=
[
self
.
encode_video
(
video
[
i
],
generator
[
i
],
vae_batch_size
).
unsqueeze
(
0
)
for
i
in
range
(
batch_size
)
self
.
encode_video
(
video
[
i
],
generator
[
i
],
decode_chunk_size
).
unsqueeze
(
0
)
for
i
in
range
(
batch_size
)
]
else
:
init_latents
=
[
self
.
encode_video
(
vid
,
generator
,
vae_batch
_size
).
unsqueeze
(
0
)
for
vid
in
video
]
init_latents
=
[
self
.
encode_video
(
vid
,
generator
,
decode_chunk
_size
).
unsqueeze
(
0
)
for
vid
in
video
]
init_latents
=
torch
.
cat
(
init_latents
,
dim
=
0
)
...
...
@@ -761,7 +762,7 @@ class AnimateDiffVideoToVideoPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
):
r
"""
The call function to the pipeline for generation.
...
...
@@ -837,7 +838,7 @@ class AnimateDiffVideoToVideoPipeline(
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
`._callback_tensor_inputs` attribute of your pipeline class.
vae_batch
_size (`int`, defaults to `16`):
decode_chunk
_size (`int`, defaults to `16`):
The number of frames to decode at a time when calling the `decode_latents` method.
Examples:
...
...
@@ -940,7 +941,7 @@ class AnimateDiffVideoToVideoPipeline(
device
=
device
,
generator
=
generator
,
latents
=
latents
,
vae_batch_size
=
vae_batch
_size
,
decode_chunk_size
=
decode_chunk
_size
,
)
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
...
...
@@ -1008,7 +1009,7 @@ class AnimateDiffVideoToVideoPipeline(
if
output_type
==
"latent"
:
video
=
latents
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
...
...
src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py
View file @
e3568d14
...
...
@@ -407,15 +407,15 @@ class AnimateDiffPAGPipeline(
return
ip_adapter_image_embeds
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
...
...
@@ -588,7 +588,7 @@ class AnimateDiffPAGPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
pag_scale
:
float
=
3.0
,
pag_adaptive_scale
:
float
=
0.0
,
):
...
...
@@ -847,7 +847,7 @@ class AnimateDiffPAGPipeline(
if
output_type
==
"latent"
:
video
=
latents
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment