Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
diffusers
Commits
e3568d14
Unverified
Commit
e3568d14
authored
Aug 07, 2024
by
Dhruv Nair
Committed by
GitHub
Aug 07, 2024
Browse files
Freenoise change `vae_batch_size` to `decode_chunk_size` (#9110)
* update * update
parent
f6df2244
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
30 additions
and
29 deletions
+30
-29
src/diffusers/pipelines/animatediff/pipeline_animatediff.py
src/diffusers/pipelines/animatediff/pipeline_animatediff.py
+6
-6
src/diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py
.../pipelines/animatediff/pipeline_animatediff_controlnet.py
+5
-5
src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
...pipelines/animatediff/pipeline_animatediff_video2video.py
+14
-13
src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py
src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py
+5
-5
No files found.
src/diffusers/pipelines/animatediff/pipeline_animatediff.py
View file @
e3568d14
...
@@ -396,15 +396,15 @@ class AnimateDiffPipeline(
...
@@ -396,15 +396,15 @@ class AnimateDiffPipeline(
return
ip_adapter_image_embeds
return
ip_adapter_image_embeds
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
video
.
append
(
batch_latents
)
...
@@ -582,7 +582,7 @@ class AnimateDiffPipeline(
...
@@ -582,7 +582,7 @@ class AnimateDiffPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
**
kwargs
,
**
kwargs
,
):
):
r
"""
r
"""
...
@@ -651,7 +651,7 @@ class AnimateDiffPipeline(
...
@@ -651,7 +651,7 @@ class AnimateDiffPipeline(
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
`._callback_tensor_inputs` attribute of your pipeline class.
`._callback_tensor_inputs` attribute of your pipeline class.
vae_batch
_size (`int`, defaults to `16`):
decode_chunk
_size (`int`, defaults to `16`):
The number of frames to decode at a time when calling the `decode_latents` method.
The number of frames to decode at a time when calling the `decode_latents` method.
Examples:
Examples:
...
@@ -824,7 +824,7 @@ class AnimateDiffPipeline(
...
@@ -824,7 +824,7 @@ class AnimateDiffPipeline(
if
output_type
==
"latent"
:
if
output_type
==
"latent"
:
video
=
latents
video
=
latents
else
:
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
# 10. Offload all models
...
...
src/diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py
View file @
e3568d14
...
@@ -435,15 +435,15 @@ class AnimateDiffControlNetPipeline(
...
@@ -435,15 +435,15 @@ class AnimateDiffControlNetPipeline(
return
ip_adapter_image_embeds
return
ip_adapter_image_embeds
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
video
.
append
(
batch_latents
)
...
@@ -728,7 +728,7 @@ class AnimateDiffControlNetPipeline(
...
@@ -728,7 +728,7 @@ class AnimateDiffControlNetPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
):
):
r
"""
r
"""
The call function to the pipeline for generation.
The call function to the pipeline for generation.
...
@@ -1064,7 +1064,7 @@ class AnimateDiffControlNetPipeline(
...
@@ -1064,7 +1064,7 @@ class AnimateDiffControlNetPipeline(
if
output_type
==
"latent"
:
if
output_type
==
"latent"
:
video
=
latents
video
=
latents
else
:
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
# 10. Offload all models
...
...
src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py
View file @
e3568d14
...
@@ -500,24 +500,24 @@ class AnimateDiffVideoToVideoPipeline(
...
@@ -500,24 +500,24 @@ class AnimateDiffVideoToVideoPipeline(
return
ip_adapter_image_embeds
return
ip_adapter_image_embeds
def
encode_video
(
self
,
video
,
generator
,
vae_batch
_size
:
int
=
16
)
->
torch
.
Tensor
:
def
encode_video
(
self
,
video
,
generator
,
decode_chunk
_size
:
int
=
16
)
->
torch
.
Tensor
:
latents
=
[]
latents
=
[]
for
i
in
range
(
0
,
len
(
video
),
vae_batch
_size
):
for
i
in
range
(
0
,
len
(
video
),
decode_chunk
_size
):
batch_video
=
video
[
i
:
i
+
vae_batch
_size
]
batch_video
=
video
[
i
:
i
+
decode_chunk
_size
]
batch_video
=
retrieve_latents
(
self
.
vae
.
encode
(
batch_video
),
generator
=
generator
)
batch_video
=
retrieve_latents
(
self
.
vae
.
encode
(
batch_video
),
generator
=
generator
)
latents
.
append
(
batch_video
)
latents
.
append
(
batch_video
)
return
torch
.
cat
(
latents
)
return
torch
.
cat
(
latents
)
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
video
.
append
(
batch_latents
)
...
@@ -638,7 +638,7 @@ class AnimateDiffVideoToVideoPipeline(
...
@@ -638,7 +638,7 @@ class AnimateDiffVideoToVideoPipeline(
device
,
device
,
generator
,
generator
,
latents
=
None
,
latents
=
None
,
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
):
):
if
latents
is
None
:
if
latents
is
None
:
num_frames
=
video
.
shape
[
1
]
num_frames
=
video
.
shape
[
1
]
...
@@ -673,10 +673,11 @@ class AnimateDiffVideoToVideoPipeline(
...
@@ -673,10 +673,11 @@ class AnimateDiffVideoToVideoPipeline(
)
)
init_latents
=
[
init_latents
=
[
self
.
encode_video
(
video
[
i
],
generator
[
i
],
vae_batch_size
).
unsqueeze
(
0
)
for
i
in
range
(
batch_size
)
self
.
encode_video
(
video
[
i
],
generator
[
i
],
decode_chunk_size
).
unsqueeze
(
0
)
for
i
in
range
(
batch_size
)
]
]
else
:
else
:
init_latents
=
[
self
.
encode_video
(
vid
,
generator
,
vae_batch
_size
).
unsqueeze
(
0
)
for
vid
in
video
]
init_latents
=
[
self
.
encode_video
(
vid
,
generator
,
decode_chunk
_size
).
unsqueeze
(
0
)
for
vid
in
video
]
init_latents
=
torch
.
cat
(
init_latents
,
dim
=
0
)
init_latents
=
torch
.
cat
(
init_latents
,
dim
=
0
)
...
@@ -761,7 +762,7 @@ class AnimateDiffVideoToVideoPipeline(
...
@@ -761,7 +762,7 @@ class AnimateDiffVideoToVideoPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
):
):
r
"""
r
"""
The call function to the pipeline for generation.
The call function to the pipeline for generation.
...
@@ -837,7 +838,7 @@ class AnimateDiffVideoToVideoPipeline(
...
@@ -837,7 +838,7 @@ class AnimateDiffVideoToVideoPipeline(
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
`._callback_tensor_inputs` attribute of your pipeline class.
`._callback_tensor_inputs` attribute of your pipeline class.
vae_batch
_size (`int`, defaults to `16`):
decode_chunk
_size (`int`, defaults to `16`):
The number of frames to decode at a time when calling the `decode_latents` method.
The number of frames to decode at a time when calling the `decode_latents` method.
Examples:
Examples:
...
@@ -940,7 +941,7 @@ class AnimateDiffVideoToVideoPipeline(
...
@@ -940,7 +941,7 @@ class AnimateDiffVideoToVideoPipeline(
device
=
device
,
device
=
device
,
generator
=
generator
,
generator
=
generator
,
latents
=
latents
,
latents
=
latents
,
vae_batch_size
=
vae_batch
_size
,
decode_chunk_size
=
decode_chunk
_size
,
)
)
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
# 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
...
@@ -1008,7 +1009,7 @@ class AnimateDiffVideoToVideoPipeline(
...
@@ -1008,7 +1009,7 @@ class AnimateDiffVideoToVideoPipeline(
if
output_type
==
"latent"
:
if
output_type
==
"latent"
:
video
=
latents
video
=
latents
else
:
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
# 10. Offload all models
...
...
src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py
View file @
e3568d14
...
@@ -407,15 +407,15 @@ class AnimateDiffPAGPipeline(
...
@@ -407,15 +407,15 @@ class AnimateDiffPAGPipeline(
return
ip_adapter_image_embeds
return
ip_adapter_image_embeds
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
# Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
def
decode_latents
(
self
,
latents
,
vae_batch
_size
:
int
=
16
):
def
decode_latents
(
self
,
latents
,
decode_chunk
_size
:
int
=
16
):
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
latents
=
1
/
self
.
vae
.
config
.
scaling_factor
*
latents
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
batch_size
,
channels
,
num_frames
,
height
,
width
=
latents
.
shape
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
latents
=
latents
.
permute
(
0
,
2
,
1
,
3
,
4
).
reshape
(
batch_size
*
num_frames
,
channels
,
height
,
width
)
video
=
[]
video
=
[]
for
i
in
range
(
0
,
latents
.
shape
[
0
],
vae_batch
_size
):
for
i
in
range
(
0
,
latents
.
shape
[
0
],
decode_chunk
_size
):
batch_latents
=
latents
[
i
:
i
+
vae_batch
_size
]
batch_latents
=
latents
[
i
:
i
+
decode_chunk
_size
]
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
batch_latents
=
self
.
vae
.
decode
(
batch_latents
).
sample
video
.
append
(
batch_latents
)
video
.
append
(
batch_latents
)
...
@@ -588,7 +588,7 @@ class AnimateDiffPAGPipeline(
...
@@ -588,7 +588,7 @@ class AnimateDiffPAGPipeline(
clip_skip
:
Optional
[
int
]
=
None
,
clip_skip
:
Optional
[
int
]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end
:
Optional
[
Callable
[[
int
,
int
,
Dict
],
None
]]
=
None
,
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
callback_on_step_end_tensor_inputs
:
List
[
str
]
=
[
"latents"
],
vae_batch
_size
:
int
=
16
,
decode_chunk
_size
:
int
=
16
,
pag_scale
:
float
=
3.0
,
pag_scale
:
float
=
3.0
,
pag_adaptive_scale
:
float
=
0.0
,
pag_adaptive_scale
:
float
=
0.0
,
):
):
...
@@ -847,7 +847,7 @@ class AnimateDiffPAGPipeline(
...
@@ -847,7 +847,7 @@ class AnimateDiffPAGPipeline(
if
output_type
==
"latent"
:
if
output_type
==
"latent"
:
video
=
latents
video
=
latents
else
:
else
:
video_tensor
=
self
.
decode_latents
(
latents
,
vae_batch
_size
)
video_tensor
=
self
.
decode_latents
(
latents
,
decode_chunk
_size
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
video
=
self
.
video_processor
.
postprocess_video
(
video
=
video_tensor
,
output_type
=
output_type
)
# 10. Offload all models
# 10. Offload all models
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment