renzhc / diffusers_dcu · Commits

Commit e3568d14 (unverified)
Authored by Dhruv Nair on Aug 07, 2024; committed by GitHub on Aug 07, 2024
Freenoise change `vae_batch_size` to `decode_chunk_size` (#9110)
* update
* update
Parent: f6df2244
Showing 4 changed files with 30 additions and 29 deletions:
- src/diffusers/pipelines/animatediff/pipeline_animatediff.py (+6 / -6)
- src/diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py (+5 / -5)
- src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py (+14 / -13)
- src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py (+5 / -5)
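The renamed argument is plumbed through each pipeline's public `__call__`, so callers now control VAE decode chunking as `decode_chunk_size`. A minimal usage sketch, assuming commonly used AnimateDiff checkpoints (the model ids and prompt are illustrative assumptions, not part of this commit):

```python
import torch
from diffusers import AnimateDiffPipeline, MotionAdapter
from diffusers.utils import export_to_gif

# Illustrative checkpoints; any SD 1.5 base + AnimateDiff motion adapter works.
adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
pipe = AnimateDiffPipeline.from_pretrained(
    "emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16
).to("cuda")

output = pipe(
    prompt="a corgi running on the beach",
    num_frames=16,
    decode_chunk_size=8,  # decode 8 frames per VAE forward pass (was vae_batch_size)
)
export_to_gif(output.frames[0], "animation.gif")
```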
src/diffusers/pipelines/animatediff/pipeline_animatediff.py

```diff
@@ -396,15 +396,15 @@ class AnimateDiffPipeline(
         return ip_adapter_image_embeds

-    def decode_latents(self, latents, vae_batch_size: int = 16):
+    def decode_latents(self, latents, decode_chunk_size: int = 16):
         latents = 1 / self.vae.config.scaling_factor * latents

         batch_size, channels, num_frames, height, width = latents.shape
         latents = latents.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width)

         video = []
-        for i in range(0, latents.shape[0], vae_batch_size):
-            batch_latents = latents[i : i + vae_batch_size]
+        for i in range(0, latents.shape[0], decode_chunk_size):
+            batch_latents = latents[i : i + decode_chunk_size]
             batch_latents = self.vae.decode(batch_latents).sample
             video.append(batch_latents)
...
@@ -582,7 +582,7 @@ class AnimateDiffPipeline(
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
-        vae_batch_size: int = 16,
+        decode_chunk_size: int = 16,
         **kwargs,
     ):
         r"""
@@ -651,7 +651,7 @@ class AnimateDiffPipeline(
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                 `._callback_tensor_inputs` attribute of your pipeline class.
-            vae_batch_size (`int`, defaults to `16`):
+            decode_chunk_size (`int`, defaults to `16`):
                 The number of frames to decode at a time when calling `decode_latents` method.

         Examples:
@@ -824,7 +824,7 @@ class AnimateDiffPipeline(
         if output_type == "latent":
             video = latents
         else:
-            video_tensor = self.decode_latents(latents, vae_batch_size)
+            video_tensor = self.decode_latents(latents, decode_chunk_size)
             video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
```
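The `decode_latents` loop above is plain fixed-stride chunking over the flattened frame axis, so peak VAE memory scales with the chunk size rather than the total frame count. A self-contained sketch of the same pattern, with a hypothetical `decode_fn` standing in for `self.vae.decode`:

```python
import torch

def decode_in_chunks(decode_fn, latents: torch.Tensor, decode_chunk_size: int = 16) -> torch.Tensor:
    # Decode (batch * frames, C, H, W) latents one chunk at a time so that
    # peak memory is bounded by decode_chunk_size, not the full frame count.
    frames = []
    for i in range(0, latents.shape[0], decode_chunk_size):
        frames.append(decode_fn(latents[i : i + decode_chunk_size]))
    return torch.cat(frames)

# Usage with a dummy "decoder" that upsamples 8x, mimicking a VAE's spatial scale:
latents = torch.randn(32, 4, 64, 64)
video = decode_in_chunks(lambda x: torch.nn.functional.interpolate(x, scale_factor=8), latents, 8)
print(video.shape)  # torch.Size([32, 4, 512, 512])
```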
src/diffusers/pipelines/animatediff/pipeline_animatediff_controlnet.py

```diff
@@ -435,15 +435,15 @@ class AnimateDiffControlNetPipeline(
         return ip_adapter_image_embeds

     # Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
-    def decode_latents(self, latents, vae_batch_size: int = 16):
+    def decode_latents(self, latents, decode_chunk_size: int = 16):
         latents = 1 / self.vae.config.scaling_factor * latents

         batch_size, channels, num_frames, height, width = latents.shape
         latents = latents.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width)

         video = []
-        for i in range(0, latents.shape[0], vae_batch_size):
-            batch_latents = latents[i : i + vae_batch_size]
+        for i in range(0, latents.shape[0], decode_chunk_size):
+            batch_latents = latents[i : i + decode_chunk_size]
             batch_latents = self.vae.decode(batch_latents).sample
             video.append(batch_latents)
...
@@ -728,7 +728,7 @@ class AnimateDiffControlNetPipeline(
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
-        vae_batch_size: int = 16,
+        decode_chunk_size: int = 16,
     ):
         r"""
         The call function to the pipeline for generation.
@@ -1064,7 +1064,7 @@ class AnimateDiffControlNetPipeline(
         if output_type == "latent":
             video = latents
         else:
-            video_tensor = self.decode_latents(latents, vae_batch_size)
+            video_tensor = self.decode_latents(latents, decode_chunk_size)
             video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
```
src/diffusers/pipelines/animatediff/pipeline_animatediff_video2video.py

```diff
@@ -500,24 +500,24 @@ class AnimateDiffVideoToVideoPipeline(
         return ip_adapter_image_embeds

-    def encode_video(self, video, generator, vae_batch_size: int = 16) -> torch.Tensor:
+    def encode_video(self, video, generator, decode_chunk_size: int = 16) -> torch.Tensor:
         latents = []
-        for i in range(0, len(video), vae_batch_size):
-            batch_video = video[i : i + vae_batch_size]
+        for i in range(0, len(video), decode_chunk_size):
+            batch_video = video[i : i + decode_chunk_size]
             batch_video = retrieve_latents(self.vae.encode(batch_video), generator=generator)
             latents.append(batch_video)
         return torch.cat(latents)

     # Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
-    def decode_latents(self, latents, vae_batch_size: int = 16):
+    def decode_latents(self, latents, decode_chunk_size: int = 16):
         latents = 1 / self.vae.config.scaling_factor * latents

         batch_size, channels, num_frames, height, width = latents.shape
         latents = latents.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width)

         video = []
-        for i in range(0, latents.shape[0], vae_batch_size):
-            batch_latents = latents[i : i + vae_batch_size]
+        for i in range(0, latents.shape[0], decode_chunk_size):
+            batch_latents = latents[i : i + decode_chunk_size]
             batch_latents = self.vae.decode(batch_latents).sample
             video.append(batch_latents)
...
@@ -638,7 +638,7 @@ class AnimateDiffVideoToVideoPipeline(
         device,
         generator,
         latents=None,
-        vae_batch_size: int = 16,
+        decode_chunk_size: int = 16,
     ):
         if latents is None:
             num_frames = video.shape[1]
...
@@ -673,10 +673,11 @@ class AnimateDiffVideoToVideoPipeline(
                 )

-            init_latents = [
-                self.encode_video(video[i], generator[i], vae_batch_size).unsqueeze(0) for i in range(batch_size)
-            ]
+            init_latents = [
+                self.encode_video(video[i], generator[i], decode_chunk_size).unsqueeze(0)
+                for i in range(batch_size)
+            ]
         else:
-            init_latents = [self.encode_video(vid, generator, vae_batch_size).unsqueeze(0) for vid in video]
+            init_latents = [self.encode_video(vid, generator, decode_chunk_size).unsqueeze(0) for vid in video]

         init_latents = torch.cat(init_latents, dim=0)
...
@@ -761,7 +762,7 @@ class AnimateDiffVideoToVideoPipeline(
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
-        vae_batch_size: int = 16,
+        decode_chunk_size: int = 16,
     ):
         r"""
         The call function to the pipeline for generation.
@@ -837,7 +838,7 @@ class AnimateDiffVideoToVideoPipeline(
                 The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                 will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                 `._callback_tensor_inputs` attribute of your pipeline class.
-            vae_batch_size (`int`, defaults to `16`):
+            decode_chunk_size (`int`, defaults to `16`):
                 The number of frames to decode at a time when calling `decode_latents` method.

         Examples:
@@ -940,7 +941,7 @@ class AnimateDiffVideoToVideoPipeline(
             device=device,
             generator=generator,
             latents=latents,
-            vae_batch_size=vae_batch_size,
+            decode_chunk_size=decode_chunk_size,
         )

         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
...
@@ -1008,7 +1009,7 @@ class AnimateDiffVideoToVideoPipeline(
         if output_type == "latent":
             video = latents
         else:
-            video_tensor = self.decode_latents(latents, vae_batch_size)
+            video_tensor = self.decode_latents(latents, decode_chunk_size)
             video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
```
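Note that in the video-to-video pipeline the single `decode_chunk_size` argument now also chunks `encode_video`, so one knob bounds memory on both the VAE encode and decode passes. A hedged usage sketch (the model ids, prompt, and placeholder frames are illustrative assumptions):

```python
import torch
from PIL import Image
from diffusers import AnimateDiffVideoToVideoPipeline, MotionAdapter

adapter = MotionAdapter.from_pretrained("guoyww/animatediff-motion-adapter-v1-5-2")
pipe = AnimateDiffVideoToVideoPipeline.from_pretrained(
    "emilianJR/epiCRealism", motion_adapter=adapter, torch_dtype=torch.float16
).to("cuda")

# Placeholder input frames; in practice these come from a real clip.
frames = [Image.new("RGB", (512, 512)) for _ in range(16)]
output = pipe(
    video=frames,
    prompt="an oil painting of a dancing robot",
    strength=0.6,
    decode_chunk_size=4,  # chunks both encode_video and decode_latents
)
```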
src/diffusers/pipelines/pag/pipeline_pag_sd_animatediff.py

```diff
@@ -407,15 +407,15 @@ class AnimateDiffPAGPipeline(
         return ip_adapter_image_embeds

     # Copied from diffusers.pipelines.animatediff.pipeline_animatediff.AnimateDiffPipeline.decode_latents
-    def decode_latents(self, latents, vae_batch_size: int = 16):
+    def decode_latents(self, latents, decode_chunk_size: int = 16):
         latents = 1 / self.vae.config.scaling_factor * latents

         batch_size, channels, num_frames, height, width = latents.shape
         latents = latents.permute(0, 2, 1, 3, 4).reshape(batch_size * num_frames, channels, height, width)

         video = []
-        for i in range(0, latents.shape[0], vae_batch_size):
-            batch_latents = latents[i : i + vae_batch_size]
+        for i in range(0, latents.shape[0], decode_chunk_size):
+            batch_latents = latents[i : i + decode_chunk_size]
             batch_latents = self.vae.decode(batch_latents).sample
             video.append(batch_latents)
...
@@ -588,7 +588,7 @@ class AnimateDiffPAGPipeline(
         clip_skip: Optional[int] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: List[str] = ["latents"],
-        vae_batch_size: int = 16,
+        decode_chunk_size: int = 16,
         pag_scale: float = 3.0,
         pag_adaptive_scale: float = 0.0,
     ):
@@ -847,7 +847,7 @@ class AnimateDiffPAGPipeline(
         if output_type == "latent":
             video = latents
         else:
-            video_tensor = self.decode_latents(latents, vae_batch_size)
+            video_tensor = self.decode_latents(latents, decode_chunk_size)
             video = self.video_processor.postprocess_video(video=video_tensor, output_type=output_type)

         # 10. Offload all models
```
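For downstream code, this is a breaking rename on the public signatures above: keyword calls using the old name must be updated (of the four pipelines, only `AnimateDiffPipeline.__call__` retains `**kwargs` to absorb stray keywords). A minimal migration sketch, where `pipe` is assumed to be any of the pipelines touched here:

```python
# Before #9110 (old keyword; no longer accepted by the pipelines whose
# __call__ does not take **kwargs):
# frames = pipe(prompt="a corgi running on the beach", vae_batch_size=8).frames[0]

# After #9110:
frames = pipe(prompt="a corgi running on the beach", decode_chunk_size=8).frames[0]
```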