"...text-generation-inference.git" did not exist on "8672cad2cbe9b31d82e54223d0f4815ffe426cee"
Unverified commit 7bc8b923, authored by chaowenguo, committed by GitHub
Browse files

add callable object to convert frame into control_frame to reduce cpu memory usage. (#10501)



* Update rerender_a_video.py

* Update rerender_a_video.py

* Update examples/community/rerender_a_video.py
Co-authored-by: hlky <hlky@hlky.ac>

---------
Co-authored-by: hlky <hlky@hlky.ac>
Co-authored-by: YiYi Xu <yixu310@gmail.com>
parent f0c6d978
...@@ -632,7 +632,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline): ...@@ -632,7 +632,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`. The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
instead. instead.
frames (`List[np.ndarray]` or `torch.Tensor`): The input images to be used as the starting point for the image generation process. frames (`List[np.ndarray]` or `torch.Tensor`): The input images to be used as the starting point for the image generation process.
control_frames (`List[np.ndarray]` or `torch.Tensor`): The ControlNet input images condition to provide guidance to the `unet` for generation. control_frames (`List[np.ndarray]` or `torch.Tensor` or `Callable`): The ControlNet input images condition to provide guidance to the `unet` for generation or any callable object to convert frame to control_frame.
strength ('float'): SDEdit strength. strength ('float'): SDEdit strength.
num_inference_steps (`int`, *optional*, defaults to 50): num_inference_steps (`int`, *optional*, defaults to 50):
The number of denoising steps. More denoising steps usually lead to a higher quality image at the The number of denoising steps. More denoising steps usually lead to a higher quality image at the
...@@ -789,7 +789,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline): ...@@ -789,7 +789,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
# Currently we only support single control # Currently we only support single control
if isinstance(controlnet, ControlNetModel): if isinstance(controlnet, ControlNetModel):
control_image = self.prepare_control_image( control_image = self.prepare_control_image(
image=control_frames[0], image=control_frames(frames[0]) if callable(control_frames) else control_frames[0],
width=width, width=width,
height=height, height=height,
batch_size=batch_size, batch_size=batch_size,
...@@ -924,7 +924,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline): ...@@ -924,7 +924,7 @@ class RerenderAVideoPipeline(StableDiffusionControlNetImg2ImgPipeline):
for idx in range(1, len(frames)): for idx in range(1, len(frames)):
image = frames[idx] image = frames[idx]
prev_image = frames[idx - 1] prev_image = frames[idx - 1]
control_image = control_frames[idx] control_image = control_frames(image) if callable(control_frames) else control_frames[idx]
# 5.1 prepare frames # 5.1 prepare frames
image = self.image_processor.preprocess(image).to(dtype=self.dtype) image = self.image_processor.preprocess(image).to(dtype=self.dtype)
prev_image = self.image_processor.preprocess(prev_image).to(dtype=self.dtype) prev_image = self.image_processor.preprocess(prev_image).to(dtype=self.dtype)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment