[SVD] fix a bug when passing image as tensor (#6999)

* fix * update docstring --------- Co-authored-by: yiyixuxu <yixu310@gmail.com>

[SVD] fix a bug when passing image as tensor (#6999)
* fix * update docstring --------- Co-authored-by: yiyixuxu <yixu310@gmail.com>
8974c50b · YiYi Xu · GitHub · c18058b4 · 8974c50b
Unverified Commit 8974c50b authored Feb 17, 2024 by YiYi Xu Committed by GitHub Feb 17, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 10 additions and 11 deletions

src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py ...stable_video_diffusion/pipeline_stable_video_diffusion.py +10 -11

No files found.
--- a/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py
+++ b/src/diffusers/pipelines/stable_video_diffusion/pipeline_stable_video_diffusion.py
@@ -132,15 +132,15 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):
            image = _resize_with_antialiasing(image, (224, 224))
            image = (image + 1.0) / 2.0

-            # Normalize the image with for CLIP input
-            image = self.feature_extractor(
-                images=image,
-                do_normalize=True,
-                do_center_crop=False,
-                do_resize=False,
-                do_rescale=False,
-                return_tensors="pt",
-            ).pixel_values
+        # Normalize the image for CLIP input
+        image = self.feature_extractor(
+            images=image,
+            do_normalize=True,
+            do_center_crop=False,
+            do_resize=False,
+            do_rescale=False,
+            return_tensors="pt",
+        ).pixel_values

        image = image.to(device=device, dtype=dtype)
        image_embeddings = self.image_encoder(image).image_embeds
@@ -333,8 +333,7 @@ class StableVideoDiffusionPipeline(DiffusionPipeline):

        Args:
            image (`PIL.Image.Image` or `List[PIL.Image.Image]` or `torch.FloatTensor`):
-                Image or images to guide image generation. If you provide a tensor, it needs to be compatible with
-                [`CLIPImageProcessor`](https://huggingface.co/lambdalabs/sd-image-variations-diffusers/blob/main/feature_extractor/preprocessor_config.json).
+                Image or images to guide image generation. If you provide a tensor, its values are expected to be in the range `[0, 1]`.
            height (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):