Unverified commit 196aef5a authored by Dimitri Barbot, committed by GitHub


Fix pipeline dtype unexpected change when using SDXL reference community pipelines in float16 mode (#10670)

parent 7b100ce5
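
For context: in float16 mode these reference community pipelines temporarily upcast the VAE to float32 while encoding the reference image and previously never cast it back, leaving `pipe.vae` in float32 after the first call. Below is a minimal repro sketch of that behaviour; the `custom_pipeline` registration name, the `ref_image` argument and the image URL are assumptions about the community pipeline's interface, not part of this commit.

import torch
from diffusers import DiffusionPipeline
from diffusers.utils import load_image

# Assumed registration name and call signature for the SDXL reference community pipeline.
pipe = DiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    custom_pipeline="stable_diffusion_xl_reference",
    torch_dtype=torch.float16,
).to("cuda")

ref_image = load_image("https://example.com/reference.png")  # placeholder reference image

assert pipe.vae.dtype == torch.float16
_ = pipe(prompt="a photo of a cat", ref_image=ref_image, num_inference_steps=20)

# Before this commit the VAE stayed upcast to float32 after the call;
# with the fix it is cast back, so the whole pipeline remains in fp16.
assert pipe.vae.dtype == torch.float16
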
@@ -193,7 +193,8 @@ class StableDiffusionXLControlNetReferencePipeline(StableDiffusionXLControlNetPi
 
     def prepare_ref_latents(self, refimage, batch_size, dtype, device, generator, do_classifier_free_guidance):
         refimage = refimage.to(device=device)
-        if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+        needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+        if needs_upcasting:
             self.upcast_vae()
             refimage = refimage.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
         if refimage.dtype != self.vae.dtype:
@@ -223,6 +224,11 @@ class StableDiffusionXLControlNetReferencePipeline(StableDiffusionXLControlNetPi
         # aligning device to prevent device errors when concating it with the latent model input
         ref_image_latents = ref_image_latents.to(device=device, dtype=dtype)
+
+        # cast back to fp16 if needed
+        if needs_upcasting:
+            self.vae.to(dtype=torch.float16)
+
         return ref_image_latents
 
     def prepare_ref_image(
...
@@ -139,7 +139,8 @@ def retrieve_timesteps(
 class StableDiffusionXLReferencePipeline(StableDiffusionXLPipeline):
     def prepare_ref_latents(self, refimage, batch_size, dtype, device, generator, do_classifier_free_guidance):
         refimage = refimage.to(device=device)
-        if self.vae.dtype == torch.float16 and self.vae.config.force_upcast:
+        needs_upcasting = self.vae.dtype == torch.float16 and self.vae.config.force_upcast
+        if needs_upcasting:
             self.upcast_vae()
             refimage = refimage.to(next(iter(self.vae.post_quant_conv.parameters())).dtype)
         if refimage.dtype != self.vae.dtype:
@@ -169,6 +170,11 @@ class StableDiffusionXLReferencePipeline(StableDiffusionXLPipeline):
         # aligning device to prevent device errors when concating it with the latent model input
         ref_image_latents = ref_image_latents.to(device=device, dtype=dtype)
+
+        # cast back to fp16 if needed
+        if needs_upcasting:
+            self.vae.to(dtype=torch.float16)
+
         return ref_image_latents
 
     def prepare_ref_image(
...
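
Both hunks apply the same pattern, shown standalone below: the fp16 VAE is temporarily upcast before encoding the reference image (VAEs with `force_upcast` can overflow in half precision) and the newly added lines restore fp16 afterwards, so the caller's dtype is unchanged. The helper name and the simplified upcast (a plain `.to(torch.float32)` instead of `upcast_vae()`) are illustrative only, not taken from the diff.

import torch
from diffusers import AutoencoderKL

def encode_reference_image(vae: AutoencoderKL, image: torch.Tensor, generator=None) -> torch.Tensor:
    # Simplified mirror of the pipelines' prepare_ref_latents dtype handling.
    needs_upcasting = vae.dtype == torch.float16 and vae.config.force_upcast
    if needs_upcasting:
        # The real pipelines call self.upcast_vae(); a full float32 cast is a simplification.
        vae.to(dtype=torch.float32)
        image = image.to(dtype=torch.float32)
    latents = vae.encode(image).latent_dist.sample(generator)
    latents = vae.config.scaling_factor * latents
    if needs_upcasting:
        # Cast back so the pipeline stays in fp16 after this call (the bug this commit fixes).
        vae.to(dtype=torch.float16)
    return latents
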