Unverified commit 2cb383f5 authored by captainzz, committed by GitHub

fix vae dtype when accelerate config using --mixed_precision="fp16" (#9601)

* fix vae dtype when accelerate config using --mixed_precision="fp16"

* Add param for upcast vae
parent 31010ecc
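
Before this patch the VAE was always cast to torch.float32, regardless of the mixed-precision setting passed to accelerate. With the patch it follows weight_dtype by default (torch.float16 when accelerate is configured with --mixed_precision="fp16"), and the new --upcast_vae flag opts back into keeping the VAE in fp32.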
@@ -357,6 +357,11 @@ def parse_args(input_args=None):
         action="store_true",
         help="Whether or not to use gradient checkpointing to save memory at the expense of slower backward pass.",
     )
+    parser.add_argument(
+        "--upcast_vae",
+        action="store_true",
+        help="Whether or not to upcast vae to fp32",
+    )
     parser.add_argument(
         "--learning_rate",
         type=float,
@@ -1094,7 +1099,10 @@ def main(args):
         weight_dtype = torch.bfloat16
 
     # Move vae, transformer and text_encoder to device and cast to weight_dtype
-    vae.to(accelerator.device, dtype=torch.float32)
+    if args.upcast_vae:
+        vae.to(accelerator.device, dtype=torch.float32)
+    else:
+        vae.to(accelerator.device, dtype=weight_dtype)
     transformer.to(accelerator.device, dtype=weight_dtype)
     text_encoder_one.to(accelerator.device, dtype=weight_dtype)
     text_encoder_two.to(accelerator.device, dtype=weight_dtype)
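
The sketch below illustrates the resulting dtype handling in isolation. It assumes a CUDA machine (accelerate rejects fp16 mixed precision on CPU) and uses torch.nn.Identity placeholders instead of the real VAE and transformer; only the --upcast_vae flag and the dtype logic come from the patch, everything else is illustrative scaffolding.

import argparse

import torch
from accelerate import Accelerator

# Placeholder modules standing in for the real VAE / transformer; only dtype routing matters here.
vae = torch.nn.Identity()
transformer = torch.nn.Identity()

parser = argparse.ArgumentParser()
parser.add_argument("--upcast_vae", action="store_true", help="Whether or not to upcast vae to fp32")
args = parser.parse_args([])  # no flags passed: the VAE will follow weight_dtype

# Equivalent to launching the script with accelerate and --mixed_precision="fp16" (needs a GPU).
accelerator = Accelerator(mixed_precision="fp16")

# weight_dtype is derived from the accelerator, as in the training script.
weight_dtype = torch.float32
if accelerator.mixed_precision == "fp16":
    weight_dtype = torch.float16
elif accelerator.mixed_precision == "bf16":
    weight_dtype = torch.bfloat16

# After the patch the VAE stays in fp32 only when --upcast_vae is passed;
# otherwise it is cast to weight_dtype like the transformer and text encoders.
if args.upcast_vae:
    vae.to(accelerator.device, dtype=torch.float32)
else:
    vae.to(accelerator.device, dtype=weight_dtype)
transformer.to(accelerator.device, dtype=weight_dtype)

print(weight_dtype)  # torch.float16 under fp16 mixed precision

Passing --upcast_vae restores the previous behaviour (VAE kept in fp32) while the transformer and text encoders still run in fp16.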