Fixes training resuming: Advanced Dreambooth LoRa Training (#6566)

* Fixes #6418 Advanced Dreambooth LoRa Training
* change order of import to fix nit
* fix nit, use cast_training_params
* remove torch.compile fix, will move to a new PR
* remove unnecessary import

Fixes training resuming: Advanced Dreambooth LoRa Training (#6566)
* Fixes #6418 Advanced Dreambooth LoRa Training
* change order of import to fix nit
* fix nit, use cast_training_params
* remove torch.compile fix, will move to a new PR
* remove unnecessary import
181280ba · Steve Rhoades · GitHub · 53f498d2 · 181280ba
Unverified Commit 181280ba authored Jan 16, 2024 by Steve Rhoades Committed by GitHub Jan 16, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 36 additions and 22 deletions

examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py +36 -22

No files found.
--- a/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py
+++ b/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py
@@ -38,7 +38,7 @@ from accelerate.logging import get_logger
 from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed
 from huggingface_hub import create_repo, upload_folder
 from packaging import version
-from peft import LoraConfig
+from peft import LoraConfig, set_peft_model_state_dict
 from peft.utils import get_peft_model_state_dict
 from PIL import Image
 from PIL.ImageOps import exif_transpose
@@ -58,12 +58,13 @@ from diffusers import (
 )
 from diffusers.loaders import LoraLoaderMixin
 from diffusers.optimization import get_scheduler
-from diffusers.training_utils import compute_snr
+from diffusers.training_utils import _set_state_dict_into_text_encoder, cast_training_params, compute_snr
 from diffusers.utils import (
    check_min_version,
    convert_all_state_dict_to_peft,
    convert_state_dict_to_diffusers,
    convert_state_dict_to_kohya,
+    convert_unet_state_dict_to_peft,
    is_wandb_available,
 )
 from diffusers.utils.import_utils import is_xformers_available
@@ -1292,17 +1293,6 @@ def main(args):
            else:
                param.requires_grad = False
-    # Make sure the trainable params are in float32.
-    if args.mixed_precision == "fp16":
-        models = [unet]
-        if args.train_text_encoder:
-            models.extend([text_encoder_one, text_encoder_two])
-        for model in models:
-            for param in model.parameters():
-                # only upcast trainable parameters (LoRA) into fp32
-                if param.requires_grad:
-                    param.data = param.to(torch.float32)
    # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format
    def save_model_hook(models, weights, output_dir):
        if accelerator.is_main_process:
@@ -1358,17 +1348,34 @@ def main(args):
                raise ValueError(f"unexpected save model: {model.__class__}")
        lora_state_dict, network_alphas = LoraLoaderMixin.lora_state_dict(input_dir)
-        LoraLoaderMixin.load_lora_into_unet(lora_state_dict, network_alphas=network_alphas, unet=unet_)
-        text_encoder_state_dict = {k: v for k, v in lora_state_dict.items() if "text_encoder." in k}
+        unet_state_dict = {f'{k.replace("unet.", "")}': v for k, v in lora_state_dict.items() if k.startswith("unet.")}
-        LoraLoaderMixin.load_lora_into_text_encoder(
+        unet_state_dict = convert_unet_state_dict_to_peft(unet_state_dict)
-            text_encoder_state_dict, network_alphas=network_alphas, text_encoder=text_encoder_one_
+        incompatible_keys = set_peft_model_state_dict(unet_, unet_state_dict, adapter_name="default")
-        )
+        if incompatible_keys is not None:
+            # check only for unexpected keys
+            unexpected_keys = getattr(incompatible_keys, "unexpected_keys", None)
+            if unexpected_keys:
+                logger.warning(
+                    f"Loading adapter weights from state_dict led to unexpected keys not found in the model: "
+                    f" {unexpected_keys}. "
+                )
-        text_encoder_2_state_dict = {k: v for k, v in lora_state_dict.items() if "text_encoder_2." in k}
+        if args.train_text_encoder:
-        LoraLoaderMixin.load_lora_into_text_encoder(
+            _set_state_dict_into_text_encoder(lora_state_dict, prefix="text_encoder.", text_encoder=text_encoder_one_)
-            text_encoder_2_state_dict, network_alphas=network_alphas, text_encoder=text_encoder_two_
-        )
+            _set_state_dict_into_text_encoder(
+                lora_state_dict, prefix="text_encoder_2.", text_encoder=text_encoder_two_
+            )
+        # Make sure the trainable params are in float32. This is again needed since the base models
+        # are in `weight_dtype`. More details:
+        # https://github.com/huggingface/diffusers/pull/6514#discussion_r1449796804
+        if args.mixed_precision == "fp16":
+            models = [unet_]
+            if args.train_text_encoder:
+                models.extend([text_encoder_one_, text_encoder_two_])
+            cast_training_params(models)
    accelerator.register_save_state_pre_hook(save_model_hook)
    accelerator.register_load_state_pre_hook(load_model_hook)
@@ -1383,6 +1390,13 @@ def main(args):
            args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes
        )
+    # Make sure the trainable params are in float32.
+    if args.mixed_precision == "fp16":
+        models = [unet]
+        if args.train_text_encoder:
+            models.extend([text_encoder_one, text_encoder_two])
+        cast_training_params(models, dtype=torch.float32)
    unet_lora_parameters = list(filter(lambda p: p.requires_grad, unet.parameters()))
    if args.train_text_encoder: