Commit 3d1b9667 authored by wangwei990215

Update the diffusers folder

parent b6a53272
accelerate>=0.16.0
torchvision
transformers>=4.25.1
wandb
bitsandbytes
deepspeed
peft>=0.6.0
[tool.ruff]
# Never enforce `E501` (line length violations).
ignore = ["C901", "E501", "E741", "F402", "F823"]
select = ["C", "E", "F", "I", "W"]
line-length = 119
# Ignore import violations in all `__init__.py` files.
[tool.ruff.per-file-ignores]
"__init__.py" = ["E402", "F401", "F403", "F811"]
"src/diffusers/utils/dummy_*.py" = ["F401"]
[tool.ruff.isort]
lines-after-imports = 2
known-first-party = ["diffusers"]
[tool.ruff.format]
# Like Black, use double quotes for strings.
quote-style = "double"
# Like Black, indent with spaces, rather than tabs.
indent-style = "space"
# Like Black, respect magic trailing commas.
skip-magic-trailing-comma = false
# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"
# coding=utf-8
# Copyright 2024 The HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Conversion script for the LDM checkpoints. """
import argparse
import json
import os
import torch
from transformers.file_utils import has_file
from diffusers import UNet2DConditionModel, UNet2DModel
do_only_config = False
do_only_weights = True
do_only_renaming = False
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--repo_path",
default=None,
type=str,
required=True,
help="The config json file corresponding to the architecture.",
)
parser.add_argument("--dump_path", default=None, type=str, required=True, help="Path to the output model.")
args = parser.parse_args()
config_parameters_to_change = {
"image_size": "sample_size",
"num_res_blocks": "layers_per_block",
"block_channels": "block_out_channels",
"down_blocks": "down_block_types",
"up_blocks": "up_block_types",
"downscale_freq_shift": "freq_shift",
"resnet_num_groups": "norm_num_groups",
"resnet_act_fn": "act_fn",
"resnet_eps": "norm_eps",
"num_head_channels": "attention_head_dim",
}
key_parameters_to_change = {
"time_steps": "time_proj",
"mid": "mid_block",
"downsample_blocks": "down_blocks",
"upsample_blocks": "up_blocks",
}
subfolder = "" if has_file(args.repo_path, "config.json") else "unet"
with open(os.path.join(args.repo_path, subfolder, "config.json"), "r", encoding="utf-8") as reader:
text = reader.read()
config = json.loads(text)
if do_only_config:
for key in config_parameters_to_change.keys():
config.pop(key, None)
if has_file(args.repo_path, "config.json"):
model = UNet2DModel(**config)
else:
class_name = UNet2DConditionModel if "ldm-text2im-large-256" in args.repo_path else UNet2DModel
model = class_name(**config)
if do_only_config:
model.save_config(os.path.join(args.repo_path, subfolder))
config = dict(model.config)
if do_only_renaming:
for key, value in config_parameters_to_change.items():
if key in config:
config[value] = config[key]
del config[key]
config["down_block_types"] = [k.replace("UNetRes", "") for k in config["down_block_types"]]
config["up_block_types"] = [k.replace("UNetRes", "") for k in config["up_block_types"]]
if do_only_weights:
state_dict = torch.load(os.path.join(args.repo_path, subfolder, "diffusion_pytorch_model.bin"))
new_state_dict = {}
for param_key, param_value in state_dict.items():
if param_key.endswith(".op.bias") or param_key.endswith(".op.weight"):
continue
has_changed = False
for key, new_key in key_parameters_to_change.items():
if not has_changed and param_key.split(".")[0] == key:
new_state_dict[".".join([new_key] + param_key.split(".")[1:])] = param_value
has_changed = True
if not has_changed:
new_state_dict[param_key] = param_value
model.load_state_dict(new_state_dict)
model.save_pretrained(os.path.join(args.repo_path, subfolder))
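# A minimal sketch of loading the converted weights back to verify the key renames,
# assuming the script above was run with --repo_path ./my-ldm-repo and that the repo
# keeps its UNet in a "unet" subfolder; both are placeholder assumptions.
from diffusers import UNet2DModel

converted_unet = UNet2DModel.from_pretrained("./my-ldm-repo", subfolder="unet")
print(converted_unet.config.down_block_types)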
import argparse
import torch
import yaml
from diffusers import DDIMScheduler, LDMPipeline, UNetLDMModel, VQModel
def convert_ldm_original(checkpoint_path, config_path, output_path):
    with open(config_path, "r") as f:
        config = yaml.safe_load(f)
state_dict = torch.load(checkpoint_path, map_location="cpu")["model"]
keys = list(state_dict.keys())
# extract state_dict for VQVAE
first_stage_dict = {}
first_stage_key = "first_stage_model."
for key in keys:
if key.startswith(first_stage_key):
first_stage_dict[key.replace(first_stage_key, "")] = state_dict[key]
# extract state_dict for UNetLDM
unet_state_dict = {}
unet_key = "model.diffusion_model."
for key in keys:
if key.startswith(unet_key):
unet_state_dict[key.replace(unet_key, "")] = state_dict[key]
vqvae_init_args = config["model"]["params"]["first_stage_config"]["params"]
unet_init_args = config["model"]["params"]["unet_config"]["params"]
vqvae = VQModel(**vqvae_init_args).eval()
vqvae.load_state_dict(first_stage_dict)
unet = UNetLDMModel(**unet_init_args).eval()
unet.load_state_dict(unet_state_dict)
noise_scheduler = DDIMScheduler(
timesteps=config["model"]["params"]["timesteps"],
beta_schedule="scaled_linear",
beta_start=config["model"]["params"]["linear_start"],
beta_end=config["model"]["params"]["linear_end"],
clip_sample=False,
)
pipeline = LDMPipeline(vqvae, unet, noise_scheduler)
pipeline.save_pretrained(output_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint_path", type=str, required=True)
parser.add_argument("--config_path", type=str, required=True)
parser.add_argument("--output_path", type=str, required=True)
args = parser.parse_args()
convert_ldm_original(args.checkpoint_path, args.config_path, args.output_path)
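# A minimal sketch of sampling from the converted pipeline, assuming --output_path
# ./ldm-converted was used above; the path and step count are placeholder assumptions.
from diffusers import LDMPipeline

pipe = LDMPipeline.from_pretrained("./ldm-converted")
image = pipe(num_inference_steps=50).images[0]
image.save("ldm_sample.png")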
import argparse
import torch
from safetensors.torch import save_file
def convert_motion_module(original_state_dict):
converted_state_dict = {}
for k, v in original_state_dict.items():
if "pos_encoder" in k:
continue
else:
converted_state_dict[
k.replace(".norms.0", ".norm1")
.replace(".norms.1", ".norm2")
.replace(".ff_norm", ".norm3")
.replace(".attention_blocks.0", ".attn1")
.replace(".attention_blocks.1", ".attn2")
.replace(".temporal_transformer", "")
] = v
return converted_state_dict
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument("--ckpt_path", type=str, required=True)
parser.add_argument("--output_path", type=str, required=True)
return parser.parse_args()
if __name__ == "__main__":
args = get_args()
state_dict = torch.load(args.ckpt_path, map_location="cpu")
if "state_dict" in state_dict.keys():
state_dict = state_dict["state_dict"]
conv_state_dict = convert_motion_module(state_dict)
# convert to new format
output_dict = {}
for module_name, params in conv_state_dict.items():
if type(params) is not torch.Tensor:
continue
output_dict.update({f"unet.{module_name}": params})
save_file(output_dict, f"{args.output_path}/diffusion_pytorch_model.safetensors")
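# A quick round-trip check of the exported file, assuming --output_path ./motion-module
# was used above; the path is a placeholder assumption.
from safetensors.torch import load_file

tensors = load_file("./motion-module/diffusion_pytorch_model.safetensors")
print(len(tensors), "tensors, all under the 'unet.' prefix:", all(k.startswith("unet.") for k in tensors))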
import argparse
import torch
from diffusers import MotionAdapter
def convert_motion_module(original_state_dict):
converted_state_dict = {}
for k, v in original_state_dict.items():
if "pos_encoder" in k:
continue
else:
converted_state_dict[
k.replace(".norms.0", ".norm1")
.replace(".norms.1", ".norm2")
.replace(".ff_norm", ".norm3")
.replace(".attention_blocks.0", ".attn1")
.replace(".attention_blocks.1", ".attn2")
.replace(".temporal_transformer", "")
] = v
return converted_state_dict
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument("--ckpt_path", type=str, required=True)
parser.add_argument("--output_path", type=str, required=True)
parser.add_argument("--use_motion_mid_block", action="store_true")
parser.add_argument("--motion_max_seq_length", type=int, default=32)
parser.add_argument("--save_fp16", action="store_true")
return parser.parse_args()
if __name__ == "__main__":
args = get_args()
state_dict = torch.load(args.ckpt_path, map_location="cpu")
if "state_dict" in state_dict.keys():
state_dict = state_dict["state_dict"]
conv_state_dict = convert_motion_module(state_dict)
adapter = MotionAdapter(
use_motion_mid_block=args.use_motion_mid_block, motion_max_seq_length=args.motion_max_seq_length
)
# skip loading position embeddings
adapter.load_state_dict(conv_state_dict, strict=False)
adapter.save_pretrained(args.output_path)
if args.save_fp16:
adapter.to(torch.float16).save_pretrained(args.output_path, variant="fp16")
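# A minimal sketch of using the converted adapter with AnimateDiff, assuming
# --output_path ./motion-adapter was used above; the adapter path and the SD 1.5
# base checkpoint ID are placeholder assumptions.
from diffusers import AnimateDiffPipeline, MotionAdapter

adapter = MotionAdapter.from_pretrained("./motion-adapter")
pipe = AnimateDiffPipeline.from_pretrained("runwayml/stable-diffusion-v1-5", motion_adapter=adapter)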
import argparse
import time
from pathlib import Path
from typing import Any, Dict, Literal
import torch
from diffusers import AsymmetricAutoencoderKL
ASYMMETRIC_AUTOENCODER_KL_x_1_5_CONFIG = {
"in_channels": 3,
"out_channels": 3,
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
],
"down_block_out_channels": [128, 256, 512, 512],
"layers_per_down_block": 2,
"up_block_types": [
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
],
"up_block_out_channels": [192, 384, 768, 768],
"layers_per_up_block": 3,
"act_fn": "silu",
"latent_channels": 4,
"norm_num_groups": 32,
"sample_size": 256,
"scaling_factor": 0.18215,
}
ASYMMETRIC_AUTOENCODER_KL_x_2_CONFIG = {
"in_channels": 3,
"out_channels": 3,
"down_block_types": [
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
"DownEncoderBlock2D",
],
"down_block_out_channels": [128, 256, 512, 512],
"layers_per_down_block": 2,
"up_block_types": [
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
"UpDecoderBlock2D",
],
"up_block_out_channels": [256, 512, 1024, 1024],
"layers_per_up_block": 5,
"act_fn": "silu",
"latent_channels": 4,
"norm_num_groups": 32,
"sample_size": 256,
"scaling_factor": 0.18215,
}
def convert_asymmetric_autoencoder_kl_state_dict(original_state_dict: Dict[str, Any]) -> Dict[str, Any]:
converted_state_dict = {}
for k, v in original_state_dict.items():
if k.startswith("encoder."):
converted_state_dict[
k.replace("encoder.down.", "encoder.down_blocks.")
.replace("encoder.mid.", "encoder.mid_block.")
.replace("encoder.norm_out.", "encoder.conv_norm_out.")
.replace(".downsample.", ".downsamplers.0.")
.replace(".nin_shortcut.", ".conv_shortcut.")
.replace(".block.", ".resnets.")
.replace(".block_1.", ".resnets.0.")
.replace(".block_2.", ".resnets.1.")
.replace(".attn_1.k.", ".attentions.0.to_k.")
.replace(".attn_1.q.", ".attentions.0.to_q.")
.replace(".attn_1.v.", ".attentions.0.to_v.")
.replace(".attn_1.proj_out.", ".attentions.0.to_out.0.")
.replace(".attn_1.norm.", ".attentions.0.group_norm.")
] = v
elif k.startswith("decoder.") and "up_layers" not in k:
converted_state_dict[
k.replace("decoder.encoder.", "decoder.condition_encoder.")
.replace(".norm_out.", ".conv_norm_out.")
.replace(".up.0.", ".up_blocks.3.")
.replace(".up.1.", ".up_blocks.2.")
.replace(".up.2.", ".up_blocks.1.")
.replace(".up.3.", ".up_blocks.0.")
.replace(".block.", ".resnets.")
.replace("mid", "mid_block")
.replace(".0.upsample.", ".0.upsamplers.0.")
.replace(".1.upsample.", ".1.upsamplers.0.")
.replace(".2.upsample.", ".2.upsamplers.0.")
.replace(".nin_shortcut.", ".conv_shortcut.")
.replace(".block_1.", ".resnets.0.")
.replace(".block_2.", ".resnets.1.")
.replace(".attn_1.k.", ".attentions.0.to_k.")
.replace(".attn_1.q.", ".attentions.0.to_q.")
.replace(".attn_1.v.", ".attentions.0.to_v.")
.replace(".attn_1.proj_out.", ".attentions.0.to_out.0.")
.replace(".attn_1.norm.", ".attentions.0.group_norm.")
] = v
elif k.startswith("quant_conv."):
converted_state_dict[k] = v
elif k.startswith("post_quant_conv."):
converted_state_dict[k] = v
else:
print(f" skipping key `{k}`")
# fix weights shape
for k, v in converted_state_dict.items():
if (
(k.startswith("encoder.mid_block.attentions.0") or k.startswith("decoder.mid_block.attentions.0"))
and k.endswith("weight")
and ("to_q" in k or "to_k" in k or "to_v" in k or "to_out" in k)
):
converted_state_dict[k] = converted_state_dict[k][:, :, 0, 0]
return converted_state_dict
def get_asymmetric_autoencoder_kl_from_original_checkpoint(
scale: Literal["1.5", "2"], original_checkpoint_path: str, map_location: torch.device
) -> AsymmetricAutoencoderKL:
print("Loading original state_dict")
original_state_dict = torch.load(original_checkpoint_path, map_location=map_location)
original_state_dict = original_state_dict["state_dict"]
print("Converting state_dict")
converted_state_dict = convert_asymmetric_autoencoder_kl_state_dict(original_state_dict)
kwargs = ASYMMETRIC_AUTOENCODER_KL_x_1_5_CONFIG if scale == "1.5" else ASYMMETRIC_AUTOENCODER_KL_x_2_CONFIG
print("Initializing AsymmetricAutoencoderKL model")
asymmetric_autoencoder_kl = AsymmetricAutoencoderKL(**kwargs)
print("Loading weight from converted state_dict")
asymmetric_autoencoder_kl.load_state_dict(converted_state_dict)
asymmetric_autoencoder_kl.eval()
print("AsymmetricAutoencoderKL successfully initialized")
return asymmetric_autoencoder_kl
if __name__ == "__main__":
start = time.time()
parser = argparse.ArgumentParser()
parser.add_argument(
"--scale",
default=None,
type=str,
required=True,
help="Asymmetric VQGAN scale: `1.5` or `2`",
)
parser.add_argument(
"--original_checkpoint_path",
default=None,
type=str,
required=True,
help="Path to the original Asymmetric VQGAN checkpoint",
)
parser.add_argument(
"--output_path",
default=None,
type=str,
required=True,
help="Path to save pretrained AsymmetricAutoencoderKL model",
)
parser.add_argument(
"--map_location",
default="cpu",
type=str,
required=False,
help="The device passed to `map_location` when loading the checkpoint",
)
args = parser.parse_args()
    assert args.scale in ["1.5", "2"], f"{args.scale} should be `1.5` or `2`"
assert Path(args.original_checkpoint_path).is_file()
asymmetric_autoencoder_kl = get_asymmetric_autoencoder_kl_from_original_checkpoint(
scale=args.scale,
original_checkpoint_path=args.original_checkpoint_path,
map_location=torch.device(args.map_location),
)
print("Saving pretrained AsymmetricAutoencoderKL")
asymmetric_autoencoder_kl.save_pretrained(args.output_path)
print(f"Done in {time.time() - start:.2f} seconds")
# Script for converting a Hugging Face Diffusers trained SDXL LoRAs to Kohya format
# This means that you can take your diffusers-trained LoRAs and
# get the output to work with WebUIs such as AUTOMATIC1111, ComfyUI, SD.Next and others.
# To get started you can find some cool `diffusers` trained LoRAs such as this cute Corgy
# https://huggingface.co/ignasbud/corgy_dog_LoRA/, download its `pytorch_lora_weights.safetensors` file
# and run the script:
# python convert_diffusers_sdxl_lora_to_webui.py --input_lora pytorch_lora_weights.safetensors --output_lora corgy.safetensors
# now you can use corgy.safetensors in your WebUI of choice!
# To train your own, here are some diffusers training scripts and utils that you can use and then convert:
# LoRA Ease - no code SDXL Dreambooth LoRA trainer: https://huggingface.co/spaces/multimodalart/lora-ease
# Dreambooth Advanced Training Script - state of the art techniques such as pivotal tuning and prodigy optimizer:
# - Script: https://github.com/huggingface/diffusers/blob/main/examples/advanced_diffusion_training/train_dreambooth_lora_sdxl_advanced.py
# - Colab (only on Pro): https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/SDXL_Dreambooth_LoRA_advanced_example.ipynb
# Canonical diffusers training scripts:
# - Script: https://github.com/huggingface/diffusers/blob/main/examples/dreambooth/train_dreambooth_lora_sdxl.py
# - Colab (runs on free tier): https://colab.research.google.com/github/huggingface/notebooks/blob/main/diffusers/SDXL_DreamBooth_LoRA_.ipynb
import argparse
import os
from safetensors.torch import load_file, save_file
from diffusers.utils import convert_all_state_dict_to_peft, convert_state_dict_to_kohya
def convert_and_save(input_lora, output_lora=None):
if output_lora is None:
base_name = os.path.splitext(input_lora)[0]
output_lora = f"{base_name}_webui.safetensors"
diffusers_state_dict = load_file(input_lora)
peft_state_dict = convert_all_state_dict_to_peft(diffusers_state_dict)
kohya_state_dict = convert_state_dict_to_kohya(peft_state_dict)
save_file(kohya_state_dict, output_lora)
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="Convert LoRA model to PEFT and then to Kohya format.")
parser.add_argument(
"--input_lora",
type=str,
required=True,
help="Path to the input LoRA model file in the diffusers format.",
)
parser.add_argument(
"--output_lora",
type=str,
required=False,
help="Path for the converted LoRA (safetensors format for AUTOMATIC1111, ComfyUI, etc.). Optional, defaults to input name with a _webui suffix.",
)
args = parser.parse_args()
convert_and_save(args.input_lora, args.output_lora)
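# For reference, the diffusers-format LoRA can also be loaded directly into an SDXL
# pipeline without conversion; the base model ID is a placeholder assumption.
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained("stabilityai/stable-diffusion-xl-base-1.0")
pipe.load_lora_weights("pytorch_lora_weights.safetensors")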