Commit 727428ec authored by jerrrrry's avatar jerrrrry
Browse files

Initial commit CI/CD

parents
{
"vocab_size": 47020,
"text_attention_probs_dropout_prob": 0.1,
"text_hidden_act": "gelu",
"text_hidden_dropout_prob": 0.1,
"text_hidden_size": 1024,
"text_initializer_range": 0.02,
"text_intermediate_size": 4096,
"text_max_position_embeddings": 512,
"text_num_attention_heads": 16,
"text_num_hidden_layers": 24,
"text_type_vocab_size": 2
}
\ No newline at end of file
{
"embed_dim": 1024,
"image_resolution": 224,
"vision_layers": 32,
"vision_width": 1280,
"vision_head_width": 80,
"vision_patch_size": 14
}
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3

# Launcher for HunyuanDiT IP-Adapter training via hydit/run_g_ipadapter.sh.
# Extra CLI flags may be appended when invoking this script; they are
# forwarded verbatim through "$@" at the end of the command line.

task_flag="IP_Adapter"                          # identifies the run/output folder
index_file=dataset/porcelain/jsons/porcelain_mt.json  # training data index
results_dir=./log_EXP                           # save root for results
batch_size=1                                    # training batch size
image_size=1024                                 # training image resolution
grad_accu_steps=1                               # gradient accumulation steps
warmup_num_steps=0                              # warm-up steps
lr=0.0001                                       # learning rate
ckpt_every=10                                   # create a ckpt every a few steps
ckpt_latest_every=10000                         # create a ckpt named `latest.pt` every a few steps
ckpt_every_n_epoch=2                            # create a ckpt every a few epochs
epochs=8                                        # total training epochs

# NOTE: every continuation backslash must be preceded by a space; the original
# had `0.22\`, `0.05\` and `--resume\`, which glued each value to the next
# flag (e.g. the token `0.22--uncond-p-img`), silently corrupting arguments.
PYTHONPATH=. \
sh ./hydit/run_g_ipadapter.sh \
    --task-flag "${task_flag}" \
    --noise-schedule scaled_linear --beta-start 0.00085 --beta-end 0.018 \
    --predict-type v_prediction \
    --multireso \
    --reso-step 64 \
    --uncond-p 0.22 \
    --uncond-p-t5 0.22 \
    --uncond-p-img 0.05 \
    --index-file "${index_file}" \
    --random-flip \
    --lr "${lr}" \
    --batch-size "${batch_size}" \
    --image-size "${image_size}" \
    --global-seed 999 \
    --grad-accu-steps "${grad_accu_steps}" \
    --warmup-num-steps "${warmup_num_steps}" \
    --use-flash-attn \
    --use-fp16 \
    --extra-fp16 \
    --results-dir "${results_dir}" \
    --resume \
    --resume-module-root ./ckpts/t2i/model/pytorch_model_distill.pt \
    --epochs "${epochs}" \
    --ckpt-every "${ckpt_every}" \
    --ckpt-latest-every "${ckpt_latest_every}" \
    --ckpt-every-n-epoch "${ckpt_every_n_epoch}" \
    --log-every 10 \
    --deepspeed \
    --use-zero-stage 2 \
    --gradient-checkpointing \
    --no-strict \
    --training-parts ipadapter \
    --is-ipa True \
    --resume-ipa True \
    --resume-ipa-root ./ckpts/t2i/model/ipa.pt \
    "$@"
This source diff could not be displayed because it is too large. You can view the blob instead.
# kohya_ss-hydit
This repository contains custom codes for kohya_ss GUI, and sd-scripts training codes for HunyuanDiT.
## Overview
### Train HunyuanDiT with Kohya_ss
Train HunyuanDiT with Dreambooth.
![dreambooth](img/dreambooth.png)
Train HunyuanDiT with LoRA.
![lora](img/lora.png)
## Usage
We provide several commands to quick start:
```shell
# Download kohya_ss GUI
git clone https://github.com/bmaltais/kohya_ss.git
cd kohya_ss/
# Download sd-scripts training backend, use dev branch
git clone -b dev https://github.com/kohya-ss/sd-scripts ./sd-scripts
# Copy the custom GUI code into the kohya_ss GUI, replacing files with the same name
cp -Rf ${HunyuanDiT}/kohya_ss-hydit/* ./
# Download model weights as before or link the existing model folder to kohya_ss/models.
python -m pip install "huggingface_hub[cli]"
# If you want to download the full model, use the following command
huggingface-cli download Tencent-Hunyuan/HunyuanDiT-v1.1 --local-dir ./models/HunyuanDiT-V1.1
huggingface-cli download Tencent-Hunyuan/HunyuanDiT-V1.2 --local-dir ./models/HunyuanDiT-V1.2
# Or, if you want to download the fp16 pruned model
huggingface-cli download KBlueLeaf/HunYuanDiT-V1.1-fp16-pruned --local-dir ./models/HunyuanDiT-V1.1-fp16-pruned
# After the model is downloaded, rename the folders so they follow the kohya standard format:
# rename the file name in t2i/ folder as shown below:
# HunyuanDiT-V1.2/t2i/
# - model/ -> denoiser/
# - clip_text_encoder/ -> clip/
# - mt5/ -> mt5/
# - sdxl-vae-fp16-fix/ -> vae/
# Also you may need to move tokenizer/* into clip/ folder
mv HunyuanDiT-V1.2/t2i/model/ HunyuanDiT-V1.2/t2i/denoiser/
mv HunyuanDiT-V1.2/t2i/clip_text_encoder/ HunyuanDiT-V1.2/t2i/clip/
# mt5/ already matches the kohya standard name, so no rename is needed for it
mv HunyuanDiT-V1.2/t2i/sdxl-vae-fp16-fix/ HunyuanDiT-V1.2/t2i/vae/
mv HunyuanDiT-V1.2/t2i/tokenizer/* HunyuanDiT-V1.2/t2i/clip/
# Create a dedicated conda environment with Python 3.10
conda create -n hydit-kohya python=3.10.12
conda activate hydit-kohya
# Install some essential packages, please make sure cuda environment is installed and python version is 3.10
# For cuda 12:
pip install torch==2.1.2 torchvision==0.16.2 xformers==0.0.23.post1
# For cuda 11:
pip install torch==2.1.2+cu118 torchvision==0.16.2+cu118 xformers==0.0.23.post1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
# For cpu offloading to save GPU memory, we recommend to install Deepspeed as follows:
DS_BUILD_CPU_ADAM=1 pip install deepspeed==0.14.1
# Install other python package
pip install -r hunyuan_requirements.txt
# Run the Kohya_ss UI launch command
python kohya_gui.py
```
After the panel is launched, you can use the GUI to train the HunyuanDiT model.
> If you want to train the HunyuanDiT model:
>`v_parameterization` checkbox **is required**.
> Model version checkbox like `HunYuan 1.1` and `HunYuan 1.2` **is required**. Please select the corresponding version
As is shown in the following figure:
![checkbox](img/CheckBox.png)
If you face any WORLD_SIZE key errors in environment variables, try setting it manually, like: export WORLD_SIZE=1.
## Training Notice
> ⚠️ Important Reminder:
> Some arguments used in the training process only support the DEFAULT value so far. Please read the following instructions first.
### Hardware requirements
The table below outlines GPU memory usage for various training methods on the `HunYuanDiT V1.2` model. We suggest using `HunYuanDiT V1.2` over `HunYuanDiT V1.1` due to its superior performance and lower memory consumption.
| Training Method | Gradient checkpoint enable | Deepspeed enable| GPU Memory Usage |
|------------|----------------------------|-----------------|------------------|
| Dreambooth | No | No | 33 GB |
| Dreambooth | Yes | No | 26 GB |
| Dreambooth | Yes | Yes | 16 GB |
| LoRA | No | No (Not support yet) | 24 GB |
| LoRA | Yes | No (Not support yet) | 11 GB |
- If a `CUDA OUT OF MEMORY` error arises, consider using gradient checkpointing or enabling Deepspeed (currently only supports the Dreambooth method) to decrease memory usage as follows:
>
> `Parameters` -> `Advanced` -> `Gradient Checkpointing`
>
> `Parameters` -> `Advanced` -> `Enable deepspeed`
>
> ![deepspeed](img/deepspeed.png)
### Dreambooth method
- In the Dreambooth method, the model will be only saved as `ckpt` format.
- We only support the default `xformers` cross attention.
- You can optionally employ gradient checkpointing, optimizer offloading, and parameter offloading to save GPU memory usage.
- Gradient checkpointing (click on its checkbox) only needs 24GBs instead of the original 33 GBs. Furthermore, optimizer and parameter offloading (click on three checkboxes of `enable deepspeed`, `offload optimizer device` and `offload param device` and set zero stage as 2 or 3) can work together to decrease the memory usage to only 16GBs.
### LoRA method
- We only support the default `xformers` cross attention.
- Only `HunYuan 1.1` supports `LyCORIS/..` right now.
## Inference
We offer a basic Gradio interface for inference. Run the command below to start:
```bash
python ./sd-scripts/hunyuan_inference_gui.py
```
Visit `http://0.0.0.0:7888` to view the GUI in browser.
Ensure to choose the right training method (`Dreambooth`/`LoRA`/`LyCORIS`), model version (`HunYuan 1.1` / `HunYuan 1.2`), and model path as displayed below:
![inference](./img/Inference.png)
You can also initiate inference using the command below.
```shell
# Inference with dreambooth
python ./sd-scripts/hunyuan_test_dreambooth.py
# Inference with lora
python ./sd-scripts/hunyuan_test_lora.py
# Inference with lycoris
python ./sd-scripts/hunyuan_test_lycoris.py
```
## Reference
<a id="1">[1]</a>
https://github.com/bmaltais/kohya_ss
<a id="2">[2]</a>
https://github.com/kohya-ss/sd-scripts
<a id="3">[3]</a>
https://github.com/kohya-ss/sd-scripts/pull/1378
## Acknowledgements
We sincerely appreciate the joint efforts of [KohakuBlueleaf](https://github.com/KohakuBlueleaf/KohakuBlueleaf) and [Bdsqlsz](https://github.com/sdbds).
absl-py==2.1.0
accelerate==0.25.0
aiofiles==23.2.1
aiohttp==3.9.5
aiosignal==1.3.1
altair==4.2.2
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.4.0
appdirs==1.4.4
astunparse==1.6.3
async-timeout==4.0.3
attrs==23.2.0
bitsandbytes==0.42.0
cachetools==5.3.3
certifi==2022.12.7
charset-normalizer==2.1.1
clean-fid==0.1.35
click==8.1.7
clip-anytorch==2.6.0
contourpy==1.2.1
cycler==0.12.1
dadaptation==3.1
dctorch==0.1.2
diffusers==0.25.0
dnspython==2.6.1
docker-pycreds==0.4.0
easygui==0.98.3
einops==0.7.0
email_validator==2.2.0
engineering-notation==0.10.0
entrypoints==0.4
exceptiongroup==1.2.1
fairscale==0.4.13
fastapi==0.111.0
fastapi-cli==0.0.4
ffmpy==0.3.2
filelock==3.13.1
flatbuffers==24.3.25
fonttools==4.53.0
frozenlist==1.4.1
fsspec==2024.2.0
ftfy==6.1.1
gast==0.5.5
gitdb==4.0.11
GitPython==3.1.43
google-auth==2.30.0
google-auth-oauthlib==1.2.0
google-pasta==0.2.0
gradio==4.37.1
gradio_client==1.0.2
grpcio==1.64.1
h11==0.14.0
h5py==3.11.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.20.1
idna==3.4
imageio==2.34.2
imagesize==1.4.1
importlib_metadata==7.2.1
importlib_resources==6.4.0
invisible-watermark==0.2.0
Jinja2==3.1.3
jsonmerge==1.9.2
jsonschema==4.22.0
jsonschema-specifications==2023.12.1
k-diffusion==0.1.1.post1
keras==2.15.0
kiwisolver==1.4.5
kornia==0.7.2
kornia_rs==0.1.3
lazy_loader==0.4
libclang==18.1.1
-e git+https://github.com/kohya-ss/sd-scripts.git@56bb81c9e6483b8b4d5b83639548855b8359f4b4#egg=library
lightning-utilities==0.11.2
lion-pytorch==0.0.6
lycoris_lora==3.0.0.dev12
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.0
mdurl==0.1.2
ml-dtypes==0.2.0
mpmath==1.3.0
multidict==6.0.5
networkx==3.2.1
numpy==1.26.4
oauthlib==3.2.2
omegaconf==2.3.0
onnx==1.15.0
open-clip-torch==2.20.0
opencv-python==4.10.0.84
opt-einsum==3.3.0
orjson==3.10.5
packaging==24.1
pandas==2.2.2
pathtools==0.1.2
pillow==10.2.0
platformdirs==4.2.2
prodigyopt==1.0
protobuf==3.20.3
psutil==6.0.0
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.7.4
pydantic_core==2.18.4
pydub==0.25.1
Pygments==2.18.0
pyparsing==3.1.2
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.9
pytorch-lightning==1.9.0
pytz==2024.1
PyWavelets==1.6.0
PyYAML==6.0.1
referencing==0.35.1
regex==2024.5.15
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.7.1
rpds-py==0.18.1
rsa==4.9
ruff==0.4.10
safetensors==0.4.2
scikit-image==0.24.0
scipy==1.11.4
semantic-version==2.10.0
sentencepiece==0.2.0
sentry-sdk==2.6.0
setproctitle==1.3.3
shellingham==1.5.4
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
starlette==0.37.2
sympy==1.12
tensorboard==2.15.2
tensorboard-data-server==0.7.2
tensorflow==2.15.0.post1
tensorflow-estimator==2.15.0
tensorflow-io-gcs-filesystem==0.37.0
termcolor==2.4.0
tifffile==2024.6.18
timm==0.6.12
tk==0.1.0
tk-tools==0.16.0
tokenizers==0.15.2
toml==0.10.2
tomlkit==0.12.0
toolz==0.12.1
torchdiffeq==0.2.4
torchmetrics==1.4.0.post0
torchsde==0.2.6
tqdm==4.66.4
trampoline==0.1.2
transformers==4.38.0
triton==2.1.0
typer==0.12.3
typing_extensions==4.9.0
tzdata==2024.1
ujson==5.10.0
urllib3==2.2.2
uvicorn==0.30.1
uvloop==0.19.0
voluptuous==0.13.1
wandb==0.15.11
watchfiles==0.22.0
wcwidth==0.2.13
websockets==11.0.3
Werkzeug==3.0.3
wrapt==1.14.1
yarl==1.9.4
zipp==3.19.2
{
"LoRA_type": "Standard",
"LyCORIS_preset": "full",
"adaptive_noise_scale": 0,
"additional_parameters": "",
"async_upload": false,
"block_alphas": "",
"block_dims": "",
"block_lr_zero_threshold": "",
"bucket_no_upscale": true,
"bucket_reso_steps": 64,
"bypass_mode": false,
"cache_latents": true,
"cache_latents_to_disk": false,
"caption_dropout_every_n_epochs": 0,
"caption_dropout_rate": 0,
"caption_extension": ".txt",
"clip_skip": 1,
"color_aug": false,
"constrain": 0,
"conv_alpha": 1,
"conv_block_alphas": "",
"conv_block_dims": "",
"conv_dim": 1,
"dataset_config": "",
"debiased_estimation_loss": false,
"decompose_both": false,
"dim_from_weights": false,
"dora_wd": false,
"down_lr_weight": "",
"dynamo_backend": "no",
"dynamo_mode": "default",
"dynamo_use_dynamic": false,
"dynamo_use_fullgraph": false,
"enable_bucket": true,
"epoch": 1,
"extra_accelerate_launch_args": "",
"factor": -1,
"flip_aug": false,
"fp8_base": false,
"full_bf16": false,
"full_fp16": false,
"gpu_ids": "",
"gradient_accumulation_steps": 1,
"gradient_checkpointing": true,
"huber_c": 0.1,
"huber_schedule": "snr",
"huggingface_path_in_repo": "",
"huggingface_repo_id": "",
"huggingface_repo_type": "",
"huggingface_repo_visibility": "",
"huggingface_token": "",
"hunyuan11": false,
"hunyuan12": true,
"ip_noise_gamma": 0,
"ip_noise_gamma_random_strength": false,
"keep_tokens": 0,
"learning_rate": 0.0001,
"log_tracker_config": "",
"log_tracker_name": "",
"log_with": "",
"logging_dir": "",
"loss_type": "l2",
"lr_scheduler": "cosine",
"lr_scheduler_args": "",
"lr_scheduler_num_cycles": 1,
"lr_scheduler_power": 1,
"lr_warmup": 10,
"main_process_port": 0,
"masked_loss": false,
"max_bucket_reso": 2048,
"max_data_loader_n_workers": 0,
"max_grad_norm": 1,
"max_resolution": "1024,1024",
"max_timestep": 1000,
"max_token_length": 75,
"max_train_epochs": 0,
"max_train_steps": 1600,
"mem_eff_attn": false,
"metadata_author": "\u6df7\u5143 & HAI",
"metadata_description": "\u4e00\u4e2a\u5b59\u609f\u7a7a\u7684\u6982\u5ff5\u5f62\u8c61lora",
"metadata_license": "MIT",
"metadata_tags": "\u5b59\u609f\u7a7a",
"metadata_title": "HunyuanDiTv12_wukong_lora",
"mid_lr_weight": "",
"min_bucket_reso": 256,
"min_snr_gamma": 0,
"min_timestep": 0,
"mixed_precision": "fp16",
"model_list": "custom",
"module_dropout": 0.05,
"multi_gpu": false,
"multires_noise_discount": 0.3,
"multires_noise_iterations": 0,
"network_alpha": 128,
"network_dim": 128,
"network_dropout": 0.05,
"network_weights": "",
"noise_offset": 0,
"noise_offset_random_strength": false,
"noise_offset_type": "Original",
"num_cpu_threads_per_process": 2,
"num_machines": 1,
"num_processes": 1,
"optimizer": "AdamW8bit",
"optimizer_args": "",
"output_dir": "/root/kohya_ss/outputs",
"output_name": "last",
"persistent_data_loader_workers": false,
"pretrained_model_name_or_path": "/root/kohya_ss/HunyuanDiT-V1.2/t2i/",
"prior_loss_weight": 1,
"random_crop": false,
"rank_dropout": 0.05,
"rank_dropout_scale": false,
"reg_data_dir": "",
"rescaled": false,
"resume": "",
"resume_from_huggingface": "",
"sample_every_n_epochs": 0,
"sample_every_n_steps": 0,
"sample_prompts": "",
"sample_sampler": "euler_a",
"save_as_bool": false,
"save_every_n_epochs": 1,
"save_every_n_steps": 50,
"save_last_n_steps": 0,
"save_last_n_steps_state": 0,
"save_model_as": "safetensors",
"save_precision": "fp16",
"save_state": false,
"save_state_on_train_end": false,
"save_state_to_huggingface": false,
"scale_v_pred_loss_like_noise_pred": false,
"scale_weight_norms": 0,
"sdxl": false,
"sdxl_cache_text_encoder_outputs": false,
"sdxl_no_half_vae": false,
"seed": 0,
"shuffle_caption": false,
"stop_text_encoder_training": 0,
"text_encoder_lr": 0.0001,
"train_batch_size": 1,
"train_data_dir": "/root/demo_images/wukong",
"train_norm": false,
"train_on_input": true,
"training_comment": "",
"unet_lr": 0.0001,
"unit": 1,
"up_lr_weight": "",
"use_cp": false,
"use_scalar": false,
"use_tucker": false,
"v2": false,
"v_parameterization": true,
"v_pred_like_loss": 0,
"vae": "",
"vae_batch_size": 0,
"wandb_api_key": "",
"wandb_run_name": "",
"weighted_captions": false,
"xformers": "xformers"
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment