Commit 727428ec authored by jerrrrry's avatar jerrrrry
Browse files

Initial commit CI/CD

parents
{
"vocab_size": 47020,
"text_attention_probs_dropout_prob": 0.1,
"text_hidden_act": "gelu",
"text_hidden_dropout_prob": 0.1,
"text_hidden_size": 1024,
"text_initializer_range": 0.02,
"text_intermediate_size": 4096,
"text_max_position_embeddings": 512,
"text_num_attention_heads": 16,
"text_num_hidden_layers": 24,
"text_type_vocab_size": 2
}
\ No newline at end of file
{
"embed_dim": 1024,
"image_resolution": 224,
"vision_layers": 32,
"vision_width": 1280,
"vision_head_width": 80,
"vision_patch_size": 14
}
\ No newline at end of file
export CUDA_VISIBLE_DEVICES=0,1,2,3

# Launcher for HunyuanDiT IP-Adapter training via hydit/run_g_ipadapter.sh.
# Extra CLI flags may be appended when invoking this script; they are
# forwarded verbatim through "$@" at the end of the command line.

task_flag="IP_Adapter"                          # identifies the run/output folder
index_file=dataset/porcelain/jsons/porcelain_mt.json  # training data index
results_dir=./log_EXP                           # save root for results
batch_size=1                                    # training batch size
image_size=1024                                 # training image resolution
grad_accu_steps=1                               # gradient accumulation steps
warmup_num_steps=0                              # warm-up steps
lr=0.0001                                       # learning rate
ckpt_every=10                                   # create a ckpt every a few steps
ckpt_latest_every=10000                         # create a ckpt named `latest.pt` every a few steps
ckpt_every_n_epoch=2                            # create a ckpt every a few epochs
epochs=8                                        # total training epochs

# NOTE: every continuation backslash must be preceded by a space; the original
# had `0.22\`, `0.05\` and `--resume\`, which glued each value to the next
# flag (e.g. the token `0.22--uncond-p-img`), silently corrupting arguments.
PYTHONPATH=. \
sh ./hydit/run_g_ipadapter.sh \
    --task-flag "${task_flag}" \
    --noise-schedule scaled_linear --beta-start 0.00085 --beta-end 0.018 \
    --predict-type v_prediction \
    --multireso \
    --reso-step 64 \
    --uncond-p 0.22 \
    --uncond-p-t5 0.22 \
    --uncond-p-img 0.05 \
    --index-file "${index_file}" \
    --random-flip \
    --lr "${lr}" \
    --batch-size "${batch_size}" \
    --image-size "${image_size}" \
    --global-seed 999 \
    --grad-accu-steps "${grad_accu_steps}" \
    --warmup-num-steps "${warmup_num_steps}" \
    --use-flash-attn \
    --use-fp16 \
    --extra-fp16 \
    --results-dir "${results_dir}" \
    --resume \
    --resume-module-root ./ckpts/t2i/model/pytorch_model_distill.pt \
    --epochs "${epochs}" \
    --ckpt-every "${ckpt_every}" \
    --ckpt-latest-every "${ckpt_latest_every}" \
    --ckpt-every-n-epoch "${ckpt_every_n_epoch}" \
    --log-every 10 \
    --deepspeed \
    --use-zero-stage 2 \
    --gradient-checkpointing \
    --no-strict \
    --training-parts ipadapter \
    --is-ipa True \
    --resume-ipa True \
    --resume-ipa-root ./ckpts/t2i/model/ipa.pt \
    "$@"
This source diff could not be displayed because it is too large. You can view the blob instead.
# kohya_ss-hydit
This repository contains custom codes for kohya_ss GUI, and sd-scripts training codes for HunyuanDiT.
## Overview
### Train HunyuanDiT with Kohya_ss
Train HunyuanDiT with Dreambooth.
![dreambooth](img/dreambooth.png)
Train HunyuanDiT with LoRA.
![lora](img/lora.png)
## Usage
We provide several commands to quick start:
```shell
# Download kohya_ss GUI
git clone https://github.com/bmaltais/kohya_ss.git
cd kohya_ss/
# Download sd-scripts training backend, use dev branch
git clone -b dev https://github.com/kohya-ss/sd-scripts ./sd-scripts
# Copy the custom GUI code into the kohya_ss GUI, replacing files with the same name
cp -Rf ${HunyuanDiT}/kohya_ss-hydit/* ./
# Download model weights as before or link the existing model folder to kohya_ss/models.
python -m pip install "huggingface_hub[cli]"
# If you want to download the full model, use the following command
huggingface-cli download Tencent-Hunyuan/HunyuanDiT-v1.1 --local-dir ./models/HunyuanDiT-V1.1
huggingface-cli download Tencent-Hunyuan/HunyuanDiT-V1.2 --local-dir ./models/HunyuanDiT-V1.2
# Or, if you want to download the fp16 pruned model
huggingface-cli download KBlueLeaf/HunYuanDiT-V1.1-fp16-pruned --local-dir ./models/HunyuanDiT-V1.1-fp16-pruned
# After the model is downloaded, rename the folders so they follow the kohya standard format:
# rename the file name in t2i/ folder as shown below:
# HunyuanDiT-V1.2/t2i/
# - model/ -> denoiser/
# - clip_text_encoder/ -> clip/
# - mt5/ -> mt5/
# - sdxl-vae-fp16-fix/ -> vae/
# Also you may need to move tokenizer/* into clip/ folder
mv HunyuanDiT-V1.2/t2i/model/ HunyuanDiT-V1.2/t2i/denoiser/
mv HunyuanDiT-V1.2/t2i/clip_text_encoder/ HunyuanDiT-V1.2/t2i/clip/
# mt5/ already matches the kohya standard name, so no rename is needed for it
mv HunyuanDiT-V1.2/t2i/sdxl-vae-fp16-fix/ HunyuanDiT-V1.2/t2i/vae/
mv HunyuanDiT-V1.2/t2i/tokenizer/* HunyuanDiT-V1.2/t2i/clip/
# Create a dedicated conda environment with Python 3.10
conda create -n hydit-kohya python=3.10.12
conda activate hydit-kohya
# Install some essential packages, please make sure cuda environment is installed and python version is 3.10
# For cuda 12:
pip install torch==2.1.2 torchvision==0.16.2 xformers==0.0.23.post1
# For cuda 11:
pip install torch==2.1.2+cu118 torchvision==0.16.2+cu118 xformers==0.0.23.post1+cu118 --extra-index-url https://download.pytorch.org/whl/cu118
# For cpu offloading to save GPU memory, we recommend to install Deepspeed as follows:
DS_BUILD_CPU_ADAM=1 pip install deepspeed==0.14.1
# Install other python package
pip install -r hunyuan_requirements.txt
# Run the Kohya_ss UI launch command
python kohya_gui.py
```
After the panel is launched, you can use the GUI to train the HunyuanDiT model.
> If you want to train the HunyuanDiT model:
>`v_parameterization` checkbox **is required**.
> Model version checkbox like `HunYuan 1.1` and `HunYuan 1.2` **is required**. Please select the corresponding version
As is shown in the following figure:
![checkbox](img/CheckBox.png)
If you face any WORLD_SIZE key errors in environment variables, try setting it manually, like: export WORLD_SIZE=1.
## Training Notice
> ⚠️ Important Reminder:
> Some arguments used in the training process only support the DEFAULT value so far. Please read the following instructions first.
### Hardware requirements
The table below outlines GPU memory usage for various training methods on the `HunYuanDiT V1.2` model. We suggest using `HunYuanDiT V1.2` over `HunYuanDiT V1.1` due to its superior performance and lower memory consumption.
| Training Method | Gradient checkpoint enable | Deepspeed enable| GPU Memory Usage |
|------------|----------------------------|-----------------|------------------|
| Dreambooth | No | No | 33 GB |
| Dreambooth | Yes | No | 26 GB |
| Dreambooth | Yes | Yes | 16 GB |
| LoRA | No | No (Not support yet) | 24 GB |
| LoRA | Yes | No (Not support yet) | 11 GB |
- If a `CUDA OUT OF MEMORY` error arises, consider using gradient checkpointing or enabling Deepspeed (currently only supports the Dreambooth method) to decrease memory usage as follows:
>
> `Parameters` -> `Advanced` -> `Gradient Checkpointing`
>
> `Parameters` -> `Advanced` -> `Enable deepspeed`
>
> ![deepspeed](img/deepspeed.png)
### Dreambooth method
- In the Dreambooth method, the model will be only saved as `ckpt` format.
- We only support the default `xformers` cross attention.
- You can optionally employ gradient checkpointing, optimizer offloading, and parameter offloading to save GPU memory usage.
- Gradient checkpointing (click on its checkbox) only needs 24GBs instead of the original 33 GBs. Furthermore, optimizer and parameter offloading (click on three checkboxes of `enable deepspeed`, `offload optimizer device` and `offload param device` and set zero stage as 2 or 3) can work together to decrease the memory usage to only 16GBs.
### LoRA method
- We only support the default `xformers` cross attention.
- Only `HunYuan 1.1` supports `LyCORIS/..` right now.
## Inference
We offer a basic Gradio interface for inference. Run the command below to start:
```bash
python ./sd-scripts/hunyuan_inference_gui.py
```
Visit `http://0.0.0.0:7888` to view the GUI in browser.
Ensure to choose the right training method (`Dreambooth`/`LoRA`/`LyCORIS`), model version (`HunYuan 1.1` / `HunYuan 1.2`), and model path as displayed below:
![inference](./img/Inference.png)
You can also initiate inference using the command below.
```shell
# Inference with dreambooth
python ./sd-scripts/hunyuan_test_dreambooth.py
# Inference with lora
python ./sd-scripts/hunyuan_test_lora.py
# Inference with lycoris
python ./sd-scripts/hunyuan_test_lycoris.py
```
## Reference
<a id="1">[1]</a>
https://github.com/bmaltais/kohya_ss
<a id="2">[2]</a>
https://github.com/kohya-ss/sd-scripts
<a id="3">[3]</a>
https://github.com/kohya-ss/sd-scripts/pull/1378
## Acknowledgements
We sincerely appreciate the joint efforts of [KohakuBlueleaf](https://github.com/KohakuBlueleaf/KohakuBlueleaf) and [Bdsqlsz](https://github.com/sdbds).
absl-py==2.1.0
accelerate==0.25.0
aiofiles==23.2.1
aiohttp==3.9.5
aiosignal==1.3.1
altair==4.2.2
annotated-types==0.7.0
antlr4-python3-runtime==4.9.3
anyio==4.4.0
appdirs==1.4.4
astunparse==1.6.3
async-timeout==4.0.3
attrs==23.2.0
bitsandbytes==0.42.0
cachetools==5.3.3
certifi==2022.12.7
charset-normalizer==2.1.1
clean-fid==0.1.35
click==8.1.7
clip-anytorch==2.6.0
contourpy==1.2.1
cycler==0.12.1
dadaptation==3.1
dctorch==0.1.2
diffusers==0.25.0
dnspython==2.6.1
docker-pycreds==0.4.0
easygui==0.98.3
einops==0.7.0
email_validator==2.2.0
engineering-notation==0.10.0
entrypoints==0.4
exceptiongroup==1.2.1
fairscale==0.4.13
fastapi==0.111.0
fastapi-cli==0.0.4
ffmpy==0.3.2
filelock==3.13.1
flatbuffers==24.3.25
fonttools==4.53.0
frozenlist==1.4.1
fsspec==2024.2.0
ftfy==6.1.1
gast==0.5.5
gitdb==4.0.11
GitPython==3.1.43
google-auth==2.30.0
google-auth-oauthlib==1.2.0
google-pasta==0.2.0
gradio==4.37.1
gradio_client==1.0.2
grpcio==1.64.1
h11==0.14.0
h5py==3.11.0
httpcore==1.0.5
httptools==0.6.1
httpx==0.27.0
huggingface-hub==0.20.1
idna==3.4
imageio==2.34.2
imagesize==1.4.1
importlib_metadata==7.2.1
importlib_resources==6.4.0
invisible-watermark==0.2.0
Jinja2==3.1.3
jsonmerge==1.9.2
jsonschema==4.22.0
jsonschema-specifications==2023.12.1
k-diffusion==0.1.1.post1
keras==2.15.0
kiwisolver==1.4.5
kornia==0.7.2
kornia_rs==0.1.3
lazy_loader==0.4
libclang==18.1.1
-e git+https://github.com/kohya-ss/sd-scripts.git@56bb81c9e6483b8b4d5b83639548855b8359f4b4#egg=library
lightning-utilities==0.11.2
lion-pytorch==0.0.6
lycoris_lora==3.0.0.dev12
Markdown==3.6
markdown-it-py==3.0.0
MarkupSafe==2.1.5
matplotlib==3.9.0
mdurl==0.1.2
ml-dtypes==0.2.0
mpmath==1.3.0
multidict==6.0.5
networkx==3.2.1
numpy==1.26.4
oauthlib==3.2.2
omegaconf==2.3.0
onnx==1.15.0
open-clip-torch==2.20.0
opencv-python==4.10.0.84
opt-einsum==3.3.0
orjson==3.10.5
packaging==24.1
pandas==2.2.2
pathtools==0.1.2
pillow==10.2.0
platformdirs==4.2.2
prodigyopt==1.0
protobuf==3.20.3
psutil==6.0.0
pyasn1==0.6.0
pyasn1_modules==0.4.0
pydantic==2.7.4
pydantic_core==2.18.4
pydub==0.25.1
Pygments==2.18.0
pyparsing==3.1.2
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.9
pytorch-lightning==1.9.0
pytz==2024.1
PyWavelets==1.6.0
PyYAML==6.0.1
referencing==0.35.1
regex==2024.5.15
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.7.1
rpds-py==0.18.1
rsa==4.9
ruff==0.4.10
safetensors==0.4.2
scikit-image==0.24.0
scipy==1.11.4
semantic-version==2.10.0
sentencepiece==0.2.0
sentry-sdk==2.6.0
setproctitle==1.3.3
shellingham==1.5.4
six==1.16.0
smmap==5.0.1
sniffio==1.3.1
starlette==0.37.2
sympy==1.12
tensorboard==2.15.2
tensorboard-data-server==0.7.2
tensorflow==2.15.0.post1
tensorflow-estimator==2.15.0
tensorflow-io-gcs-filesystem==0.37.0
termcolor==2.4.0
tifffile==2024.6.18
timm==0.6.12
tk==0.1.0
tk-tools==0.16.0
tokenizers==0.15.2
toml==0.10.2
tomlkit==0.12.0
toolz==0.12.1
torchdiffeq==0.2.4
torchmetrics==1.4.0.post0
torchsde==0.2.6
tqdm==4.66.4
trampoline==0.1.2
transformers==4.38.0
triton==2.1.0
typer==0.12.3
typing_extensions==4.9.0
tzdata==2024.1
ujson==5.10.0
urllib3==2.2.2
uvicorn==0.30.1
uvloop==0.19.0
voluptuous==0.13.1
wandb==0.15.11
watchfiles==0.22.0
wcwidth==0.2.13
websockets==11.0.3
Werkzeug==3.0.3
wrapt==1.14.1
yarl==1.9.4
zipp==3.19.2
{
"LoRA_type": "Standard",
"LyCORIS_preset": "full",
"adaptive_noise_scale": 0,
"additional_parameters": "",
"async_upload": false,
"block_alphas": "",
"block_dims": "",
"block_lr_zero_threshold": "",
"bucket_no_upscale": true,
"bucket_reso_steps": 64,
"bypass_mode": false,
"cache_latents": true,
"cache_latents_to_disk": false,
"caption_dropout_every_n_epochs": 0,
"caption_dropout_rate": 0,
"caption_extension": ".txt",
"clip_skip": 1,
"color_aug": false,
"constrain": 0,
"conv_alpha": 1,
"conv_block_alphas": "",
"conv_block_dims": "",
"conv_dim": 1,
"dataset_config": "",
"debiased_estimation_loss": false,
"decompose_both": false,
"dim_from_weights": false,
"dora_wd": false,
"down_lr_weight": "",
"dynamo_backend": "no",
"dynamo_mode": "default",
"dynamo_use_dynamic": false,
"dynamo_use_fullgraph": false,
"enable_bucket": true,
"epoch": 1,
"extra_accelerate_launch_args": "",
"factor": -1,
"flip_aug": false,
"fp8_base": false,
"full_bf16": false,
"full_fp16": false,
"gpu_ids": "",
"gradient_accumulation_steps": 1,
"gradient_checkpointing": true,
"huber_c": 0.1,
"huber_schedule": "snr",
"huggingface_path_in_repo": "",
"huggingface_repo_id": "",
"huggingface_repo_type": "",
"huggingface_repo_visibility": "",
"huggingface_token": "",
"hunyuan11": false,
"hunyuan12": true,
"ip_noise_gamma": 0,
"ip_noise_gamma_random_strength": false,
"keep_tokens": 0,
"learning_rate": 0.0001,
"log_tracker_config": "",
"log_tracker_name": "",
"log_with": "",
"logging_dir": "",
"loss_type": "l2",
"lr_scheduler": "cosine",
"lr_scheduler_args": "",
"lr_scheduler_num_cycles": 1,
"lr_scheduler_power": 1,
"lr_warmup": 10,
"main_process_port": 0,
"masked_loss": false,
"max_bucket_reso": 2048,
"max_data_loader_n_workers": 0,
"max_grad_norm": 1,
"max_resolution": "1024,1024",
"max_timestep": 1000,
"max_token_length": 75,
"max_train_epochs": 0,
"max_train_steps": 1600,
"mem_eff_attn": false,
"metadata_author": "\u6df7\u5143 & HAI",
"metadata_description": "\u4e00\u4e2a\u5b59\u609f\u7a7a\u7684\u6982\u5ff5\u5f62\u8c61lora",
"metadata_license": "MIT",
"metadata_tags": "\u5b59\u609f\u7a7a",
"metadata_title": "HunyuanDiTv12_wukong_lora",
"mid_lr_weight": "",
"min_bucket_reso": 256,
"min_snr_gamma": 0,
"min_timestep": 0,
"mixed_precision": "fp16",
"model_list": "custom",
"module_dropout": 0.05,
"multi_gpu": false,
"multires_noise_discount": 0.3,
"multires_noise_iterations": 0,
"network_alpha": 128,
"network_dim": 128,
"network_dropout": 0.05,
"network_weights": "",
"noise_offset": 0,
"noise_offset_random_strength": false,
"noise_offset_type": "Original",
"num_cpu_threads_per_process": 2,
"num_machines": 1,
"num_processes": 1,
"optimizer": "AdamW8bit",
"optimizer_args": "",
"output_dir": "/root/kohya_ss/outputs",
"output_name": "last",
"persistent_data_loader_workers": false,
"pretrained_model_name_or_path": "/root/kohya_ss/HunyuanDiT-V1.2/t2i/",
"prior_loss_weight": 1,
"random_crop": false,
"rank_dropout": 0.05,
"rank_dropout_scale": false,
"reg_data_dir": "",
"rescaled": false,
"resume": "",
"resume_from_huggingface": "",
"sample_every_n_epochs": 0,
"sample_every_n_steps": 0,
"sample_prompts": "",
"sample_sampler": "euler_a",
"save_as_bool": false,
"save_every_n_epochs": 1,
"save_every_n_steps": 50,
"save_last_n_steps": 0,
"save_last_n_steps_state": 0,
"save_model_as": "safetensors",
"save_precision": "fp16",
"save_state": false,
"save_state_on_train_end": false,
"save_state_to_huggingface": false,
"scale_v_pred_loss_like_noise_pred": false,
"scale_weight_norms": 0,
"sdxl": false,
"sdxl_cache_text_encoder_outputs": false,
"sdxl_no_half_vae": false,
"seed": 0,
"shuffle_caption": false,
"stop_text_encoder_training": 0,
"text_encoder_lr": 0.0001,
"train_batch_size": 1,
"train_data_dir": "/root/demo_images/wukong",
"train_norm": false,
"train_on_input": true,
"training_comment": "",
"unet_lr": 0.0001,
"unit": 1,
"up_lr_weight": "",
"use_cp": false,
"use_scalar": false,
"use_tucker": false,
"v2": false,
"v_parameterization": true,
"v_pred_like_loss": 0,
"vae": "",
"vae_batch_size": 0,
"wandb_api_key": "",
"wandb_run_name": "",
"weighted_captions": false,
"xformers": "xformers"
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment