Hunyuan1.5 (#484)

Thanks to HunyuanVideo Team and ModelTC Team. --------- Co-authored-by: gushiqiao <975033167@qq.com> Co-authored-by: gushiqiao <77222802+gushiqiao@users.noreply.github.com> Co-authored-by: chendingyu <chendingyu1@sensetime.com> Co-authored-by: XHPlus <xhplus@163.com> Co-authored-by: wangshankun <wangshankun2011@hotmail.com> Co-authored-by: STwangyingrui <86730325+STwangyingrui@users.noreply.github.com> Co-authored-by: root <root@pt-80f094c20fc44a8cad096e5f3dbc962e-worker-0.pt-80f094c20fc44a8cad096e5f3dbc962e.ns-devsft-3460edd0.svc.cluster.local>

Hunyuan1.5 (#484)
Thanks to HunyuanVideo Team and ModelTC Team. --------- Co-authored-by: gushiqiao <975033167@qq.com> Co-authored-by: gushiqiao <77222802+gushiqiao@users.noreply.github.com> Co-authored-by: chendingyu <chendingyu1@sensetime.com> Co-authored-by: XHPlus <xhplus@163.com> Co-authored-by: wangshankun <wangshankun2011@hotmail.com> Co-authored-by: STwangyingrui <86730325+STwangyingrui@users.noreply.github.com> Co-authored-by: root <root@pt-80f094c20fc44a8cad096e5f3dbc962e-worker-0.pt-80f094c20fc44a8cad096e5f3dbc962e.ns-devsft-3460edd0.svc.cluster.local>
f21da849 · Yang Yong (雍洋) · GitHub · 3efc43f5 · f21da849 · f21da849
Unverified Commit f21da849 authored Nov 21, 2025 by Yang Yong (雍洋) Committed by GitHub Nov 21, 2025
16 changed files
--- a/scripts/seko_talk/run_seko_talk_14_fp8_dist_bucket_shape_8gpus_1s_realtime.sh
+++ b/scripts/seko_talk/run_seko_talk_14_fp8_dist_bucket_shape_8gpus_1s_realtime.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_15_base_compile.sh
+++ b/scripts/seko_talk/run_seko_talk_15_base_compile.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_16_fp8_dist_compile.sh
+++ b/scripts/seko_talk/run_seko_talk_16_fp8_dist_compile.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_17_vsr.sh
+++ b/scripts/seko_talk/run_seko_talk_17_vsr.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_18_5090_base.sh
+++ b/scripts/seko_talk/run_seko_talk_18_5090_base.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_19_A800_int8_dist.sh
+++ b/scripts/seko_talk/run_seko_talk_19_A800_int8_dist.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_20_A800_int8.sh
+++ b/scripts/seko_talk/run_seko_talk_20_A800_int8.sh
@@ -9,7 +9,6 @@ export CUDA_VISIBLE_DEVICES=0
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_21_5090_int8.sh
+++ b/scripts/seko_talk/run_seko_talk_21_5090_int8.sh
@@ -9,7 +9,6 @@ export CUDA_VISIBLE_DEVICES=0
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
+++ b/scripts/seko_talk/run_seko_talk_21_5090_int8_dist.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_22_nbhd_attn.sh
+++ b/scripts/seko_talk/run_seko_talk_22_nbhd_attn.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_23_fp8_dist_nbhd_attn.sh
+++ b/scripts/seko_talk/run_seko_talk_23_fp8_dist_nbhd_attn.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/seko_talk/run_seko_talk_24_fp8_dist_compile_nbhd_attn.sh
+++ b/scripts/seko_talk/run_seko_talk_24_fp8_dist_compile_nbhd_attn.sh
@@ -8,7 +8,6 @@ export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 export SENSITIVE_LAYER_DTYPE=None

--- a/scripts/wan/run_wan_i2v_lazy_load.sh
+++ b/scripts/wan/run_wan_i2v_lazy_load.sh
@@ -5,7 +5,6 @@ lightx2v_path=
 model_path=
 export CUDA_VISIBLE_DEVICES=0
-export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 # set environment variables
 source ${lightx2v_path}/scripts/base/base.sh
 export DTYPE=FP16

--- a/tools/convert/converter.py
+++ b/tools/convert/converter.py
@@ -800,7 +800,7 @@ def main():
    parser.add_argument(
        "-t",
        "--model_type",
-        choices=["wan_dit", "hunyuan_dit", "wan_t5", "wan_clip", "wan_animate_dit", "qwen_image_dit"],
+        choices=["wan_dit", "hunyuan_dit", "wan_t5", "wan_clip", "wan_animate_dit", "qwen_image_dit", "qwen25vl_llm"],
        default="wan_dit",
        help="Model type",
    )
@@ -897,16 +897,17 @@ def main():
                "key_idx": 2,
                "target_keys": [
                    "img_mod",
-                    "img_attn_qkv",
+                    "img_attn_q",
+                    "img_attn_k",
+                    "img_attn_v",
                    "img_attn_proj",
                    "img_mlp",
                    "txt_mod",
-                    "txt_attn_qkv",
+                    "txt_attn_q",
+                    "txt_attn_k",
+                    "txt_attn_v",
                    "txt_attn_proj",
                    "txt_mlp",
-                    "linear1",
-                    "linear2",
-                    "modulation",
                ],
                "ignore_key": None,
            },
@@ -914,7 +915,12 @@ def main():
            "wan_clip": {
                "key_idx": 3,
                "target_keys": ["attn", "mlp"],
-                "ignore_key": "textual",
+                "ignore_key": ["textual"],
+            },
+            "qwen25vl_llm": {
+                "key_idx": 3,
+                "target_keys": ["self_attn", "mlp"],
+                "ignore_key": ["visual"],
            },
        }

--- a/tools/convert/readme.md
+++ b/tools/convert/readme.md
@@ -14,6 +14,7 @@ A powerful model weight conversion tool that supports format conversion, quantiz
 ## Supported Model Types
+- `hunyuan_dit`: Hunyuan DiT 1.5 models
 - `wan_dit`: Wan DiT series models (default)
 - `wan_animate_dit`: Wan Animate DiT models
 - `qwen_image_dit`: Qwen Image DiT models
@@ -242,6 +243,38 @@ python converter.py \
    --quantized
 ```
+#### 1.5 Qwen25_vl LLM Quantization
+**INT8 Quantization**
+```bash
+python converter.py \
+    --source /path/to/hunyuanvideo-1.5/text_encoder/llm \
+    --output /path/to/output \
+    --output_ext .safetensors \
+    --output_name qwen25vl-llm-int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.float16 \
+    --model_type qwen25vl_llm \
+    --quantized \
+    --single_file
+```
+**FP8 Quantization**
+```bash
+python converter.py \
+    --source /path/to/hunyuanvideo-1.5/text_encoder/llm \
+    --output /path/to/output \
+    --output_ext .safetensors \
+    --output_name qwen25vl-llm-fp8 \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.float16 \
+    --model_type qwen25vl_llm \
+    --quantized \
+    --single_file
+```
 ### 2. LoRA Merging
 #### 2.1 Merge Single LoRA

--- a/tools/convert/readme_zh.md
+++ b/tools/convert/readme_zh.md
@@ -14,6 +14,7 @@
 ## 支持的模型类型
+- `hunyuan_dit`: Hunyuan DiT 1.5 模型
 - `wan_dit`: Wan DiT 系列模型（默认）
 - `wan_animate_dit`: Wan Animate DiT 模型
 - `qwen_image_dit`: Qwen Image DiT 模型
@@ -242,6 +243,36 @@ python converter.py \
    --quantized
 ```
+#### 1.5 Qwen25_vl 语言部分量化
+**INT8 量化**
+```bash
+python converter.py \
+    --source /path/to/hunyuanvideo-1.5/text_encoder/llm \
+    --output /path/to/output \
+    --output_ext .safetensors \
+    --output_name qwen25vl-llm-int8 \
+    --linear_dtype torch.int8 \
+    --non_linear_dtype torch.float16 \
+    --model_type qwen25vl_llm \
+    --quantized \
+    --single_file
+```
+**FP8 量化**
+```bash
+python converter.py \
+    --source /path/to/hunyuanvideo-1.5/text_encoder/llm \
+    --output /path/to/output \
+    --output_ext .safetensors \
+    --output_name qwen25vl-llm-fp8 \
+    --linear_dtype torch.float8_e4m3fn \
+    --non_linear_dtype torch.float16 \
+    --model_type qwen25vl_llm \
+    --quantized \
+    --single_file
+```
 ### 2. LoRA 融合
 #### 2.1 融合单个 LoRA