@@ -43,9 +43,11 @@ RUN cd flash-attention && python setup.py install && rm -rf build
RUN cd flash-attention/hopper && python setup.py install && rm -rf build
RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
RUN cd SageAttention-1104 && TORCH_CUDA_ARCH_LIST="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 python setup.py install && rm -rf build
RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
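If you want to verify that these build steps succeeded, a quick import probe inside the container can confirm the extensions landed. This is an illustrative sketch, not part of the upstream Dockerfile; `flash_attn` and `sageattention` are the published module names of the two packages built above:

```bash
# Sanity check (illustrative, not from the Dockerfile): confirm that the
# compiled extensions import cleanly before relying on the image.
python -c "import flash_attn; print('flash_attn', flash_attn.__version__)"
python -c "import sageattention; print('sageattention OK')"
```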
@@ -27,10 +27,10 @@ We strongly recommend using the Docker environment, which is the simplest and fa
#### 1. Pull Image
Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags), select a tag with the latest date, such as `25111001-cu128`:
```bash
docker pull lightx2v/lightx2v:25111001-cu128
```
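After pulling the image, you typically start an interactive container with GPU access. The command below is a hedged sketch: the host mount path and the run flags are assumptions for illustration, not taken from the LightX2V docs.

```bash
# Illustrative sketch: run the pulled image interactively with all GPUs.
# /path/to/LightX2V is a placeholder for your local checkout.
docker run --gpus all -it --rm \
    -v /path/to/LightX2V:/workspace/LightX2V \
    lightx2v/lightx2v:25111001-cu128 /bin/bash
```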
We recommend using the `cuda128` environment for faster inference. If you need the `cuda124` environment, use image versions with the `-cu124` suffix:
...
...
@@ -51,7 +51,7 @@ For mainland China, if the network is unstable when pulling images, you can pull