Update configs and docker (#291)

6fc67329 · Yang Yong(雍洋) · GitHub · 27c5575f · 6fc67329 · 6fc67329
Commit 6fc67329 authored Sep 05, 2025 by Yang Yong(雍洋) Committed by GitHub Sep 05, 2025
19 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -38,4 +38,8 @@ RUN git clone https://github.com/ModelTC/SageAttention.git
 RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
+RUN git clone https://github.com/KONAKONA666/q8_kernels.git
+RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
 WORKDIR /workspace
--- a/Dockerfile_cu124
+++ b/Dockerfile_cu124
@@ -38,4 +38,8 @@ RUN git clone https://github.com/ModelTC/SageAttention.git
 RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
+RUN git clone https://github.com/KONAKONA666/q8_kernels.git
+RUN cd q8_kernels && git submodule init && git submodule update && python setup.py install && rm -rf build
 WORKDIR /workspace
--- a/configs/offload/disk/wan_i2v_audio_phase_lazy_load_720p.json
+++ b/configs/offload/disk/wan_i2v_audio_phase_lazy_load_720p.json
-{
-    "infer_steps": 4,
-    "target_fps": 16,
-    "video_duration": 15,
-    "audio_sr": 16000,
-    "target_video_length": 81,
-    "target_height": 720,
-    "target_width": 1280,
-    "self_attn_1_type": "flash_attn3",
-    "cross_attn_1_type": "flash_attn3",
-    "cross_attn_2_type": "flash_attn3",
-    "seed": 42,
-    "sample_guide_scale": 1.0,
-    "sample_shift": 5,
-    "enable_cfg": false,
-    "cpu_offload": true,
-    "use_31_block": false,
-    "adaptive_resize": true,
-    "offload_granularity": "phase",
-    "t5_offload_granularity": "block",
-    "mm_config": {
-        "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Vllm"
-    },
-    "t5_cpu_offload": true,
-    "t5_quantized": true,
-    "t5_quant_scheme": "fp8",
-    "clip_quantized": true,
-    "clip_quant_scheme": "fp8",
-    "use_tiling_vae": true,
-    "use_tiny_vae": true,
-    "lazy_load": true,
-    "rotary_chunk": true,
-    "clean_cuda_cache": true,
-    "audio_encoder_cpu_offload": true,
-    "audio_adapter_cpu_offload": true
-}
--- a/configs/seko_talk/seko_talk_01_base.json
+++ b/configs/seko_talk/seko_talk_01_base.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_02_fp8.json
+++ b/configs/seko_talk/seko_talk_02_fp8.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_03_dist.json
+++ b/configs/seko_talk/seko_talk_03_dist.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_04_fp8_dist.json
+++ b/configs/seko_talk/seko_talk_04_fp8_dist.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_05_offload_fp8_4090.json
+++ b/configs/seko_talk/seko_talk_05_offload_fp8_4090.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 120,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_06_offload_fp8_H100.json
+++ b/configs/seko_talk/seko_talk_06_offload_fp8_H100.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 120,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_07_dist_offload.json
+++ b/configs/seko_talk/seko_talk_07_dist_offload.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 5,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_08_5B_base.json
+++ b/configs/seko_talk/seko_talk_08_5B_base.json
 {
    "infer_steps": 4,
    "target_fps": 24,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 121,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_09_base_fixed_min_area.json
+++ b/configs/seko_talk/seko_talk_09_base_fixed_min_area.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "fixed_min_area",

--- a/configs/seko_talk/seko_talk_10_fp8_dist_fixed_min_area.json
+++ b/configs/seko_talk/seko_talk_10_fp8_dist_fixed_min_area.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "fixed_min_area",

--- a/configs/seko_talk/seko_talk_11_fp8_dist_fixed_shape.json
+++ b/configs/seko_talk/seko_talk_11_fp8_dist_fixed_shape.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "fixed_shape",

--- a/configs/seko_talk/seko_talk_12_fp8_dist_fixed_shape_8gpus_1s.json
+++ b/configs/seko_talk/seko_talk_12_fp8_dist_fixed_shape_8gpus_1s.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 17,
    "prev_frame_length": 1,

--- a/configs/seko_talk/seko_talk_13_fp8_dist_bucket_shape_8gpus_5s_realtime.json
+++ b/configs/seko_talk/seko_talk_13_fp8_dist_bucket_shape_8gpus_5s_realtime.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 81,
    "resize_mode": "adaptive",

--- a/configs/seko_talk/seko_talk_14_fp8_dist_bucket_shape_8gpus_1s_realtime.json
+++ b/configs/seko_talk/seko_talk_14_fp8_dist_bucket_shape_8gpus_1s_realtime.json
 {
    "infer_steps": 4,
    "target_fps": 16,
-    "video_duration": 15,
+    "video_duration": 360,
    "audio_sr": 16000,
    "target_video_length": 17,
    "prev_frame_length": 1,

--- a/docs/EN/source/getting_started/quickstart.md
+++ b/docs/EN/source/getting_started/quickstart.md
@@ -27,16 +27,16 @@ We strongly recommend using the Docker environment, which is the simplest and fa
 #### 1. Pull Image
-Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags), select a tag with the latest date, such as `25082901-cu128`:
+Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags) and select the tag with the latest date, such as `25090503-cu128`:
 ```bash
-docker pull lightx2v/lightx2v:25082901-cu128
+docker pull lightx2v/lightx2v:25090503-cu128
 ```
 We recommend using the `cuda128` environment for faster inference speed. If you need to use the `cuda124` environment, you can use image versions with the `-cu124` suffix:
 ```bash
-docker pull lightx2v/lightx2v:25082901-cu124
+docker pull lightx2v/lightx2v:25090503-cu124
 ```
 #### 2. Run Container
@@ -51,10 +51,10 @@ For mainland China, if the network is unstable when pulling images, you can pull
 ```bash
 # cuda128
-docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25082901-cu128
+docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25090503-cu128
 # cuda124
-docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25082901-cu124
+docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25090503-cu124
 ```
 ### 🐍 Conda Environment Setup

--- a/docs/ZH_CN/source/getting_started/quickstart.md
+++ b/docs/ZH_CN/source/getting_started/quickstart.md
@@ -27,16 +27,16 @@
 #### 1. 拉取镜像
-访问 LightX2V 的 [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags)，选择一个最新日期的 tag，比如 `25082901-cu128`：
+访问 LightX2V 的 [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags)，选择一个最新日期的 tag，比如 `25090503-cu128`：
 ```bash
-docker pull lightx2v/lightx2v:25082901-cu128
+docker pull lightx2v/lightx2v:25090503-cu128
 ```
 我们推荐使用`cuda128`环境，以获得更快的推理速度，若需要使用`cuda124`环境，可以使用带`-cu124`后缀的镜像版本：
 ```bash
-docker pull lightx2v/lightx2v:25082901-cu124
+docker pull lightx2v/lightx2v:25090503-cu124
 ```
 #### 2. 运行容器
@@ -51,10 +51,10 @@ docker run --gpus all -itd --ipc=host --name [容器名] -v [挂载设置] --ent
 ```bash
 # cuda128
-docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25082901-cu128
+docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25090503-cu128
 # cuda124
-docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25082901-cu124
+docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25090503-cu124
 ```
 ### 🐍 Conda 环境搭建