Commit bf47451d authored by helloyongyang

Update docker & compile & nbhd

parent f772bd96
@@ -43,9 +43,11 @@ RUN cd flash-attention && python setup.py install && rm -rf build
 RUN cd flash-attention/hopper && python setup.py install && rm -rf build
-RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
-RUN cd SageAttention-1104 && TORCH_CUDA_ARCH_LIST="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 python setup.py install && rm -rf build
+RUN git clone https://github.com/ModelTC/SageAttention.git --depth 1
+RUN cd SageAttention && CUDA_ARCHITECTURES="8.0,8.6,8.9,9.0,12.0" EXT_PARALLEL=4 NVCC_APPEND_FLAGS="--threads 8" MAX_JOBS=32 pip install --no-cache-dir -v -e .
+RUN git clone https://github.com/ModelTC/SageAttention-1104.git --depth 1
 RUN cd SageAttention-1104/sageattention3_blackwell && python setup.py install && rm -rf build
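The arch list `8.0,8.6,8.9,9.0,12.0` spans Ampere (SM 8.0/8.6), Ada (SM 8.9), Hopper (SM 9.0), and consumer Blackwell (SM 12.0); the main SageAttention build covers the pre-Blackwell GPUs, while the separate `sageattention3_blackwell` build targets SM 12.0. A minimal sketch of how a dispatcher might pick between the two generations at runtime — the function name and return labels are illustrative, not this repo's API:

```python
import torch

def pick_sage_backend() -> str:
    """Illustrative only: choose a SageAttention generation by compute capability."""
    major, minor = torch.cuda.get_device_capability()
    cc = 10 * major + minor
    if cc >= 120:   # Blackwell (SM 12.0, e.g. RTX 50-series): sageattention3_blackwell build
        return "sage_attn3"
    if cc >= 80:    # Ampere/Ada/Hopper (SM 8.0-9.0): SageAttention 2 build
        return "sage_attn2"
    raise RuntimeError(f"no SageAttention kernels compiled for SM {cc / 10:.1f}")
```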
@@ -5,7 +5,7 @@ RUN cd /opt/lightx2v/deploy/server/frontend \
     && npm install \
     && npm run build
-FROM lightx2v/lightx2v:25110701-cu128 AS base
+FROM lightx2v/lightx2v:25111001-cu128 AS base
 RUN mkdir /workspace/LightX2V
 WORKDIR /workspace/LightX2V
configs/seko_talk/seko_talk_24_fp8_dist_compile_nbhd_attn.json (new file, referenced by the run script below):

{
  "infer_steps": 4,
  "target_fps": 16,
  "video_duration": 360,
  "audio_sr": 16000,
  "target_video_length": 81,
  "resize_mode": "adaptive",
  "self_attn_1_type": "nbhd_attn",
  "cross_attn_1_type": "sage_attn2",
  "cross_attn_2_type": "sage_attn2",
  "sample_guide_scale": 1.0,
  "sample_shift": 5,
  "enable_cfg": false,
  "cpu_offload": false,
  "use_31_block": false,
  "parallel": {
    "seq_p_size": 8,
    "seq_p_attn_type": "ulysses"
  },
  "clip_quantized": true,
  "clip_quant_scheme": "fp8-sgl",
  "dit_quantized": true,
  "dit_quant_scheme": "fp8-sgl",
  "adapter_quantized": true,
  "adapter_quant_scheme": "fp8-sgl",
  "t5_quantized": true,
  "t5_quant_scheme": "fp8-sgl",
  "compile": true,
  "compile_shapes": [
    [480, 832],
    [544, 960],
    [720, 1280],
    [832, 480],
    [960, 544],
    [1280, 720],
    [480, 480],
    [576, 576],
    [704, 704],
    [960, 960]
  ]
}
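`compile_shapes` enumerates the `[height, width]` buckets that are compiled ahead of time; a request whose resolution falls outside this set would trigger a fresh `torch.compile` trace at serving time. A sketch of one way to keep requests inside the pre-compiled set — the snapping policy is an assumption for illustration, not LightX2V's actual `"resize_mode": "adaptive"` logic:

```python
# Illustrative sketch: snap a requested resolution to the nearest pre-compiled
# (height, width) bucket so torch.compile never sees a new shape when serving.
COMPILE_SHAPES = [
    (480, 832), (544, 960), (720, 1280),
    (832, 480), (960, 544), (1280, 720),
    (480, 480), (576, 576), (704, 704), (960, 960),
]

def snap_to_bucket(h: int, w: int) -> tuple[int, int]:
    # Nearest bucket by aspect-ratio distance first, then by area distance.
    def key(shape):
        bh, bw = shape
        return (abs(bh / bw - h / w), abs(bh * bw - h * w))
    return min(COMPILE_SHAPES, key=key)

assert snap_to_bucket(540, 950) == (544, 960)
```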
@@ -27,10 +27,10 @@ We strongly recommend using the Docker environment, which is the simplest and fastest way
 #### 1. Pull Image
-Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags), select a tag with the latest date, such as `25110701-cu128`:
+Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags), select a tag with the latest date, such as `25111001-cu128`:
 ```bash
-docker pull lightx2v/lightx2v:25110701-cu128
+docker pull lightx2v/lightx2v:25111001-cu128
 ```
 We recommend using the `cuda128` environment for faster inference speed. If you need to use the `cuda124` environment, you can use image versions with the `-cu124` suffix:
...@@ -51,7 +51,7 @@ For mainland China, if the network is unstable when pulling images, you can pull ...@@ -51,7 +51,7 @@ For mainland China, if the network is unstable when pulling images, you can pull
```bash ```bash
# cuda128 # cuda128
docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25110701-cu128 docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25111001-cu128
# cuda124 # cuda124
docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25101501-cu124 docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25101501-cu124
......
The Chinese documentation receives the same tag update (translated here):

@@ -27,10 +27,10 @@
 #### 1. Pull Image
-Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags) and pick a tag with the latest date, such as `25110701-cu128`:
+Visit LightX2V's [Docker Hub](https://hub.docker.com/r/lightx2v/lightx2v/tags) and pick a tag with the latest date, such as `25111001-cu128`:
 ```bash
-docker pull lightx2v/lightx2v:25110701-cu128
+docker pull lightx2v/lightx2v:25111001-cu128
 ```
 We recommend the `cuda128` environment for faster inference speed; if you need the `cuda124` environment, use the image versions with the `-cu124` suffix:

@@ -51,7 +51,7 @@ docker run --gpus all -itd --ipc=host --name [container name] -v [mount settings] --ent
 ```bash
 # cuda128
-docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25110701-cu128
+docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25111001-cu128
 # cuda124
 docker pull registry.cn-hangzhou.aliyuncs.com/yongyang/lightx2v:25101501-cu124
@@ -78,6 +78,7 @@ class NbhdAttnWeight(AttnWeightTemplate):
         self.config = {}

     @classmethod
+    @torch.compiler.disable
     def prepare_mask(cls, seqlen):
         if seqlen == cls.seqlen:
             return
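`prepare_mask` caches the neighborhood-attention mask as class state keyed on `seqlen` and returns early on a cache hit; that data-dependent branching and class-level mutation is exactly what `torch.compile` guards handle poorly, so `@torch.compiler.disable` keeps the builder out of the traced graph and lets it run eagerly. A self-contained sketch of the same pattern — the band-mask body is a stand-in, not the repo's actual mask logic:

```python
import torch

class NbhdMaskCache:
    seqlen = None
    mask = None

    @classmethod
    @torch.compiler.disable  # run eagerly even when callers are compiled
    def prepare_mask(cls, seqlen, window=256):
        if seqlen == cls.seqlen:  # cache hit: reuse the stored mask
            return
        idx = torch.arange(seqlen)
        # Stand-in band mask: token i attends within +/- `window` positions.
        cls.mask = (idx[:, None] - idx[None, :]).abs() <= window
        cls.seqlen = seqlen
```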
@@ -107,7 +107,7 @@ class WanAudioModel(WanModel):
         self.pre_weight.to_cuda()
         self.transformer_weights.non_block_weights_to_cuda()
-        max_audio_num_num = self.config.get("compile_max_audios", 3)
+        max_audio_num_num = self.config.get("compile_max_audios", 1)
         for audio_num in range(1, max_audio_num_num + 1):
             for shape in compile_shapes:
                 self.start_compile(shape, audio_num, with_mask=True)
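The warm-up cost here is (number of audio counts) × (number of shapes): with the ten-entry `compile_shapes` list above, the old default of `compile_max_audios = 3` meant 3 × 10 = 30 warm-up compilation passes, and the new default of 1 cuts that to 10. Deployments that serve multi-audio inputs can presumably opt back in by setting `compile_max_audios` explicitly in the config.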
#!/bin/bash

lightx2v_path=/path/to/Lightx2v
model_path=/path/to/SekoTalk-Distill-fp8

export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7

# Set common environment variables
source ${lightx2v_path}/scripts/base/base.sh
export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export SENSITIVE_LAYER_DTYPE=None

# Distributed inference across 8 GPUs
torchrun --nproc-per-node 8 -m lightx2v.infer \
    --model_cls seko_talk \
    --task s2v \
    --model_path $model_path \
    --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_24_fp8_dist_compile_nbhd_attn.json \
    --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
    --negative_prompt "色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走" \
    --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
    --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.mp3 \
    --save_result_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
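The launch assumes eight GPUs: `--nproc-per-node 8` matches `"seq_p_size": 8` in the config's `parallel` block, with each rank holding one Ulysses sequence-parallel shard, so changing the GPU count presumably requires adjusting both values together.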