Commit 6de0a3b4 authored by Yang Yong(雍洋), committed by GitHub
Browse files

Add audio input files and update pre-commit config for larger files (#283)

parent 8de61521
...@@ -16,6 +16,7 @@ repos: ...@@ -16,6 +16,7 @@ repos:
- id: check-yaml - id: check-yaml
- id: check-toml - id: check-toml
- id: check-added-large-files - id: check-added-large-files
args: ['--maxkb=3000'] # Allow files up to 3MB
- id: check-case-conflict - id: check-case-conflict
- id: check-merge-conflict - id: check-merge-conflict
- id: debug-statements - id: debug-statements
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"target_height": 720, "target_height": 720,
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "adaptive", "resize_mode": "adaptive",
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "adaptive", "resize_mode": "adaptive",
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "adaptive", "resize_mode": "adaptive",
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
"cpu_offload": false, "cpu_offload": false,
"use_31_block": false, "use_31_block": false,
"parallel": { "parallel": {
"seq_p_size": 4, "seq_p_size": 8,
"seq_p_attn_type": "ulysses" "seq_p_attn_type": "ulysses"
} }
} }
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "adaptive", "resize_mode": "adaptive",
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
"cpu_offload": false, "cpu_offload": false,
"use_31_block": false, "use_31_block": false,
"parallel": { "parallel": {
"seq_p_size": 4, "seq_p_size": 8,
"seq_p_attn_type": "ulysses" "seq_p_attn_type": "ulysses"
}, },
"mm_config": { "mm_config": {
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 24, "target_fps": 24,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 121, "target_video_length": 121,
"resize_mode": "adaptive", "resize_mode": "adaptive",
"text_len": 512, "text_len": 512,
"num_channels_latents": 48, "num_channels_latents": 48,
"vae_stride": [4, 16, 16], "vae_stride": [
4,
16,
16
],
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
...@@ -22,8 +26,8 @@ ...@@ -22,8 +26,8 @@
"use_31_block": false, "use_31_block": false,
"lora_configs": [ "lora_configs": [
{ {
"path": "/mnt/aigc/rtxiang/pretrain/qianhai_weights/lora_model.safetensors", "path": "/mnt/aigc/rtxiang/pretrain/qianhai_weights/lora_model.safetensors",
"strength": 0.125 "strength": 0.125
} }
] ]
} }
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "fixed_min_area", "resize_mode": "fixed_min_area",
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "fixed_min_area", "resize_mode": "fixed_min_area",
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 81, "target_video_length": 81,
"resize_mode": "fixed_shape", "resize_mode": "fixed_shape",
"fixed_shape": [240, 320], "fixed_shape": [
240,
320
],
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_fps": 16, "target_fps": 16,
"video_duration": 12, "video_duration": 15,
"audio_sr": 16000, "audio_sr": 16000,
"target_video_length": 17, "target_video_length": 17,
"prev_frame_length": 1, "prev_frame_length": 1,
"resize_mode": "fixed_shape", "resize_mode": "fixed_shape",
"fixed_shape": [480, 480], "fixed_shape": [
480,
480
],
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
......
...@@ -18,8 +18,8 @@ python -m lightx2v.infer \ ...@@ -18,8 +18,8 @@ python -m lightx2v.infer \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_01_base.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_01_base.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -18,8 +18,8 @@ python -m lightx2v.infer \ ...@@ -18,8 +18,8 @@ python -m lightx2v.infer \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_02_fp8.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_02_fp8.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
lightx2v_path=/path/to/Lightx2v lightx2v_path=/path/to/Lightx2v
model_path=/path/to/SekoTalk-Distill model_path=/path/to/SekoTalk-Distill
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# set environment variables # set environment variables
source ${lightx2v_path}/scripts/base/base.sh source ${lightx2v_path}/scripts/base/base.sh
...@@ -13,13 +13,13 @@ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True ...@@ -13,13 +13,13 @@ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export ENABLE_GRAPH_MODE=false export ENABLE_GRAPH_MODE=false
export SENSITIVE_LAYER_DTYPE=None export SENSITIVE_LAYER_DTYPE=None
torchrun --nproc-per-node 4 -m lightx2v.infer \ torchrun --nproc-per-node 8 -m lightx2v.infer \
--model_cls seko_talk \ --model_cls seko_talk \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_03_dist.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_03_dist.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
lightx2v_path=/path/to/Lightx2v lightx2v_path=/path/to/Lightx2v
model_path=/path/to/SekoTalk-Distill-fp8 model_path=/path/to/SekoTalk-Distill-fp8
export CUDA_VISIBLE_DEVICES=0,1,2,3 export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
# set environment variables # set environment variables
source ${lightx2v_path}/scripts/base/base.sh source ${lightx2v_path}/scripts/base/base.sh
...@@ -13,13 +13,13 @@ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True ...@@ -13,13 +13,13 @@ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
export ENABLE_GRAPH_MODE=false export ENABLE_GRAPH_MODE=false
export SENSITIVE_LAYER_DTYPE=None export SENSITIVE_LAYER_DTYPE=None
torchrun --nproc-per-node 4 -m lightx2v.infer \ torchrun --nproc-per-node 8 -m lightx2v.infer \
--model_cls seko_talk \ --model_cls seko_talk \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_04_fp8_dist.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_04_fp8_dist.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -18,8 +18,8 @@ python -m lightx2v.infer \ ...@@ -18,8 +18,8 @@ python -m lightx2v.infer \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_05_offload_fp8_4090.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_05_offload_fp8_4090.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -18,8 +18,8 @@ python -m lightx2v.infer \ ...@@ -18,8 +18,8 @@ python -m lightx2v.infer \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_06_offload_fp8_H100.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_06_offload_fp8_H100.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -18,8 +18,8 @@ torchrun --nproc-per-node 4 -m lightx2v.infer \ ...@@ -18,8 +18,8 @@ torchrun --nproc-per-node 4 -m lightx2v.infer \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_07_dist_offload.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_07_dist_offload.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -18,8 +18,8 @@ python -m lightx2v.infer \ ...@@ -18,8 +18,8 @@ python -m lightx2v.infer \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_08_5B_base.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_08_5B_base.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
...@@ -18,8 +18,8 @@ python -m lightx2v.infer \ ...@@ -18,8 +18,8 @@ python -m lightx2v.infer \
--task i2v \ --task i2v \
--model_path $model_path \ --model_path $model_path \
--config_json ${lightx2v_path}/configs/seko_talk/seko_talk_09_base_fixed_min_area.json \ --config_json ${lightx2v_path}/configs/seko_talk/seko_talk_09_base_fixed_min_area.json \
--prompt "The video features a old lady is saying something and knitting a sweater." \ --prompt "The video features a male speaking to the camera with arms spread out, a slightly furrowed brow, and a focused gaze." \
--negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \ --negative_prompt 色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走 \
--image_path ${lightx2v_path}/assets/inputs/audio/15.png \ --image_path ${lightx2v_path}/assets/inputs/audio/seko_input.png \
--audio_path ${lightx2v_path}/assets/inputs/audio/15.wav \ --audio_path ${lightx2v_path}/assets/inputs/audio/seko_input.wav \
--save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4 --save_video_path ${lightx2v_path}/save_results/output_lightx2v_seko_talk.mp4
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment