Unverified commit 04812de2, authored by Yang Yong (雍洋), committed by GitHub
Browse files

Refactor Config System (#338)

parent 6a658f42
...@@ -22,7 +22,7 @@ RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel ...@@ -22,7 +22,7 @@ RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel
&& make build && make clean && make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \ RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy easydict av imageio-ffmpeg einops loguru qtorch ftfy av
RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg && conda clean -all -y RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg && conda clean -all -y
......
...@@ -26,7 +26,7 @@ RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel ...@@ -26,7 +26,7 @@ RUN git clone https://github.com/sgl-project/sglang.git && cd sglang/sgl-kernel
&& make build && make clean && make build && make clean
RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \ RUN pip install --no-cache-dir diffusers transformers tokenizers accelerate safetensors opencv-python numpy imageio \
imageio-ffmpeg einops loguru qtorch ftfy easydict imageio-ffmpeg einops loguru qtorch ftfy
RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg RUN conda install conda-forge::ffmpeg=8.0.0 -y && ln -s /opt/conda/bin/ffmpeg /usr/bin/ffmpeg
......
...@@ -10,7 +10,6 @@ from datetime import datetime ...@@ -10,7 +10,6 @@ from datetime import datetime
import gradio as gr import gradio as gr
import psutil import psutil
import torch import torch
from easydict import EasyDict
from loguru import logger from loguru import logger
logger.add( logger.add(
...@@ -258,7 +257,7 @@ for op_name, is_installed in available_attn_ops: ...@@ -258,7 +257,7 @@ for op_name, is_installed in available_attn_ops:
def run_inference( def run_inference(
prompt, prompt,
negative_prompt, negative_prompt,
save_video_path, save_result_path,
torch_compile, torch_compile,
infer_steps, infer_steps,
num_frames, num_frames,
...@@ -382,7 +381,7 @@ def run_inference( ...@@ -382,7 +381,7 @@ def run_inference(
], ],
] ]
save_video_path = generate_unique_filename(output_dir) save_result_path = generate_unique_filename(output_dir)
is_dit_quant = dit_quant_scheme != "bf16" is_dit_quant = dit_quant_scheme != "bf16"
is_t5_quant = t5_quant_scheme != "bf16" is_t5_quant = t5_quant_scheme != "bf16"
...@@ -519,11 +518,10 @@ def run_inference( ...@@ -519,11 +518,10 @@ def run_inference(
prompt=prompt, prompt=prompt,
negative_prompt=negative_prompt, negative_prompt=negative_prompt,
image_path=image_path, image_path=image_path,
save_video_path=save_video_path, save_result_path=save_result_path,
) )
config.update({k: v for k, v in vars(args).items()}) config.update({k: v for k, v in vars(args).items()})
config = EasyDict(config)
config.update(model_config) config.update(model_config)
config.update(quant_model_config) config.update(quant_model_config)
...@@ -565,7 +563,7 @@ def run_inference( ...@@ -565,7 +563,7 @@ def run_inference(
cleanup_memory() cleanup_memory()
return save_video_path return save_result_path
def handle_lazy_load_change(lazy_load_enabled): def handle_lazy_load_change(lazy_load_enabled):
...@@ -1024,7 +1022,7 @@ def main(): ...@@ -1024,7 +1022,7 @@ def main():
info="Total number of frames in the video. More frames result in longer videos.", info="Total number of frames in the video. More frames result in longer videos.",
) )
save_video_path = gr.Textbox( save_result_path = gr.Textbox(
label="Output Video Path", label="Output Video Path",
value=generate_unique_filename(output_dir), value=generate_unique_filename(output_dir),
info="Must include .mp4 extension. If left blank or using the default value, a unique filename will be automatically generated.", info="Must include .mp4 extension. If left blank or using the default value, a unique filename will be automatically generated.",
...@@ -1234,7 +1232,7 @@ def main(): ...@@ -1234,7 +1232,7 @@ def main():
inputs=[ inputs=[
prompt, prompt,
negative_prompt, negative_prompt,
save_video_path, save_result_path,
torch_compile, torch_compile,
infer_steps, infer_steps,
num_frames, num_frames,
...@@ -1275,7 +1273,7 @@ def main(): ...@@ -1275,7 +1273,7 @@ def main():
inputs=[ inputs=[
prompt, prompt,
negative_prompt, negative_prompt,
save_video_path, save_result_path,
torch_compile, torch_compile,
infer_steps, infer_steps,
num_frames, num_frames,
......
...@@ -10,7 +10,6 @@ from datetime import datetime ...@@ -10,7 +10,6 @@ from datetime import datetime
import gradio as gr import gradio as gr
import psutil import psutil
import torch import torch
from easydict import EasyDict
from loguru import logger from loguru import logger
logger.add( logger.add(
...@@ -260,7 +259,7 @@ for op_name, is_installed in available_attn_ops: ...@@ -260,7 +259,7 @@ for op_name, is_installed in available_attn_ops:
def run_inference( def run_inference(
prompt, prompt,
negative_prompt, negative_prompt,
save_video_path, save_result_path,
torch_compile, torch_compile,
infer_steps, infer_steps,
num_frames, num_frames,
...@@ -384,7 +383,7 @@ def run_inference( ...@@ -384,7 +383,7 @@ def run_inference(
], ],
] ]
save_video_path = generate_unique_filename(output_dir) save_result_path = generate_unique_filename(output_dir)
is_dit_quant = dit_quant_scheme != "bf16" is_dit_quant = dit_quant_scheme != "bf16"
is_t5_quant = t5_quant_scheme != "bf16" is_t5_quant = t5_quant_scheme != "bf16"
...@@ -523,11 +522,10 @@ def run_inference( ...@@ -523,11 +522,10 @@ def run_inference(
prompt=prompt, prompt=prompt,
negative_prompt=negative_prompt, negative_prompt=negative_prompt,
image_path=image_path, image_path=image_path,
save_video_path=save_video_path, save_result_path=save_result_path,
) )
config.update({k: v for k, v in vars(args).items()}) config.update({k: v for k, v in vars(args).items()})
config = EasyDict(config)
config.update(model_config) config.update(model_config)
config.update(quant_model_config) config.update(quant_model_config)
...@@ -569,7 +567,7 @@ def run_inference( ...@@ -569,7 +567,7 @@ def run_inference(
cleanup_memory() cleanup_memory()
return save_video_path return save_result_path
def handle_lazy_load_change(lazy_load_enabled): def handle_lazy_load_change(lazy_load_enabled):
...@@ -1028,7 +1026,7 @@ def main(): ...@@ -1028,7 +1026,7 @@ def main():
info="视频中的总帧数。更多帧数会产生更长的视频。", info="视频中的总帧数。更多帧数会产生更长的视频。",
) )
save_video_path = gr.Textbox( save_result_path = gr.Textbox(
label="输出视频路径", label="输出视频路径",
value=generate_unique_filename(output_dir), value=generate_unique_filename(output_dir),
info="必须包含.mp4扩展名。如果留空或使用默认值,将自动生成唯一文件名。", info="必须包含.mp4扩展名。如果留空或使用默认值,将自动生成唯一文件名。",
...@@ -1236,7 +1234,7 @@ def main(): ...@@ -1236,7 +1234,7 @@ def main():
inputs=[ inputs=[
prompt, prompt,
negative_prompt, negative_prompt,
save_video_path, save_result_path,
torch_compile, torch_compile,
infer_steps, infer_steps,
num_frames, num_frames,
...@@ -1277,7 +1275,7 @@ def main(): ...@@ -1277,7 +1275,7 @@ def main():
inputs=[ inputs=[
prompt, prompt,
negative_prompt, negative_prompt,
save_video_path, save_result_path,
torch_compile, torch_compile,
infer_steps, infer_steps,
num_frames, num_frames,
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
......
...@@ -6,7 +6,6 @@ ...@@ -6,7 +6,6 @@
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 6, "sample_guide_scale": 6,
"sample_shift": 8, "sample_shift": 8,
"enable_cfg": true, "enable_cfg": true,
......
{ {
"infer_steps": 40, "infer_steps": 40,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42, //1234
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
......
{ {
"infer_steps": 40, "infer_steps": 40,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42, //1234
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
......
{ {
"infer_steps": 40, "infer_steps": 40,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42, //1234
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42, //1234
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": false, "enable_cfg": false,
"cpu_offload": false, "cpu_offload": false,
"denoising_step_list": [1000, 750, 500, 250], "denoising_step_list": [
1000,
750,
500,
250
],
"mm_config": { "mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl-ActVllm" "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Sgl-ActVllm"
}, },
......
{ {
"infer_steps": 40, "infer_steps": 40,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42, //1234
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
...@@ -16,13 +15,21 @@ ...@@ -16,13 +15,21 @@
}, },
"feature_caching": "Tea", "feature_caching": "Tea",
"coefficients": [ "coefficients": [
[2.57151496e05, -3.54229917e04, 1.40286849e03, -1.35890334e01, 1.32517977e-01], [
[-3.02331670e02, 2.23948934e02, -5.25463970e01, 5.87348440e00, -2.01973289e-01] 2.57151496e05,
-3.54229917e04,
1.40286849e03,
-1.35890334e01,
1.32517977e-01
],
[
-3.02331670e02,
2.23948934e02,
-5.25463970e01,
5.87348440e00,
-2.01973289e-01
]
], ],
// "coefficients": [
// [8.10705460e03, 2.13393892e03, -3.72934672e02, 1.66203073e01, -4.17769401e-02],
// [-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683]
// ],
"use_ret_steps": false, "use_ret_steps": false,
"teacache_thresh": 0.2, "teacache_thresh": 0.2,
"use_tiling_vae": true "use_tiling_vae": true
......
{ {
"infer_steps": 40, "infer_steps": 40,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
"cpu_offload": true, "cpu_offload": true,
"offload_granularity": "block", "offload_granularity": "block",
"offload_ratio": 0.8, //1 "offload_ratio": 0.8,
"t5_cpu_offload": true, "t5_cpu_offload": true,
"mm_config": { "mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F" "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": false, "enable_cfg": false,
"cpu_offload": true, "cpu_offload": true,
"offload_granularity": "block", "offload_granularity": "block",
"offload_ratio": 0.8, //1 "offload_ratio": 0.8,
"t5_cpu_offload": true, "t5_cpu_offload": true,
"denoising_step_list": [1000, 750, 500, 250], "denoising_step_list": [
1000,
750,
500,
250
],
"mm_config": { "mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F" "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
}, },
......
{ {
"infer_steps": 40, "infer_steps": 40,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": true, "enable_cfg": true,
"cpu_offload": true, "cpu_offload": true,
"offload_granularity": "block", "offload_granularity": "block",
"offload_ratio": 0.8, //1 "offload_ratio": 0.8,
"t5_cpu_offload": true, "t5_cpu_offload": true,
"mm_config": { "mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F" "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
......
{ {
"infer_steps": 4, "infer_steps": 4,
"target_video_length": 81, "target_video_length": 81,
"target_height": 480, // 720 "target_height": 480,
"target_width": 832, // 1280 "target_width": 832,
"self_attn_1_type": "sage_attn2", "self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2", "cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2", "cross_attn_2_type": "sage_attn2",
"seed": 42,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 5, "sample_shift": 5,
"enable_cfg": false, "enable_cfg": false,
"cpu_offload": true, "cpu_offload": true,
"offload_granularity": "block", "offload_granularity": "block",
"offload_ratio": 0.8, //1 "offload_ratio": 0.8,
"t5_cpu_offload": true, "t5_cpu_offload": true,
"denoising_step_list": [1000, 750, 500, 250], "denoising_step_list": [
1000,
750,
500,
250
],
"mm_config": { "mm_config": {
"mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F" "mm_type": "W-fp8-channel-sym-A-fp8-channel-sym-dynamic-Q8F"
}, },
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 6, "sample_guide_scale": 6,
"sample_shift": 8, "sample_shift": 8,
"enable_cfg": true, "enable_cfg": true,
......
...@@ -6,15 +6,26 @@ ...@@ -6,15 +6,26 @@
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
"seed": 442,
"sample_guide_scale": 5, "sample_guide_scale": 5,
"sample_shift": 3, "sample_shift": 3,
"enable_cfg": true, "enable_cfg": true,
"cpu_offload": false, "cpu_offload": false,
"feature_caching": "Custom", "feature_caching": "Custom",
"coefficients": [ "coefficients": [
[8.10705460e03, 2.13393892e03, -3.72934672e02, 1.66203073e01, -4.17769401e-02], [
[-114.36346466, 65.26524496, -18.82220707, 4.91518089, -0.23412683] 8.10705460e03,
2.13393892e03,
-3.72934672e02,
1.66203073e01,
-4.17769401e-02
],
[
-114.36346466,
65.26524496,
-18.82220707,
4.91518089,
-0.23412683
]
], ],
"use_ret_steps": false, "use_ret_steps": false,
"teacache_thresh": 0.26 "teacache_thresh": 0.26
......
...@@ -7,15 +7,26 @@ ...@@ -7,15 +7,26 @@
"self_attn_1_type": "flash_attn3", "self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3", "cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3", "cross_attn_2_type": "flash_attn3",
"seed": 42,
"sample_guide_scale": 6, "sample_guide_scale": 6,
"sample_shift": 8, "sample_shift": 8,
"enable_cfg": true, "enable_cfg": true,
"cpu_offload": false, "cpu_offload": false,
"feature_caching": "Custom", "feature_caching": "Custom",
"coefficients": [ "coefficients": [
[-3.03318725e05, 4.90537029e04, -2.65530556e03, 5.87365115e01, -3.15583525e-01], [
[-5784.54975374, 5449.50911966, -1811.16591783, 256.27178429, -13.02252404] -3.03318725e05,
4.90537029e04,
-2.65530556e03,
5.87365115e01,
-3.15583525e-01
],
[
-5784.54975374,
5449.50911966,
-1811.16591783,
256.27178429,
-13.02252404
]
], ],
"use_ret_steps": false, "use_ret_steps": false,
"teacache_thresh": 0.26 "teacache_thresh": 0.26
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment