Commit a1ebc651 authored by xuwx1's avatar xuwx1
Browse files

update lightx2v

parent 5a4db490
Pipeline #3149 canceled with stages
{
"batchsize": 1,
"num_channels_latents": 16,
"vae_scale_factor": 8,
"infer_steps": 50,
"guidance_embeds": false,
"num_images_per_prompt": 1,
"vae_latents_mean": [
-0.7571,
-0.7089,
-0.9113,
0.1075,
-0.1745,
0.9653,
-0.1517,
1.5508,
0.4134,
-0.0715,
0.5517,
-0.3632,
-0.1922,
-0.9497,
0.2503,
-0.2921
],
"vae_latents_std": [
2.8184,
1.4541,
2.3275,
2.6558,
1.2196,
1.7708,
2.6052,
2.0743,
3.2687,
2.1526,
2.8652,
1.5579,
1.6382,
1.1253,
2.8251,
1.916
],
"vae_z_dim": 16,
"feature_caching": "NoCaching",
"transformer_in_channels": 64,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"_auto_resize": true,
"num_layers": 60,
"attention_out_dim": 3072,
"attention_dim_head": 128,
"axes_dims_rope": [
16,
56,
56
],
"_comment_attn": "in [torch_sdpa, flash_attn3, sage_attn2]",
"attn_type": "flash_attn3",
"do_true_cfg": true,
"true_cfg_scale": 4.0,
"CONDITION_IMAGE_SIZE": 1048576,
"USE_IMAGE_ID_IN_PROMPT": false
}
{
"batchsize": 1,
"num_channels_latents": 16,
"vae_scale_factor": 8,
"infer_steps": 40,
"guidance_embeds": false,
"num_images_per_prompt": 1,
"vae_latents_mean": [
-0.7571,
-0.7089,
-0.9113,
0.1075,
-0.1745,
0.9653,
-0.1517,
1.5508,
0.4134,
-0.0715,
0.5517,
-0.3632,
-0.1922,
-0.9497,
0.2503,
-0.2921
],
"vae_latents_std": [
2.8184,
1.4541,
2.3275,
2.6558,
1.2196,
1.7708,
2.6052,
2.0743,
3.2687,
2.1526,
2.8652,
1.5579,
1.6382,
1.1253,
2.8251,
1.916
],
"vae_z_dim": 16,
"feature_caching": "NoCaching",
"transformer_in_channels": 64,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"_auto_resize": true,
"num_layers": 60,
"attention_out_dim": 3072,
"attention_dim_head": 128,
"axes_dims_rope": [
16,
56,
56
],
"_comment_attn": "in [torch_sdpa, flash_attn3, sage_attn2]",
"attn_type": "flash_attn3",
"do_true_cfg": true,
"true_cfg_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true
}
{
"batchsize": 1,
"num_channels_latents": 16,
"vae_scale_factor": 8,
"infer_steps": 40,
"guidance_embeds": false,
"num_images_per_prompt": 1,
"vae_latents_mean": [
-0.7571,
-0.7089,
-0.9113,
0.1075,
-0.1745,
0.9653,
-0.1517,
1.5508,
0.4134,
-0.0715,
0.5517,
-0.3632,
-0.1922,
-0.9497,
0.2503,
-0.2921
],
"vae_latents_std": [
2.8184,
1.4541,
2.3275,
2.6558,
1.2196,
1.7708,
2.6052,
2.0743,
3.2687,
2.1526,
2.8652,
1.5579,
1.6382,
1.1253,
2.8251,
1.916
],
"vae_z_dim": 16,
"feature_caching": "NoCaching",
"transformer_in_channels": 64,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"_auto_resize": true,
"num_layers": 60,
"attention_out_dim": 3072,
"attention_dim_head": 128,
"axes_dims_rope": [
16,
56,
56
],
"_comment_attn": "in [torch_sdpa, flash_attn3, sage_attn2]",
"attn_type": "flash_attn3",
"do_true_cfg": true,
"true_cfg_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"dit_quantized": true,
"dit_quantized_ckpt": "/path/to/qwen_2509_fp8.safetensors",
"dit_quant_scheme": "fp8-sgl"
}
{
"batchsize": 1,
"num_channels_latents": 16,
"vae_scale_factor": 8,
"infer_steps": 8,
"guidance_embeds": false,
"num_images_per_prompt": 1,
"vae_latents_mean": [
-0.7571,
-0.7089,
-0.9113,
0.1075,
-0.1745,
0.9653,
-0.1517,
1.5508,
0.4134,
-0.0715,
0.5517,
-0.3632,
-0.1922,
-0.9497,
0.2503,
-0.2921
],
"vae_latents_std": [
2.8184,
1.4541,
2.3275,
2.6558,
1.2196,
1.7708,
2.6052,
2.0743,
3.2687,
2.1526,
2.8652,
1.5579,
1.6382,
1.1253,
2.8251,
1.916
],
"vae_z_dim": 16,
"feature_caching": "NoCaching",
"transformer_in_channels": 64,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"_auto_resize": true,
"num_layers": 60,
"attention_out_dim": 3072,
"attention_dim_head": 128,
"axes_dims_rope": [
16,
56,
56
],
"_comment_attn": "in [torch_sdpa, flash_attn3, sage_attn2]",
"attn_type": "flash_attn3",
"do_true_cfg": true,
"true_cfg_scale": 4.0,
"CONDITION_IMAGE_SIZE": 1048576,
"USE_IMAGE_ID_IN_PROMPT": false,
"lora_configs": [
{
"path": "/path/to/Qwen-Image-Edit-Lightning-4steps-V1.0.safetensors",
"strength": 1.0
}
]
}
{
"batchsize": 1,
"_comment": "格式: '宽高比': [width, height]",
"aspect_ratios": {
"1:1": [
1328,
1328
],
"16:9": [
1664,
928
],
"9:16": [
928,
1664
],
"4:3": [
1472,
1140
],
"3:4": [
142,
184
]
},
"aspect_ratio": "16:9",
"num_channels_latents": 16,
"vae_scale_factor": 8,
"infer_steps": 50,
"guidance_embeds": false,
"num_images_per_prompt": 1,
"vae_latents_mean": [
-0.7571,
-0.7089,
-0.9113,
0.1075,
-0.1745,
0.9653,
-0.1517,
1.5508,
0.4134,
-0.0715,
0.5517,
-0.3632,
-0.1922,
-0.9497,
0.2503,
-0.2921
],
"vae_latents_std": [
2.8184,
1.4541,
2.3275,
2.6558,
1.2196,
1.7708,
2.6052,
2.0743,
3.2687,
2.1526,
2.8652,
1.5579,
1.6382,
1.1253,
2.8251,
1.916
],
"vae_z_dim": 16,
"feature_caching": "NoCaching",
"prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 34,
"_auto_resize": false,
"num_layers": 60,
"attention_out_dim": 3072,
"attention_dim_head": 128,
"axes_dims_rope": [
16,
56,
56
],
"_comment_attn": "in [torch_sdpa, flash_attn3, sage_attn2]",
"attn_type": "flash_attn3",
"do_true_cfg": true,
"true_cfg_scale": 4.0
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn3",
"cross_attn_1_type": "sage_attn3",
"cross_attn_2_type": "sage_attn3",
"sample_guide_scale": 1,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 1,
"t5_cpu_offload": true,
"clip_cpu_offload": false,
"audio_encoder_cpu_offload": false,
"audio_adapter_cpu_offload": false,
"vae_cpu_offload": false
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn3",
"cross_attn_1_type": "sage_attn3",
"cross_attn_2_type": "sage_attn3",
"sample_guide_scale": 1,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 1,
"t5_cpu_offload": false,
"clip_cpu_offload": false,
"audio_encoder_cpu_offload": false,
"audio_adapter_cpu_offload": false,
"vae_cpu_offload": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-q8f",
"t5_quantized": true,
"t5_quant_scheme": "int8-q8f"
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn3",
"cross_attn_1_type": "sage_attn3",
"cross_attn_2_type": "sage_attn3",
"sample_guide_scale": 1,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 1,
"t5_cpu_offload": false,
"clip_cpu_offload": false,
"audio_encoder_cpu_offload": false,
"audio_adapter_cpu_offload": false,
"vae_cpu_offload": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-q8f",
"t5_quantized": true,
"t5_quant_scheme": "int8-q8f",
"parallel": {
"seq_p_size": 8,
"seq_p_attn_type": "ulysses-4090"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": false,
"use_31_block": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-vllm",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-vllm",
"t5_quantized": true,
"t5_quant_scheme": "int8-vllm"
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": false,
"use_31_block": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-vllm",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-vllm",
"t5_quantized": true,
"t5_quant_scheme": "int8-vllm",
"parallel": {
"seq_p_size": 2,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": false,
"use_31_block": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-vllm",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-vllm",
"t5_quantized": true,
"t5_quant_scheme": "int8-vllm",
"parallel": {
"seq_p_size": 4,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": false,
"use_31_block": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-vllm",
"adapter_quantized": true,
"adapter_quant_scheme": "int8-vllm",
"t5_quantized": true,
"t5_quant_scheme": "int8-vllm",
"parallel": {
"seq_p_size": 8,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": true,
"offload_granularity": "block",
"offload_ratio": 0.8,
"t5_cpu_offload": false,
"clip_cpu_offload": false,
"vae_cpu_offload": false,
"audio_encoder_cpu_offload": false,
"audio_adapter_cpu_offload": false
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"t5_quantized": true,
"t5_quant_scheme": "fp8-q8f",
"dit_quantized": true,
"dit_quant_scheme": "fp8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "fp8",
"cpu_offload": false,
"t5_cpu_offload": true,
"clip_cpu_offload": true,
"vae_cpu_offload": true,
"audio_encoder_cpu_offload": true,
"audio_adapter_cpu_offload": true
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": false,
"t5_cpu_offload": true,
"clip_cpu_offload": true,
"vae_cpu_offload": true,
"audio_encoder_cpu_offload": true,
"audio_adapter_cpu_offload": true,
"parallel": {
"seq_p_size": 2,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"t5_quantized": true,
"t5_quant_scheme": "fp8-q8f",
"dit_quantized": true,
"dit_quant_scheme": "fp8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "fp8",
"cpu_offload": false,
"t5_cpu_offload": true,
"clip_cpu_offload": true,
"vae_cpu_offload": true,
"audio_encoder_cpu_offload": true,
"audio_adapter_cpu_offload": true,
"parallel": {
"seq_p_size": 2,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": false,
"t5_cpu_offload": true,
"clip_cpu_offload": true,
"vae_cpu_offload": true,
"audio_encoder_cpu_offload": true,
"audio_adapter_cpu_offload": true,
"parallel": {
"seq_p_size": 4,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"t5_quantized": true,
"t5_quant_scheme": "fp8-q8f",
"dit_quantized": true,
"dit_quant_scheme": "fp8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "fp8",
"cpu_offload": false,
"t5_cpu_offload": true,
"clip_cpu_offload": true,
"vae_cpu_offload": true,
"audio_encoder_cpu_offload": true,
"audio_adapter_cpu_offload": true,
"parallel": {
"seq_p_size": 4,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"cpu_offload": false,
"t5_cpu_offload": true,
"clip_cpu_offload": true,
"vae_cpu_offload": true,
"audio_encoder_cpu_offload": true,
"audio_adapter_cpu_offload": true,
"parallel": {
"seq_p_size": 8,
"seq_p_attn_type": "ulysses"
}
}
{
"infer_steps": 4,
"target_fps": 16,
"video_duration": 360,
"audio_sr": 16000,
"target_video_length": 81,
"resize_mode": "adaptive",
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 1.0,
"sample_shift": 5,
"enable_cfg": false,
"use_31_block": false,
"t5_quantized": true,
"t5_quant_scheme": "fp8-q8f",
"dit_quantized": true,
"dit_quant_scheme": "fp8-q8f",
"adapter_quantized": true,
"adapter_quant_scheme": "fp8",
"cpu_offload": false,
"t5_cpu_offload": true,
"clip_cpu_offload": true,
"vae_cpu_offload": true,
"audio_encoder_cpu_offload": true,
"audio_adapter_cpu_offload": true,
"parallel": {
"seq_p_size": 8,
"seq_p_attn_type": "ulysses"
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment