Commit e2778d0d authored by litzh's avatar litzh
Browse files

Initial commit

parents
Pipeline #3370 canceled with stages
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "torch_sdpa",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"modulate_type": "torch",
"rope_type": "torch_naive",
"layer_norm_type": "torch",
"rms_norm_type": "torch"
}
{
"infer_steps": 50,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "torch_sdpa",
"cross_attn_1_type": "torch_sdpa",
"cross_attn_2_type": "torch_sdpa",
"sample_guide_scale": 6,
"sample_shift": 8,
"enable_cfg": true,
"cpu_offload": false,
"modulate_type": "torch",
"rope_type": "torch_naive",
"layer_norm_type": "torch",
"rms_norm_type": "torch"
}
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "flash_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true
}
{
"infer_steps": 50,
"target_video_length": 81,
"text_len": 512,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "flash_attn3",
"cross_attn_1_type": "flash_attn3",
"cross_attn_2_type": "flash_attn3",
"sample_guide_scale": 6,
"sample_shift": 8,
"enable_cfg": true,
"cpu_offload": false
}
{
"infer_steps": 4,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": true,
"cpu_offload": true,
"offload_granularity": "model",
"dit_quantized": true,
"dit_quant_scheme": "gguf-Q4_K_S"
}
{
"infer_steps": 40,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": true,
"cpu_offload": false,
"dit_quantized_ckpt": "/path/to/int8/model",
"dit_quantized": true,
"dit_quant_scheme": "int8-vllm"
}
{
"infer_steps": 4,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-q8f",
"t5_quantized": true,
"t5_quant_scheme": "int8-q8f",
"clip_quantized": true,
"clip_quant_scheme": "int8-q8f"
}
{
"infer_steps": 4,
"target_video_length": 81,
"target_height": 480,
"target_width": 832,
"self_attn_1_type": "sage_attn2",
"cross_attn_1_type": "sage_attn2",
"cross_attn_2_type": "sage_attn2",
"sample_guide_scale": 5,
"sample_shift": 5,
"enable_cfg": false,
"cpu_offload": false,
"dit_quantized": true,
"dit_quant_scheme": "int8-torchao",
"t5_quantized": true,
"t5_quant_scheme": "int8-torchao",
"clip_quantized": true,
"clip_quant_scheme": "int8-torchao"
}
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "sage_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"cpu_offload": true,
"offload_granularity": "block"
}
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "sage_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"cpu_offload": true,
"offload_granularity": "model",
"dit_quantized": true,
"dit_quant_scheme": "fp8-sgl",
"dit_quantized_ckpt": "/path/to/qwen_fp8.safetensors"
}
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "flash_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true
}
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "flash_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"parallel": {
"seq_p_size": 4,
"seq_p_attn_type": "ulysses",
"cfg_p_size": 2
}
}
{
"infer_steps": 8,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 4.0,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"lora_configs": [
{
"path": "lightx2v/Qwen-Image-Edit-2511-Lightning/Qwen-Image-Edit-2511-Lightning-8steps-V1.0-fp32.safetensors",
"strength": 1.0
}
]
}
{
"infer_steps": 8,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 4.0,
"dit_quantized": true,
"dit_quantized_ckpt": "lightx2v/Qwen-Image-Edit-2511-Lightning/qwen_image_edit_2511_fp8_e4m3fn_scaled_lightning_8steps_v1.0.safetensors",
"dit_quant_scheme": "fp8-sgl"
}
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "flash_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"vae_scale_factor": 8,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"text_encoder_type": "lightllm_kernel",
"lightllm_config": {
"use_flash_attention_kernel": true,
"use_rmsnorm_kernel": true
}
}
{
"infer_steps": 40,
"prompt_template_encode": "<|im_start|>system\nDescribe the key features of the input image (color, shape, size, texture, objects, background), then explain how the user's text instruction should alter or modify the image. Generate a new image that meets the user's requirements while maintaining consistency with the original input where appropriate.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 64,
"resize_mode": "adaptive",
"attn_type": "flash_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"vae_scale_factor": 8,
"CONDITION_IMAGE_SIZE": 147456,
"USE_IMAGE_ID_IN_PROMPT": true,
"text_encoder_type": "lightllm_service",
"lightllm_config": {
"service_url": "http://localhost:8010",
"service_timeout": 30,
"service_retry": 3,
"use_shm": true
}
}
{
"infer_steps": 50,
"prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 34,
"resize_mode": "adaptive",
"attn_type": "flash_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0,
"USE_IMAGE_ID_IN_PROMPT": true,
"layered": true,
"layers": 4,
"resolution": 640,
"use_en_prompt": true,
"use_additional_t_cond": true,
"use_layer3d_rope": true,
"rope_type": "torch"
}
{
"infer_steps": 50,
"aspect_ratio": "16:9",
"prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 34,
"attn_type": "flash_attn3",
"enable_cfg": true,
"sample_guide_scale": 4.0
}
{
"infer_steps": 8,
"aspect_ratio": "16:9",
"prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 34,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 4.0,
"lora_configs": [
{
"path": "lightx2v/Qwen-Image-2512-Lightning/Qwen-Image-2512-Lightning-8steps-V1.0-fp32.safetensors",
"strength": 1.0
}
]
}
{
"infer_steps": 8,
"aspect_ratio": "16:9",
"prompt_template_encode": "<|im_start|>system\nDescribe the image by detailing the color, shape, size, texture, quantity, text, spatial relationships of the objects and background:<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n",
"prompt_template_encode_start_idx": 34,
"attn_type": "flash_attn3",
"enable_cfg": false,
"sample_guide_scale": 4.0,
"dit_quantized": true,
"dit_quantized_ckpt": "lightx2v/Qwen-Image-2512-Lightning/qwen_image_2512_fp8_e4m3fn_scaled_8steps_v1.0.safetensors",
"dit_quant_scheme": "fp8-sgl"
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment