Unverified commit de6a75b6, authored by Muyang Li and committed by GitHub

chore: add qwen-image tests for v1, clean the requirements (#712)

* add the v1 tests

* formalize the requirements

* update

* update

* format the dependencies properly

* update docs

* update

* update

* update the dependency

* fix the version in ci

* organize the dependencies properly

* update the pr list

* update tests

* add tests for qwen-image

* add qwen-image lightning

* add tests

* update fp4 lpips

* add tests for qwen-image-edit

* update

* update

* add 8 steps lora back

* update nvfp4 results

* add qwen-image-edit-lightning

* style: make linter happy

* do not batch inputs when the batch size is 1

* update

* finish all the tests

* add controlnet test

* style: make linter happy

* update

* add qwen-image controlnet

* update

* add fp4 metrics

* fix the tests
Parent: b0484ae0

# ---------------------------------------------------------------------------
# Qwen-Image text-to-image tests
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import pytest
import torch
from diffusers import QwenImagePipeline
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
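# Pick the quantized precision (e.g. int4/fp4) for the current GPU; Turing
# cards have no usable bf16 support, so fall back to fp16 there.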
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
@pytest.mark.parametrize(
"rank,expected_lpips",
[
(32, {"int4-bf16": 0.24, "fp4-bf16": 0.24}),
(128, {"int4-bf16": 0.31, "fp4-bf16": 0.18}),
],
)
def test_qwenimage(rank: int, expected_lpips: dict[str, float]):
model_name = "qwen-image"
batch_size = 1
width = 1664
height = 928
num_inference_steps = 20
true_cfg_scale = 4.0
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"w{width}h{height}t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image"
dataset = [
{
"prompt": """Bookstore window display. A sign displays “New Arrivals This Week”. Below, a shelf tag with the text “Best-Selling Novels Here”. To the side, a colorful poster advertises “Author Meet And Greet on Saturday” with a central portrait of the author. There are four books on the bookshelf, namely “The light between worlds” “When stars are scattered” “The slient patient” “The night circus” Ultra HD, 4K, cinematic composition.""",
"negative_prompt": " ",
"filename": "bookstore",
},
{
"prompt": "一副典雅庄重的对联悬挂于厅堂之中,房间是个安静古典的中式布置,桌子上放着一些青花瓷,对联上左书“义本生知人机同道善思新”,右书“通云赋智乾坤启数高志远”, 横批“智启通义”,字体飘逸,中间挂在一着一副中国风的画作,内容是岳阳楼。超清,4K,电影级构图",
"negative_prompt": " ",
"filename": "chinese_room",
},
{
"prompt": '一张企业级高质量PPT页面图像,整体采用科技感十足的星空蓝为主色调,背景融合流动的发光科技线条与微光粒子特效,营造出专业、现代且富有信任感的品牌氛围;页面顶部左侧清晰展示橘红色Alibaba标志,色彩鲜明、辨识度高。主标题位于画面中央偏上位置,使用大号加粗白色或浅蓝色字体写着“通义千问视觉基础模型”,字体现代简洁,突出技术感;主标题下方紧接一行楷体中文文字:“原生中文·复杂场景·自动布局”,字体柔和优雅,形成科技与人文的融合。下方居中排布展示了四张与图片,分别是:一幅写实与水墨风格结合的梅花特写,枝干苍劲、花瓣清雅,背景融入淡墨晕染与飘雪效果,体现坚韧不拔的精神气质;上方写着黑色的楷体"梅傲"。一株生长于山涧石缝中的兰花,叶片修长、花朵素净,搭配晨雾缭绕的自然环境,展现清逸脱俗的文人风骨;上方写着黑色的楷体"兰幽"。一组迎风而立的翠竹,竹叶随风摇曳,光影交错,背景为青灰色山岩与流水,呈现刚柔并济、虚怀若谷的文化意象;上方写着黑色的楷体"竹清"。一片盛开于秋日庭院的菊花丛,花色丰富、层次分明,配以落叶与古亭剪影,传递恬然自适的生活哲学;上方写着黑色的楷体"菊淡"。所有图片采用统一尺寸与边框样式,呈横向排列。页面底部中央用楷体小字写明“2025年8月,敬请期待”,排版工整、结构清晰,整体风格统一且细节丰富,极具视觉冲击力与品牌调性。',
"negative_prompt": " ",
"filename": "ppt",
},
]
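    # Each entry provides per-image pipeline kwargs plus a filename that
    # run_pipeline uses both for the saved image and for its random seed.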
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImagePipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(
f"nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image.safetensors",
torch_dtype=torch_dtype,
)
pipe = QwenImagePipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
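    # With more than 18 GiB of GPU memory, offload at the whole-model level;
    # otherwise keep only 20 transformer blocks resident on the GPU and
    # sequentially offload the rest of the pipeline.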
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku)
print(f"lpips: {lpips}")
assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image ControlNet tests
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import diffusers
import packaging.version
import pytest
import torch
from diffusers.utils import load_image
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
try:
from diffusers import QwenImageControlNetModel, QwenImageControlNetPipeline
except ImportError:
QwenImageControlNetModel = None
QwenImageControlNetPipeline = None
# Skip when diffusers is too old: QwenImageControlNetPipeline is only
# available starting with diffusers 0.36 (releases up to 0.35.1 lack it).
pytestmark = pytest.mark.skipif(
    packaging.version.parse(diffusers.__version__) <= packaging.version.parse("0.35.1"),
    reason="QwenImageControlNetPipeline requires diffusers>=0.36",
)
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
class Case:
def __init__(self, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = "qwen-image-controlnet-union"
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
num_inference_steps=20,
rank=32,
expected_lpips={"int4-bf16": 0.13, "fp4-bf16": 0.11},
),
id="qwen-image-controlnet-union-r32",
),
pytest.param(
Case(
num_inference_steps=20,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-controlnet-union-r128",
),
],
)
def test_qwenimage_controlnet(case: Case):
batch_size = 1
true_cfg_scale = 4.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
forward_kwargs = {
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
"controlnet_conditioning_scale": 1.0,
}
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image"
dataset = [
{
"prompt": "Aesthetics art, traditional asian pagoda, elaborate golden accents, sky blue and white color palette, swirling cloud pattern, digital illustration, east asian architecture, ornamental rooftop, intricate detailing on building, cultural representation.",
"negative_prompt": " ",
"filename": "canny",
"control_image": load_image(
"https://huggingface.co/InstantX/Qwen-Image-ControlNet-Union/resolve/main/conds/canny.png"
).convert("RGB"),
},
{
"prompt": "A swanky, minimalist living room with a huge floor-to-ceiling window letting in loads of natural light. A beige couch with white cushions sits on a wooden floor, with a matching coffee table in front. The walls are a soft, warm beige, decorated with two framed botanical prints. A potted plant chills in the corner near the window. Sunlight pours through the leaves outside, casting cool shadows on the floor.",
"negative_prompt": " ",
"filename": "depth",
"control_image": load_image(
"https://huggingface.co/InstantX/Qwen-Image-ControlNet-Union/resolve/main/conds/depth.png"
).convert("RGB"),
},
{
"prompt": "Photograph of a young man with light brown hair and a beard, wearing a beige flat cap, black leather jacket, gray shirt, brown pants, and white sneakers. He's sitting on a concrete ledge in front of a large circular window, with a cityscape reflected in the glass. The wall is cream-colored, and the sky is clear blue. His shadow is cast on the wall.",
"negative_prompt": " ",
"filename": "pose",
"control_image": load_image(
"https://huggingface.co/InstantX/Qwen-Image-ControlNet-Union/resolve/main/conds/pose.png"
).convert("RGB"),
},
]
for item in dataset:
item["width"] = item["control_image"].size[0]
item["height"] = item["control_image"].size[1]
if not already_generate(save_dir_16bit, len(dataset)):
controlnet = QwenImageControlNetModel.from_pretrained(
"InstantX/Qwen-Image-ControlNet-Union", torch_dtype=torch_dtype
)
pipe = QwenImageControlNetPipeline.from_pretrained(repo_id, controlnet=controlnet, torch_dtype=torch_dtype)
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset, batch_size=1, pipeline=pipe, save_dir=save_dir_16bit, forward_kwargs=forward_kwargs
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = f"nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image.safetensors"
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
controlnet = QwenImageControlNetModel.from_pretrained(
"InstantX/Qwen-Image-ControlNet-Union", torch_dtype=torch_dtype
)
pipe = QwenImageControlNetPipeline.from_pretrained(
repo_id, transformer=transformer, controlnet=controlnet, torch_dtype=torch_dtype
)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset, batch_size=batch_size, pipeline=pipe, save_dir=save_dir_nunchaku, forward_kwargs=forward_kwargs
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image-Edit tests
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import pytest
import torch
from diffusers import QwenImageEditPipeline
from diffusers.utils import load_image
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
class Case:
def __init__(self, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = "qwen-image-edit"
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
num_inference_steps=20,
rank=32,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-r32",
),
pytest.param(
Case(
num_inference_steps=20,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-r128",
),
],
)
def test_qwenimage_edit(case: Case):
batch_size = 1
true_cfg_scale = 4.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image-Edit"
dataset = [
{
"prompt": "change the text to read '双截棍 Qwen Image Edit is here'",
"negative_prompt": " ",
"filename": "neon_sign",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/neon_sign.png"
).convert("RGB"),
},
{
"prompt": "Remove all UI text elements from the image. Keep the feeling that the characters and scene are in water. Also, remove the green UI elements at the bottom.",
"negative_prompt": " ",
"filename": "comfy_poster",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/comfy_poster.png"
).convert("RGB"),
},
]
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImageEditPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = f"nunchaku-tech/nunchaku-qwen-image-edit/svdq-{get_precision()}_r{rank}-qwen-image-edit.safetensors"
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
pipe = QwenImageEditPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image-Edit Lightning tests
# ---------------------------------------------------------------------------
import gc
import math
import os
from pathlib import Path
import pytest
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPipeline
from diffusers.utils import load_image
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
model_paths = {
"qwen-image-edit-lightningv1.0-4steps": "nunchaku-tech/nunchaku-qwen-image-edit/svdq-{precision}_r{rank}-qwen-image-edit-lightningv1.0-4steps.safetensors",
"qwen-image-edit-lightningv1.0-8steps": "nunchaku-tech/nunchaku-qwen-image-edit/svdq-{precision}_r{rank}-qwen-image-edit-lightningv1.0-8steps.safetensors",
}
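# The {precision} and {rank} placeholders are filled in with str.format() when
# the checkpoint path is resolved inside the test.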
lora_paths = {
"qwen-image-edit-lightningv1.0-4steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Edit-Lightning-4steps-V1.0-bf16.safetensors",
),
"qwen-image-edit-lightningv1.0-8steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Edit-Lightning-8steps-V1.0-bf16.safetensors",
),
}
class Case:
def __init__(self, model_name: str, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = model_name
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-4steps",
num_inference_steps=4,
rank=32,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-4steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-4steps",
num_inference_steps=4,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-4steps-r128",
),
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-8steps",
num_inference_steps=8,
rank=32,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-8steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-8steps",
num_inference_steps=8,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-8steps-r128",
),
],
)
def test_qwenimage_edit_lightning(case: Case):
batch_size = 1
true_cfg_scale = 1.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image-Edit"
# From https://github.com/ModelTC/Qwen-Image-Lightning/blob/342260e8f5468d2f24d084ce04f55e101007118b/generate_with_diffusers.py#L82C9-L97C10
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3), # We use shift=3 in distillation
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3), # We use shift=3 in distillation
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None, # set shift_terminal to None
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
dataset = [
{
"prompt": "change the text to read '双截棍 Qwen Image Edit is here'",
"filename": "neon_sign",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/neon_sign.png"
).convert("RGB"),
},
{
"prompt": "Remove all UI text elements from the image. Keep the feeling that the characters and scene are in water. Also, remove the green UI elements at the bottom.",
"filename": "comfy_poster",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/comfy_poster.png"
).convert("RGB"),
},
]
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImageEditPipeline.from_pretrained(repo_id, scheduler=scheduler, torch_dtype=torch_dtype)
pipe.load_lora_weights(lora_paths[model_name][0], weight_name=lora_paths[model_name][1])
pipe.fuse_lora()
pipe.unload_lora_weights()
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = model_paths[model_name].format(precision=precision, rank=rank)
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
pipe = QwenImageEditPipeline.from_pretrained(
repo_id, transformer=transformer, scheduler=scheduler, torch_dtype=torch_dtype
)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image Lightning tests
# ---------------------------------------------------------------------------
import gc
import math
import os
from pathlib import Path
import pytest
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImagePipeline
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
model_paths = {
"qwen-image-lightningv1.0-4steps": "nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image-lightningv1.0-{num_inference_steps}steps.safetensors",
"qwen-image-lightningv1.1-8steps": "nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image-lightningv1.1-{num_inference_steps}steps.safetensors",
}
lora_paths = {
"qwen-image-lightningv1.0-4steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Lightning-4steps-V1.0-bf16.safetensors",
),
"qwen-image-lightningv1.1-8steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Lightning-8steps-V1.1-bf16.safetensors",
),
}
class Case:
def __init__(self, model_name: str, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = model_name
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
model_name="qwen-image-lightningv1.0-4steps",
num_inference_steps=4,
rank=32,
expected_lpips={"int4-bf16": 0.35, "fp4-bf16": 0.33},
),
id="qwen-image-lightningv1.0-4steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-lightningv1.0-4steps",
num_inference_steps=4,
rank=128,
expected_lpips={"int4-bf16": 0.32, "fp4-bf16": 0.32},
),
id="qwen-image-lightningv1.0-4steps-r128",
),
pytest.param(
Case(
model_name="qwen-image-lightningv1.1-8steps",
num_inference_steps=8,
rank=32,
expected_lpips={"int4-bf16": 0.33, "fp4-bf16": 0.34},
),
id="qwen-image-lightningv1.1-8steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-lightningv1.1-8steps",
num_inference_steps=8,
rank=128,
expected_lpips={"int4-bf16": 0.31, "fp4-bf16": 0.32},
),
id="qwen-image-lightningv1.1-8steps-r128",
),
],
)
def test_qwenimage_lightning(case: Case):
batch_size = 1
width = 1024
height = 1024
true_cfg_scale = 1.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"w{width}h{height}t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image"
dataset = [
{
"prompt": """Bookstore window display. A sign displays “New Arrivals This Week”. Below, a shelf tag with the text “Best-Selling Novels Here”. To the side, a colorful poster advertises “Author Meet And Greet on Saturday” with a central portrait of the author. There are four books on the bookshelf, namely “The light between worlds” “When stars are scattered” “The slient patient” “The night circus” Ultra HD, 4K, cinematic composition.""",
"filename": "bookstore",
},
{
"prompt": "一副典雅庄重的对联悬挂于厅堂之中,房间是个安静古典的中式布置,桌子上放着一些青花瓷,对联上左书“义本生知人机同道善思新”,右书“通云赋智乾坤启数高志远”, 横批“智启通义”,字体飘逸,中间挂在一着一副中国风的画作,内容是岳阳楼。超清,4K,电影级构图",
"filename": "chinese_room",
},
{
"prompt": '一张企业级高质量PPT页面图像,整体采用科技感十足的星空蓝为主色调,背景融合流动的发光科技线条与微光粒子特效,营造出专业、现代且富有信任感的品牌氛围;页面顶部左侧清晰展示橘红色Alibaba标志,色彩鲜明、辨识度高。主标题位于画面中央偏上位置,使用大号加粗白色或浅蓝色字体写着“通义千问视觉基础模型”,字体现代简洁,突出技术感;主标题下方紧接一行楷体中文文字:“原生中文·复杂场景·自动布局”,字体柔和优雅,形成科技与人文的融合。下方居中排布展示了四张与图片,分别是:一幅写实与水墨风格结合的梅花特写,枝干苍劲、花瓣清雅,背景融入淡墨晕染与飘雪效果,体现坚韧不拔的精神气质;上方写着黑色的楷体"梅傲"。一株生长于山涧石缝中的兰花,叶片修长、花朵素净,搭配晨雾缭绕的自然环境,展现清逸脱俗的文人风骨;上方写着黑色的楷体"兰幽"。一组迎风而立的翠竹,竹叶随风摇曳,光影交错,背景为青灰色山岩与流水,呈现刚柔并济、虚怀若谷的文化意象;上方写着黑色的楷体"竹清"。一片盛开于秋日庭院的菊花丛,花色丰富、层次分明,配以落叶与古亭剪影,传递恬然自适的生活哲学;上方写着黑色的楷体"菊淡"。所有图片采用统一尺寸与边框样式,呈横向排列。页面底部中央用楷体小字写明“2025年8月,敬请期待”,排版工整、结构清晰,整体风格统一且细节丰富,极具视觉冲击力与品牌调性。',
"filename": "ppt",
},
]
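    # Same Lightning scheduler configuration as the edit-lightning test above
    # (from the Qwen-Image-Lightning reference script).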
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3), # We use shift=3 in distillation
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3), # We use shift=3 in distillation
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None, # set shift_terminal to None
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImagePipeline.from_pretrained(repo_id, scheduler=scheduler, torch_dtype=torch_dtype)
pipe.load_lora_weights(lora_paths[model_name][0], weight_name=lora_paths[model_name][1])
pipe.fuse_lora()
pipe.unload_lora_weights()
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = model_paths[model_name].format(precision=precision, rank=rank, num_inference_steps=num_inference_steps)
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
pipe = QwenImagePipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Smoke tests for the v1 example scripts
# ---------------------------------------------------------------------------
import subprocess
import sys
from pathlib import Path
import pytest
from nunchaku.utils import get_precision
EXAMPLES_DIR = Path("./examples/v1")
# Sort for a deterministic parametrization order.
example_scripts = sorted(str(f) for f in EXAMPLES_DIR.iterdir() if f.is_file() and f.suffix == ".py")
@pytest.mark.parametrize("script_path", example_scripts)
def test_example_script_runs(script_path):
if "sdxl" in script_path and get_precision() == "fp4":
pytest.skip("Skip FP4 tests for SDXL!")
    # Use the current interpreter so the scripts run in the same environment.
    result = subprocess.run([sys.executable, script_path], text=True)
print(f"Running {script_path} -> Return code: {result.returncode}")
assert result.returncode == 0, f"{script_path} failed with code {result.returncode}"
# ---------------------------------------------------------------------------
# Shared pipeline-running helper for the tests above
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import torch
from diffusers import DiffusionPipeline
from tqdm import trange
from ..utils import hash_str_to_int
def run_pipeline(
    dataset: list[dict],
    batch_size: int,
    pipeline: DiffusionPipeline,
    save_dir: str | os.PathLike[str],
    forward_kwargs: dict | None = None,
):
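    """Run `pipeline` over `dataset` in batches and save one PNG per item.

    Each dataset entry is a dict of pipeline kwargs plus a "filename" key;
    the filename is hashed into a deterministic per-image seed.
    """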
if isinstance(save_dir, str):
save_dir = Path(save_dir)
assert isinstance(save_dir, Path)
save_dir.mkdir(parents=True, exist_ok=True)
pipeline.set_progress_bar_config(desc="Sampling", leave=False, dynamic_ncols=True, position=1)
for batch_idx in trange(len(dataset) // batch_size, desc="Batch", position=0, leave=False):
start_idx = batch_idx * batch_size
end_idx = start_idx + batch_size
batch = dataset[start_idx:end_idx]
        filenames = [item["filename"] for item in batch]
        # Deterministic per-image seeds derived from the filenames
        generators = [torch.Generator().manual_seed(hash_str_to_int(filename)) for filename in filenames]
        _forward_kwargs = dict(forward_kwargs) if forward_kwargs else {}
        # diffusers expects unbatched arguments (and a single generator) when the batch size is 1
        _forward_kwargs["generator"] = generators if batch_size > 1 else generators[0]
        for k in batch[0].keys():
            if k == "filename":
                continue
            _forward_kwargs[k] = [item[k] for item in batch] if batch_size > 1 else batch[0][k]
images = pipeline(**_forward_kwargs).images
for i, image in enumerate(images):
filename = filenames[i]
image.save(os.path.join(save_dir, f"{filename}.png"))
gc.collect()
torch.cuda.empty_cache()
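# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the shared helpers the tests import from
# the test-utils package (`already_generate`, `compute_lpips`,
# `hash_str_to_int`). The real implementations in the repo may differ; this
# sketch only illustrates the assumed behavior.
# ---------------------------------------------------------------------------
import hashlib
from pathlib import Path

import lpips  # pip install lpips
import torch
import torchvision.transforms.functional as TF
from PIL import Image


def already_generate(save_dir, num_images: int) -> bool:
    """Assumed behavior: True if save_dir already holds num_images PNGs."""
    save_dir = Path(save_dir)
    return save_dir.exists() and len(list(save_dir.glob("*.png"))) >= num_images


def hash_str_to_int(s: str) -> int:
    """Assumed behavior: stable string-to-seed mapping (unlike built-in hash())."""
    return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16) % (1 << 31)


def compute_lpips(dir_a, dir_b, batch_size: int = 1) -> float:
    """Assumed behavior: mean LPIPS distance between same-named image pairs.

    batch_size mirrors the real helper's signature; this sketch scores one
    image pair at a time.
    """
    loss_fn = lpips.LPIPS(net="alex")
    scores = []
    for path_a in sorted(Path(dir_a).glob("*.png")):
        path_b = Path(dir_b) / path_a.name
        a = TF.to_tensor(Image.open(path_a).convert("RGB")) * 2 - 1  # scale to [-1, 1]
        b = TF.to_tensor(Image.open(path_b).convert("RGB")) * 2 - 1
        with torch.no_grad():
            scores.append(loss_fn(a.unsqueeze(0), b.unsqueeze(0)).item())
    return sum(scores) / len(scores)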