Unverified commit 4519ad6c, authored by Muyang Li, committed by GitHub

chore: add V1 flux tests (#742)

* make linter happy

* add tests for qwen-image-edit-2509

* update

* update

* flux schnell test runnable

* update the test score

* make linter happy

* add fp4 results

* fix the test score

* add tests for flux_dev

* update the test score

* add flux.1-krea

* fix the krea tests

* update

* update

* add kontext

* update

* fix kontext

* update

* add flux.1-depth

* add flux-tools

* finish flux tools

* add more flux examples

* update

* update3

* update

* update score

* update

* update
parent 5b9af2f1
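
New file: FLUX.1-Canny-dev LPIPS regression test. It generates 16-bit reference images with FluxControlPipeline (guidance_scale=30), re-runs the same canny-conditioned prompts through the rank-32 SVDQuant transformer, and asserts the LPIPS distance stays within 10% of the recorded score.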
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import FluxControlPipeline
from diffusers.utils import load_image

from nunchaku import NunchakuFluxTransformer2DModelV2
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(
        self,
        rank: int = 32,
        batch_size: int = 1,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: int = 20,
        attention_impl: str = "flashattn2",
        expected_lpips: dict[str, float] = {},
        model_name: str = "flux.1-canny-dev",
        repo_id: str = "black-forest-labs/FLUX.1-Canny-dev",
    ):
        self.rank = rank
        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.num_inference_steps = num_inference_steps
        self.attention_impl = attention_impl
        self.expected_lpips = expected_lpips
        self.model_name = model_name
        self.repo_id = repo_id
        self.model_path = (
            f"nunchaku-tech/nunchaku-flux.1-canny-dev/svdq-{precision}_r{rank}-flux.1-canny-dev.safetensors"
        )
        ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
        folder_name = f"w{width}h{height}t{num_inference_steps}"
        self.save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
        self.save_dir_nunchaku = (
            Path("test_results")
            / "nunchaku"
            / model_name
            / f"{precision}_r{rank}-{dtype_str}"
            / f"{folder_name}-bs{batch_size}"
        )
        self.pipeline_cls = FluxControlPipeline
        self.forward_kwargs = {
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": 30,
        }


@pytest.mark.parametrize(
    "case", [pytest.param(Case(expected_lpips={"int4-bf16": 0.13, "fp4-bf16": 0.1}), id="flux.1-canny-dev-r32")]
)
def test_flux_canny_dev(case: Case):
    batch_size = case.batch_size
    expected_lpips = case.expected_lpips
    repo_id = case.repo_id
    dataset = [
        {
            "prompt": "the insanely extreme muscle car, Big foot wheels, dragster style, flames, 6 wheels ",
            "filename": "1ce4f3b8627ab16e8f09e6e169d8744d32274880",
            "control_image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/1ce4f3b8627ab16e8f09e6e169d8744d32274880-canny.png"
            ).convert("RGB"),
        },
        # {
        #     "prompt": "sunlower, Folk Art ",
        #     "filename": "8c2fef24a984d4c76bebcfa406b7240fd25d7c36",
        #     "control_image": load_image(
        #         "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/8c2fef24a984d4c76bebcfa406b7240fd25d7c36-canny.png"
        #     ).convert("RGB"),
        # },
        # {
        #     "prompt": "modern realistic allium flowers, clean straight lines, black and white, a lot of white space to color, coloring book style ",
        #     "filename": "94f2b6fc3ab734ccdf6e57f72287f0a6df522dc0",
        #     "control_image": load_image(
        #         "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/94f2b6fc3ab734ccdf6e57f72287f0a6df522dc0-canny.png"
        #     ).convert("RGB"),
        # },
        {
            "prompt": " Content Spirit Wraith Coin Medium engraved metallic coin Style symmetrical, detailed design Lighting Reflective natural light Colors purples and grays Composition the beast centered, surrounded by elemental symbols, stats, and abilities Create a Spirit Wraith Elemental Guardian Coin featuring a symmetrical, detailed design of the Spirit Wraith guardian at the center, signifying its affinity for the spirit element. The coin should have reflective natural light with mystical purples and ethereal grays. Encircle the guardian with elemental symbols, stats, and abilities relevant to its spiritbased prowess. ",
            "filename": "d38575d92bfd143930c4e57daa69aad5a4be48a6",
            "control_image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/d38575d92bfd143930c4e57daa69aad5a4be48a6-canny.png"
            ).convert("RGB"),
        },
    ]
    # Generate the 16-bit reference images once; later runs reuse them from the cache dir.
    if not already_generate(case.save_dir_16bit, len(dataset)):
        pipeline = case.pipeline_cls.from_pretrained(case.repo_id, torch_dtype=torch_dtype)
        # Model-level offload needs more VRAM headroom than sequential (per-module) offload.
        if get_gpu_memory() > 25:
            pipeline.enable_model_cpu_offload()
        else:
            pipeline.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=case.batch_size,
            pipeline=pipeline,
            save_dir=case.save_dir_16bit,
            forward_kwargs=case.forward_kwargs,
        )
    transformer = NunchakuFluxTransformer2DModelV2.from_pretrained(case.model_path, torch_dtype=torch_dtype)
    pipe = case.pipeline_cls.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        # On small GPUs, let nunchaku manage transformer offloading itself and
        # exclude the transformer from diffusers' sequential offload.
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=case.save_dir_nunchaku,
        forward_kwargs=case.forward_kwargs,
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(case.save_dir_16bit, case.save_dir_nunchaku)
    print(f"lpips: {lpips}")
    # Allow 10% slack over the recorded expected LPIPS score.
    assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
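
New file: FLUX.1-Depth-dev test. Same reference-vs-nunchaku LPIPS harness as above, driven by depth control images with guidance_scale=10.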
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import FluxControlPipeline
from diffusers.utils import load_image

from nunchaku import NunchakuFluxTransformer2DModelV2
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(
        self,
        rank: int = 32,
        batch_size: int = 1,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: int = 20,
        attention_impl: str = "flashattn2",
        expected_lpips: dict[str, float] = {},
        model_name: str = "flux.1-depth-dev",
        repo_id: str = "black-forest-labs/FLUX.1-Depth-dev",
    ):
        self.rank = rank
        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.num_inference_steps = num_inference_steps
        self.attention_impl = attention_impl
        self.expected_lpips = expected_lpips
        self.model_name = model_name
        self.repo_id = repo_id
        self.model_path = (
            f"nunchaku-tech/nunchaku-flux.1-depth-dev/svdq-{precision}_r{rank}-flux.1-depth-dev.safetensors"
        )
        ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
        folder_name = f"w{width}h{height}t{num_inference_steps}"
        self.save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
        self.save_dir_nunchaku = (
            Path("test_results")
            / "nunchaku"
            / model_name
            / f"{precision}_r{rank}-{dtype_str}"
            / f"{folder_name}-bs{batch_size}"
        )
        self.pipeline_cls = FluxControlPipeline
        self.forward_kwargs = {
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": 10,
        }


@pytest.mark.parametrize(
    "case", [pytest.param(Case(expected_lpips={"int4-bf16": 0.13, "fp4-bf16": 0.11}), id="flux.1-depth-dev-r32")]
)
def test_flux_depth_dev(case: Case):
    batch_size = case.batch_size
    expected_lpips = case.expected_lpips
    repo_id = case.repo_id
    dataset = [
        {
            "prompt": "the insanely extreme muscle car, Big foot wheels, dragster style, flames, 6 wheels ",
            "filename": "1ce4f3b8627ab16e8f09e6e169d8744d32274880",
            "control_image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/1ce4f3b8627ab16e8f09e6e169d8744d32274880-depth.png"
            ).convert("RGB"),
        },
        {
            "prompt": "sunlower, Folk Art ",
            "filename": "8c2fef24a984d4c76bebcfa406b7240fd25d7c36",
            "control_image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/8c2fef24a984d4c76bebcfa406b7240fd25d7c36-depth.png"
            ).convert("RGB"),
        },
        # {
        #     "prompt": "modern realistic allium flowers, clean straight lines, black and white, a lot of white space to color, coloring book style ",
        #     "filename": "94f2b6fc3ab734ccdf6e57f72287f0a6df522dc0",
        #     "control_image": load_image(
        #         "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/94f2b6fc3ab734ccdf6e57f72287f0a6df522dc0-depth.png"
        #     ).convert("RGB"),
        # },
        # {
        #     "prompt": " Content Spirit Wraith Coin Medium engraved metallic coin Style symmetrical, detailed design Lighting Reflective natural light Colors purples and grays Composition the beast centered, surrounded by elemental symbols, stats, and abilities Create a Spirit Wraith Elemental Guardian Coin featuring a symmetrical, detailed design of the Spirit Wraith guardian at the center, signifying its affinity for the spirit element. The coin should have reflective natural light with mystical purples and ethereal grays. Encircle the guardian with elemental symbols, stats, and abilities relevant to its spiritbased prowess. ",
        #     "filename": "d38575d92bfd143930c4e57daa69aad5a4be48a6",
        #     "control_image": load_image(
        #         "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/d38575d92bfd143930c4e57daa69aad5a4be48a6-depth.png"
        #     ).convert("RGB"),
        # },
    ]
    if not already_generate(case.save_dir_16bit, len(dataset)):
        pipeline = case.pipeline_cls.from_pretrained(case.repo_id, torch_dtype=torch_dtype)
        if get_gpu_memory() > 25:
            pipeline.enable_model_cpu_offload()
        else:
            pipeline.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=case.batch_size,
            pipeline=pipeline,
            save_dir=case.save_dir_16bit,
            forward_kwargs=case.forward_kwargs,
        )
    transformer = NunchakuFluxTransformer2DModelV2.from_pretrained(case.model_path, torch_dtype=torch_dtype)
    pipe = case.pipeline_cls.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=case.save_dir_nunchaku,
        forward_kwargs=case.forward_kwargs,
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(case.save_dir_16bit, case.save_dir_nunchaku)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
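
New file: FLUX.1-dev text-to-image test (guidance_scale=3.5, three active prompts).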
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import FluxPipeline

from nunchaku import NunchakuFluxTransformer2DModelV2
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(
        self,
        rank: int = 32,
        batch_size: int = 1,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: int = 20,
        attention_impl: str = "flashattn2",
        expected_lpips: dict[str, float] = {},
        model_name: str = "flux.1-dev",
        repo_id: str = "black-forest-labs/FLUX.1-dev",
    ):
        self.rank = rank
        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.num_inference_steps = num_inference_steps
        self.attention_impl = attention_impl
        self.expected_lpips = expected_lpips
        self.model_name = model_name
        self.repo_id = repo_id
        self.model_path = f"nunchaku-tech/nunchaku-flux.1-dev/svdq-{precision}_r{rank}-flux.1-dev.safetensors"
        ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
        folder_name = f"w{width}h{height}t{num_inference_steps}"
        self.save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
        self.save_dir_nunchaku = (
            Path("test_results")
            / "nunchaku"
            / model_name
            / f"{precision}_r{rank}-{dtype_str}"
            / f"{folder_name}-bs{batch_size}"
        )
        self.forward_kwargs = {
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": 3.5,
        }


@pytest.mark.parametrize(
    "case", [pytest.param(Case(expected_lpips={"int4-bf16": 0.17, "fp4-bf16": 0.19}), id="flux.1-dev-r32")]
)
def test_flux_dev(case: Case):
    batch_size = case.batch_size
    expected_lpips = case.expected_lpips
    rank = case.rank
    repo_id = case.repo_id
    dataset = [
        {
            "prompt": "Plain light background, man to the side, light, happy, eye contact, black man aged 25 50, stylish confident man, suit, great straight hair, ",
            "filename": "man",
        },
        {
            "prompt": "3d rendering of isometric cupcake logo, pastel colors, octane rendering, unreal egine ",
            "filename": "cupcake_logo",
        },
        {
            "prompt": "character design and sketch, evil, female, drow elf, sorcerer, sharp facial features, large iris, dark blue and indigo colors, long and ornate cape, rainbowcolored gems and jewelry, leather armor, jeweled dagger, dark purple long hair, gothic ",
            "filename": "character_design",
        },
        # {
        #     "prompt": "a hauntingly sparse drivein theater with a single red car and a single audio post. ",
        #     "filename": "drivein_theater",
        # },
    ]
    if not already_generate(case.save_dir_16bit, len(dataset)):
        pipeline = FluxPipeline.from_pretrained(case.repo_id, torch_dtype=torch_dtype)
        if get_gpu_memory() > 25:
            pipeline.enable_model_cpu_offload()
        else:
            pipeline.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=case.batch_size,
            pipeline=pipeline,
            save_dir=case.save_dir_16bit,
            forward_kwargs=case.forward_kwargs,
        )
    transformer = NunchakuFluxTransformer2DModelV2.from_pretrained(
        f"nunchaku-tech/nunchaku-flux.1-dev/svdq-{precision}_r{rank}-flux.1-dev.safetensors",
        torch_dtype=torch_dtype,
    )
    pipe = FluxPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=case.save_dir_nunchaku,
        forward_kwargs=case.forward_kwargs,
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(case.save_dir_16bit, case.save_dir_nunchaku)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
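
New file: FLUX.1-Fill-dev inpainting test. Each dataset entry supplies an input image plus a mask_image, with guidance_scale=30.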
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import FluxFillPipeline
from diffusers.utils import load_image

from nunchaku import NunchakuFluxTransformer2DModelV2
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(
        self,
        rank: int = 32,
        batch_size: int = 1,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: int = 20,
        attention_impl: str = "flashattn2",
        expected_lpips: dict[str, float] = {},
        model_name: str = "flux.1-fill-dev",
        repo_id: str = "black-forest-labs/FLUX.1-Fill-dev",
    ):
        self.rank = rank
        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.num_inference_steps = num_inference_steps
        self.attention_impl = attention_impl
        self.expected_lpips = expected_lpips
        self.model_name = model_name
        self.repo_id = repo_id
        self.model_path = f"nunchaku-tech/nunchaku-flux.1-fill-dev/svdq-{precision}_r{rank}-flux.1-fill-dev.safetensors"
        ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
        folder_name = f"w{width}h{height}t{num_inference_steps}"
        self.save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
        self.save_dir_nunchaku = (
            Path("test_results")
            / "nunchaku"
            / model_name
            / f"{precision}_r{rank}-{dtype_str}"
            / f"{folder_name}-bs{batch_size}"
        )
        self.pipeline_cls = FluxFillPipeline
        self.forward_kwargs = {
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": 30,
        }


@pytest.mark.parametrize(
    "case", [pytest.param(Case(expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1}), id="flux.1-fill-dev-r32")]
)
def test_flux_fill_dev(case: Case):
    batch_size = case.batch_size
    expected_lpips = case.expected_lpips
    repo_id = case.repo_id
    dataset = [
        {
            "prompt": "the insanely extreme muscle car, Big foot wheels, dragster style, flames, 6 wheels ",
            "filename": "1ce4f3b8627ab16e8f09e6e169d8744d32274880",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/1ce4f3b8627ab16e8f09e6e169d8744d32274880-image.png"
            ).convert("RGB"),
            "mask_image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/1ce4f3b8627ab16e8f09e6e169d8744d32274880-mask.png"
            ).convert("RGB"),
        },
        # {
        #     "prompt": "sunlower, Folk Art ",
        #     "filename": "8c2fef24a984d4c76bebcfa406b7240fd25d7c36",
        #     "image": load_image(
        #         "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/8c2fef24a984d4c76bebcfa406b7240fd25d7c36-image.png"
        #     ).convert("RGB"),
        #     "mask_image": load_image(
        #         "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/8c2fef24a984d4c76bebcfa406b7240fd25d7c36-mask.png"
        #     ).convert("RGB"),
        # },
        {
            "prompt": "modern realistic allium flowers, clean straight lines, black and white, a lot of white space to color, coloring book style ",
            "filename": "94f2b6fc3ab734ccdf6e57f72287f0a6df522dc0",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/94f2b6fc3ab734ccdf6e57f72287f0a6df522dc0-image.png"
            ).convert("RGB"),
            "mask_image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/94f2b6fc3ab734ccdf6e57f72287f0a6df522dc0-mask.png"
            ).convert("RGB"),
        },
        {
            "prompt": " Content Spirit Wraith Coin Medium engraved metallic coin Style symmetrical, detailed design Lighting Reflective natural light Colors purples and grays Composition the beast centered, surrounded by elemental symbols, stats, and abilities Create a Spirit Wraith Elemental Guardian Coin featuring a symmetrical, detailed design of the Spirit Wraith guardian at the center, signifying its affinity for the spirit element. The coin should have reflective natural light with mystical purples and ethereal grays. Encircle the guardian with elemental symbols, stats, and abilities relevant to its spiritbased prowess. ",
            "filename": "d38575d92bfd143930c4e57daa69aad5a4be48a6",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/d38575d92bfd143930c4e57daa69aad5a4be48a6-image.png"
            ).convert("RGB"),
            "mask_image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/d38575d92bfd143930c4e57daa69aad5a4be48a6-mask.png"
            ).convert("RGB"),
        },
    ]
    if not already_generate(case.save_dir_16bit, len(dataset)):
        pipeline = case.pipeline_cls.from_pretrained(case.repo_id, torch_dtype=torch_dtype)
        if get_gpu_memory() > 25:
            pipeline.enable_model_cpu_offload()
        else:
            pipeline.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=case.batch_size,
            pipeline=pipeline,
            save_dir=case.save_dir_16bit,
            forward_kwargs=case.forward_kwargs,
        )
    transformer = NunchakuFluxTransformer2DModelV2.from_pretrained(case.model_path, torch_dtype=torch_dtype)
    pipe = case.pipeline_cls.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=case.save_dir_nunchaku,
        forward_kwargs=case.forward_kwargs,
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(case.save_dir_16bit, case.save_dir_nunchaku)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
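
New file: FLUX.1-Kontext-dev image-editing test (guidance_scale=2.5, two editing prompts).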
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import FluxKontextPipeline
from diffusers.utils import load_image

from nunchaku import NunchakuFluxTransformer2DModelV2
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(
        self,
        rank: int = 32,
        batch_size: int = 1,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: int = 20,
        attention_impl: str = "flashattn2",
        expected_lpips: dict[str, float] = {},
        model_name: str = "flux.1-kontext-dev",
        repo_id: str = "black-forest-labs/FLUX.1-Kontext-dev",
    ):
        self.rank = rank
        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.num_inference_steps = num_inference_steps
        self.attention_impl = attention_impl
        self.expected_lpips = expected_lpips
        self.model_name = model_name
        self.repo_id = repo_id
        self.model_path = (
            f"nunchaku-tech/nunchaku-flux.1-kontext-dev/svdq-{precision}_r{rank}-flux.1-kontext-dev.safetensors"
        )
        ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
        folder_name = f"w{width}h{height}t{num_inference_steps}"
        self.save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
        self.save_dir_nunchaku = (
            Path("test_results")
            / "nunchaku"
            / model_name
            / f"{precision}_r{rank}-{dtype_str}"
            / f"{folder_name}-bs{batch_size}"
        )
        self.forward_kwargs = {
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": 2.5,
        }


@pytest.mark.parametrize(
    "case", [pytest.param(Case(expected_lpips={"int4-bf16": 0.17, "fp4-bf16": 0.13}), id="flux.1-kontext-dev-r32")]
)
def test_flux_kontext_dev(case: Case):
    batch_size = case.batch_size
    expected_lpips = case.expected_lpips
    repo_id = case.repo_id
    dataset = [
        {
            "prompt": "make the cat floating in the air and holding a sign that reads 'this is fun' written with a blue crayon",
            "filename": "cat_sitting.png",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/cat_sitting.jpg"
            ).convert("RGB"),
        },
        {
            "prompt": "turn the style of the photo to vintage comic book",
            "filename": "pie",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/pie.png"
            ).convert("RGB"),
        },
    ]
    if not already_generate(case.save_dir_16bit, len(dataset)):
        pipeline = FluxKontextPipeline.from_pretrained(case.repo_id, torch_dtype=torch_dtype)
        if get_gpu_memory() > 25:
            pipeline.enable_model_cpu_offload()
        else:
            pipeline.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=case.batch_size,
            pipeline=pipeline,
            save_dir=case.save_dir_16bit,
            forward_kwargs=case.forward_kwargs,
        )
    transformer = NunchakuFluxTransformer2DModelV2.from_pretrained(case.model_path, torch_dtype=torch_dtype)
    pipe = FluxKontextPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=case.save_dir_nunchaku,
        forward_kwargs=case.forward_kwargs,
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(case.save_dir_16bit, case.save_dir_nunchaku)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
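
New file: FLUX.1-Krea-dev text-to-image test (guidance_scale=3.5).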
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import FluxPipeline

from nunchaku import NunchakuFluxTransformer2DModelV2
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(
        self,
        rank: int = 32,
        batch_size: int = 1,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: int = 20,
        attention_impl: str = "flashattn2",
        expected_lpips: dict[str, float] = {},
        model_name: str = "flux.1-krea-dev",
        repo_id: str = "black-forest-labs/FLUX.1-Krea-dev",
    ):
        self.rank = rank
        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.num_inference_steps = num_inference_steps
        self.attention_impl = attention_impl
        self.expected_lpips = expected_lpips
        self.model_name = model_name
        self.repo_id = repo_id
        self.model_path = f"nunchaku-tech/nunchaku-flux.1-krea-dev/svdq-{precision}_r{rank}-flux.1-krea-dev.safetensors"
        ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
        folder_name = f"w{width}h{height}t{num_inference_steps}"
        self.save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
        self.save_dir_nunchaku = (
            Path("test_results")
            / "nunchaku"
            / model_name
            / f"{precision}_r{rank}-{dtype_str}"
            / f"{folder_name}-bs{batch_size}"
        )
        self.forward_kwargs = {
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": 3.5,
        }


@pytest.mark.parametrize(
    "case", [pytest.param(Case(expected_lpips={"int4-bf16": 0.20, "fp4-bf16": 0.11}), id="flux.1-krea-dev-r32")]
)
def test_flux_krea_dev(case: Case):
    batch_size = case.batch_size
    expected_lpips = case.expected_lpips
    repo_id = case.repo_id
    dataset = [
        # {
        #     "prompt": "Plain light background, man to the side, light, happy, eye contact, black man aged 25 50, stylish confident man, suit, great straight hair, ",
        #     "filename": "man",
        # },
        {
            "prompt": "3d rendering of isometric cupcake logo, pastel colors, octane rendering, unreal egine ",
            "filename": "cupcake_logo",
        },
        {
            "prompt": "character design and sketch, evil, female, drow elf, sorcerer, sharp facial features, large iris, dark blue and indigo colors, long and ornate cape, rainbowcolored gems and jewelry, leather armor, jeweled dagger, dark purple long hair, gothic ",
            "filename": "character_design",
        },
        {
            "prompt": "a hauntingly sparse drivein theater with a single red car and a single audio post. ",
            "filename": "drivein_theater",
        },
    ]
    if not already_generate(case.save_dir_16bit, len(dataset)):
        pipeline = FluxPipeline.from_pretrained(case.repo_id, torch_dtype=torch_dtype)
        if get_gpu_memory() > 25:
            pipeline.enable_model_cpu_offload()
        else:
            pipeline.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=case.batch_size,
            pipeline=pipeline,
            save_dir=case.save_dir_16bit,
            forward_kwargs=case.forward_kwargs,
        )
    transformer = NunchakuFluxTransformer2DModelV2.from_pretrained(case.model_path, torch_dtype=torch_dtype)
    pipe = FluxPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=case.save_dir_nunchaku,
        forward_kwargs=case.forward_kwargs,
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(case.save_dir_16bit, case.save_dir_nunchaku)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
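
New file: FLUX.1-schnell test. The distilled model runs with 4 inference steps and no guidance_scale.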
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import FluxPipeline

from nunchaku import NunchakuFluxTransformer2DModelV2
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(
        self,
        rank: int = 32,
        batch_size: int = 1,
        width: int = 1024,
        height: int = 1024,
        num_inference_steps: int = 4,
        attention_impl: str = "flashattn2",
        expected_lpips: dict[str, float] = {},
        model_name: str = "flux.1-schnell",
        repo_id: str = "black-forest-labs/FLUX.1-schnell",
    ):
        self.rank = rank
        self.batch_size = batch_size
        self.width = width
        self.height = height
        self.num_inference_steps = num_inference_steps
        self.attention_impl = attention_impl
        self.expected_lpips = expected_lpips
        self.model_name = model_name
        self.repo_id = repo_id
        self.model_path = f"nunchaku-tech/nunchaku-flux.1-schnell/svdq-{precision}_r{rank}-flux.1-schnell.safetensors"
        ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
        folder_name = f"w{width}h{height}t{num_inference_steps}"
        self.save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
        self.save_dir_nunchaku = (
            Path("test_results")
            / "nunchaku"
            / model_name
            / f"{precision}_r{rank}-{dtype_str}"
            / f"{folder_name}-bs{batch_size}"
        )
        self.forward_kwargs = {
            "width": width,
            "height": height,
            "num_inference_steps": num_inference_steps,
        }


@pytest.mark.parametrize(
    "case", [pytest.param(Case(expected_lpips={"int4-bf16": 0.14, "fp4-bf16": 0.12}), id="flux.1-schnell-r32")]
)
def test_flux_schnell(case: Case):
    batch_size = case.batch_size
    expected_lpips = case.expected_lpips
    rank = case.rank
    repo_id = case.repo_id
    dataset = [
        {
            "prompt": "Plain light background, man to the side, light, happy, eye contact, black man aged 25 50, stylish confident man, suit, great straight hair, ",
            "filename": "man",
        },
        {
            "prompt": "3d rendering of isometric cupcake logo, pastel colors, octane rendering, unreal egine ",
            "filename": "cupcake_logo",
        },
        {
            "prompt": "character design and sketch, evil, female, drow elf, sorcerer, sharp facial features, large iris, dark blue and indigo colors, long and ornate cape, rainbowcolored gems and jewelry, leather armor, jeweled dagger, dark purple long hair, gothic ",
            "filename": "character_design",
        },
        {
            "prompt": "a hauntingly sparse drivein theater with a single red car and a single audio post. ",
            "filename": "drivein_theater",
        },
    ]
    if not already_generate(case.save_dir_16bit, len(dataset)):
        pipeline = FluxPipeline.from_pretrained(case.repo_id, torch_dtype=torch_dtype)
        if get_gpu_memory() > 25:
            pipeline.enable_model_cpu_offload()
        else:
            pipeline.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=case.batch_size,
            pipeline=pipeline,
            save_dir=case.save_dir_16bit,
            forward_kwargs=case.forward_kwargs,
        )
    transformer = NunchakuFluxTransformer2DModelV2.from_pretrained(
        f"nunchaku-tech/nunchaku-flux.1-schnell/svdq-{precision}_r{rank}-flux.1-schnell.safetensors",
        torch_dtype=torch_dtype,
    )
    pipe = FluxPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=case.save_dir_nunchaku,
        forward_kwargs=case.forward_kwargs,
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(case.save_dir_16bit, case.save_dir_nunchaku)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
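
Modified file: qwen-image-controlnet-union test. The hunk below relaxes the expected int4 LPIPS score from 0.1 to 0.12: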
@@ -56,7 +56,7 @@ class Case:
         Case(
             num_inference_steps=20,
             rank=128,
-            expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
+            expected_lpips={"int4-bf16": 0.12, "fp4-bf16": 0.1},
         ),
         id="qwen-image-controlnet-union-r128",
     ),
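
New file: Qwen-Image-Edit-2509 test, parameterized over rank-32 and rank-128 checkpoints with true_cfg_scale=4.0.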
import gc
import os
from pathlib import Path

import pytest
import torch
from diffusers import QwenImageEditPlusPipeline
from diffusers.utils import load_image

from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"


class Case:
    def __init__(self, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
        self.model_name = "qwen-image-edit-2509"
        self.num_inference_steps = num_inference_steps
        self.rank = rank
        self.expected_lpips = expected_lpips


@pytest.mark.parametrize(
    "case",
    [
        pytest.param(
            Case(
                num_inference_steps=20,
                rank=32,
                expected_lpips={"int4-bf16": 0.27, "fp4-bf16": 0.22},
            ),
            id="qwen-image-edit-2509-r32",
        ),
        pytest.param(
            Case(
                num_inference_steps=20,
                rank=128,
                expected_lpips={"int4-bf16": 0.26, "fp4-bf16": 0.24},
            ),
            id="qwen-image-edit-2509-r128",
        ),
    ],
)
def test_qwenimage_edit(case: Case):
    batch_size = 1
    true_cfg_scale = 4.0
    rank = case.rank
    expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
    model_name = case.model_name
    num_inference_steps = case.num_inference_steps
    ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
    folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
    save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
    repo_id = "Qwen/Qwen-Image-Edit-2509"
    dataset = [
        {
            "prompt": "make the cat floating in the air and holding a sign that reads 'this is fun' written with a blue crayon",
            "negative_prompt": " ",
            "filename": "cat_sitting.png",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/cat_sitting.jpg"
            ).convert("RGB"),
        },
        {
            "prompt": "turn the style of the photo to vintage comic book",
            "negative_prompt": " ",
            "filename": "pie",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/pie.png"
            ).convert("RGB"),
        },
    ]
    if not already_generate(save_dir_16bit, len(dataset)):
        pipe = QwenImageEditPlusPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
        pipe.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=1,
            pipeline=pipe,
            save_dir=save_dir_16bit,
            forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
        )
        del pipe
        gc.collect()
        torch.cuda.empty_cache()
    save_dir_nunchaku = (
        Path("test_results")
        / "nunchaku"
        / model_name
        / f"{precision}_r{rank}-{dtype_str}"
        / f"{folder_name}-bs{batch_size}"
    )
    model_path = (
        f"nunchaku-tech/nunchaku-qwen-image-edit-2509/svdq-{get_precision()}_r{rank}-qwen-image-edit-2509.safetensors"
    )
    transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
    pipe = QwenImageEditPlusPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=save_dir_nunchaku,
        forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips * 1.10
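
New file: Qwen-Image-Edit-2509 Lightning test. The 16-bit reference fuses the lightx2v Lightning LoRA, while the nunchaku side loads pre-baked Lightning checkpoints; both use a FlowMatchEulerDiscreteScheduler configured with shift=3, as in the Qwen-Image-Lightning repository.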
import gc
import math
import os
from pathlib import Path

import pytest
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPlusPipeline
from diffusers.utils import load_image

from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing

from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline

precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"

model_paths = {
    "qwen-image-edit-2509-lightningv2.0-4steps": "nunchaku-tech/nunchaku-qwen-image-edit-2509/svdq-{precision}_r{rank}-qwen-image-edit-2509-lightningv2.0-4steps.safetensors",
    "qwen-image-edit-2509-lightningv2.0-8steps": "nunchaku-tech/nunchaku-qwen-image-edit-2509/svdq-{precision}_r{rank}-qwen-image-edit-2509-lightningv2.0-8steps.safetensors",
}
lora_paths = {
    "qwen-image-edit-2509-lightningv2.0-4steps": (
        "lightx2v/Qwen-Image-Lightning",
        "Qwen-Image-Lightning-4steps-V2.0-bf16.safetensors",
    ),
    "qwen-image-edit-2509-lightningv2.0-8steps": (
        "lightx2v/Qwen-Image-Lightning",
        "Qwen-Image-Lightning-8steps-V2.0-bf16.safetensors",
    ),
}


class Case:
    def __init__(self, model_name: str, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
        self.model_name = model_name
        self.num_inference_steps = num_inference_steps
        self.rank = rank
        self.expected_lpips = expected_lpips


@pytest.mark.parametrize(
    "case",
    [
        pytest.param(
            Case(
                model_name="qwen-image-edit-2509-lightningv2.0-4steps",
                num_inference_steps=4,
                rank=32,
                expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
            ),
            id="qwen-image-edit-2509-lightningv2.0-4steps-r32",
        ),
        pytest.param(
            Case(
                model_name="qwen-image-edit-2509-lightningv2.0-4steps",
                num_inference_steps=4,
                rank=128,
                expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
            ),
            id="qwen-image-edit-2509-lightningv2.0-4steps-r128",
        ),
        pytest.param(
            Case(
                model_name="qwen-image-edit-2509-lightningv2.0-8steps",
                num_inference_steps=8,
                rank=32,
                expected_lpips={"int4-bf16": 0.11, "fp4-bf16": 0.1},
            ),
            id="qwen-image-edit-2509-lightningv2.0-8steps-r32",
        ),
        pytest.param(
            Case(
                model_name="qwen-image-edit-2509-lightningv2.0-8steps",
                num_inference_steps=8,
                rank=128,
                expected_lpips={"int4-bf16": 0.17, "fp4-bf16": 0.1},
            ),
            id="qwen-image-edit-2509-lightningv2.0-8steps-r128",
        ),
    ],
)
def test_qwenimage_edit_2509_lightning(case: Case):
    batch_size = 1
    true_cfg_scale = 1.0
    rank = case.rank
    expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
    model_name = case.model_name
    num_inference_steps = case.num_inference_steps
    ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
    folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
    save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
    repo_id = "Qwen/Qwen-Image-Edit-2509"
    # From https://github.com/ModelTC/Qwen-Image-Lightning/blob/342260e8f5468d2f24d084ce04f55e101007118b/generate_with_diffusers.py#L82C9-L97C10
    scheduler_config = {
        "base_image_seq_len": 256,
        "base_shift": math.log(3),  # We use shift=3 in distillation
        "invert_sigmas": False,
        "max_image_seq_len": 8192,
        "max_shift": math.log(3),  # We use shift=3 in distillation
        "num_train_timesteps": 1000,
        "shift": 1.0,
        "shift_terminal": None,  # set shift_terminal to None
        "stochastic_sampling": False,
        "time_shift_type": "exponential",
        "use_beta_sigmas": False,
        "use_dynamic_shifting": True,
        "use_exponential_sigmas": False,
        "use_karras_sigmas": False,
    }
    scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
    dataset = [
        {
            "prompt": "make the cat floating in the air and holding a sign that reads 'this is fun' written with a blue crayon",
            "filename": "cat_sitting.png",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/cat_sitting.jpg"
            ).convert("RGB"),
        },
        {
            "prompt": "turn the style of the photo to vintage comic book",
            "filename": "pie",
            "image": load_image(
                "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/pie.png"
            ).convert("RGB"),
        },
    ]
    if not already_generate(save_dir_16bit, len(dataset)):
        pipe = QwenImageEditPlusPipeline.from_pretrained(repo_id, scheduler=scheduler, torch_dtype=torch_dtype)
        pipe.load_lora_weights(lora_paths[model_name][0], weight_name=lora_paths[model_name][1])
        pipe.fuse_lora()
        pipe.unload_lora_weights()
        pipe.enable_sequential_cpu_offload()
        run_pipeline(
            dataset=dataset,
            batch_size=1,
            pipeline=pipe,
            save_dir=save_dir_16bit,
            forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
        )
        del pipe
        gc.collect()
        torch.cuda.empty_cache()
    save_dir_nunchaku = (
        Path("test_results")
        / "nunchaku"
        / model_name
        / f"{precision}_r{rank}-{dtype_str}"
        / f"{folder_name}-bs{batch_size}"
    )
    model_path = model_paths[model_name].format(precision=precision, rank=rank)
    transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
    pipe = QwenImageEditPlusPipeline.from_pretrained(
        repo_id, transformer=transformer, scheduler=scheduler, torch_dtype=torch_dtype
    )
    if get_gpu_memory() > 18:
        pipe.enable_model_cpu_offload()
    else:
        transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
        pipe._exclude_from_cpu_offload.append("transformer")
        pipe.enable_sequential_cpu_offload()
    run_pipeline(
        dataset=dataset,
        batch_size=batch_size,
        pipeline=pipe,
        save_dir=save_dir_nunchaku,
        forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
    )
    del transformer
    del pipe
    gc.collect()
    torch.cuda.empty_cache()
    lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
    print(f"lpips: {lpips}")
    assert lpips < expected_lpips * 1.10