Unverified commit de6a75b6, authored by Muyang Li and committed by GitHub

chore: add qwen-image tests for v1, clean the requirements (#712)

* add the v1 tests

* formalize the requirements

* update

* update

* format the dependencies properly

* update docs

* update

* update

* update the dependency

* fix the version in ci

* organize the dependencies properly

* update the pr list

* update tests

* add tests for qwen-image

* add qwen-image lightning

* add tests

* update fp4 lpips

* add tests for qwen-image-edit

* update

* update

* add 8 steps lora back

* update nvfp4 results

* add qwen-image-edit-lightning

* style: make linter happy

* do not batch inputs when the batch size is 1

* update

* finish all the tests

* add controlnet test

* style: make linter happy

* update

* add qwen-image controlnet

* update

* add fp4 metrics

* fix the tests
Parent: b0484ae0

# ---------------------------------------------------------------------------
# Qwen-Image text-to-image tests
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import pytest
import torch
from diffusers import QwenImagePipeline
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
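# Pick the quantized precision (e.g. int4/fp4) for the current GPU; Turing
# cards have no usable bf16 support, so fall back to fp16 there.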
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
@pytest.mark.parametrize(
"rank,expected_lpips",
[
(32, {"int4-bf16": 0.24, "fp4-bf16": 0.24}),
(128, {"int4-bf16": 0.31, "fp4-bf16": 0.18}),
],
)
def test_qwenimage(rank: int, expected_lpips: dict[str, float]):
model_name = "qwen-image"
batch_size = 1
width = 1664
height = 928
num_inference_steps = 20
true_cfg_scale = 4.0
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"w{width}h{height}t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image"
dataset = [
{
"prompt": """Bookstore window display. A sign displays “New Arrivals This Week”. Below, a shelf tag with the text “Best-Selling Novels Here”. To the side, a colorful poster advertises “Author Meet And Greet on Saturday” with a central portrait of the author. There are four books on the bookshelf, namely “The light between worlds” “When stars are scattered” “The slient patient” “The night circus” Ultra HD, 4K, cinematic composition.""",
"negative_prompt": " ",
"filename": "bookstore",
},
{
"prompt": "一副典雅庄重的对联悬挂于厅堂之中,房间是个安静古典的中式布置,桌子上放着一些青花瓷,对联上左书“义本生知人机同道善思新”,右书“通云赋智乾坤启数高志远”, 横批“智启通义”,字体飘逸,中间挂在一着一副中国风的画作,内容是岳阳楼。超清,4K,电影级构图",
"negative_prompt": " ",
"filename": "chinese_room",
},
{
"prompt": '一张企业级高质量PPT页面图像,整体采用科技感十足的星空蓝为主色调,背景融合流动的发光科技线条与微光粒子特效,营造出专业、现代且富有信任感的品牌氛围;页面顶部左侧清晰展示橘红色Alibaba标志,色彩鲜明、辨识度高。主标题位于画面中央偏上位置,使用大号加粗白色或浅蓝色字体写着“通义千问视觉基础模型”,字体现代简洁,突出技术感;主标题下方紧接一行楷体中文文字:“原生中文·复杂场景·自动布局”,字体柔和优雅,形成科技与人文的融合。下方居中排布展示了四张与图片,分别是:一幅写实与水墨风格结合的梅花特写,枝干苍劲、花瓣清雅,背景融入淡墨晕染与飘雪效果,体现坚韧不拔的精神气质;上方写着黑色的楷体"梅傲"。一株生长于山涧石缝中的兰花,叶片修长、花朵素净,搭配晨雾缭绕的自然环境,展现清逸脱俗的文人风骨;上方写着黑色的楷体"兰幽"。一组迎风而立的翠竹,竹叶随风摇曳,光影交错,背景为青灰色山岩与流水,呈现刚柔并济、虚怀若谷的文化意象;上方写着黑色的楷体"竹清"。一片盛开于秋日庭院的菊花丛,花色丰富、层次分明,配以落叶与古亭剪影,传递恬然自适的生活哲学;上方写着黑色的楷体"菊淡"。所有图片采用统一尺寸与边框样式,呈横向排列。页面底部中央用楷体小字写明“2025年8月,敬请期待”,排版工整、结构清晰,整体风格统一且细节丰富,极具视觉冲击力与品牌调性。',
"negative_prompt": " ",
"filename": "ppt",
},
]
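    # Each entry provides per-image pipeline kwargs plus a filename that
    # run_pipeline uses both for the saved image and for its random seed.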
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImagePipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(
f"nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image.safetensors",
torch_dtype=torch_dtype,
)
pipe = QwenImagePipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
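    # With more than 18 GiB of GPU memory, offload at the whole-model level;
    # otherwise keep only 20 transformer blocks resident on the GPU and
    # sequentially offload the rest of the pipeline.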
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku)
print(f"lpips: {lpips}")
assert lpips < expected_lpips[f"{precision}-{dtype_str}"] * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image ControlNet tests
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import diffusers
import packaging.version
import pytest
import torch
from diffusers.utils import load_image
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
try:
from diffusers import QwenImageControlNetModel, QwenImageControlNetPipeline
except ImportError:
QwenImageControlNetModel = None
QwenImageControlNetPipeline = None
# Skip when diffusers is too old: QwenImageControlNetPipeline is only
# available starting with diffusers 0.36 (releases up to 0.35.1 lack it).
pytestmark = pytest.mark.skipif(
    packaging.version.parse(diffusers.__version__) <= packaging.version.parse("0.35.1"),
    reason="QwenImageControlNetPipeline requires diffusers>=0.36",
)
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
class Case:
def __init__(self, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = "qwen-image-controlnet-union"
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
num_inference_steps=20,
rank=32,
expected_lpips={"int4-bf16": 0.13, "fp4-bf16": 0.11},
),
id="qwen-image-controlnet-union-r32",
),
pytest.param(
Case(
num_inference_steps=20,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-controlnet-union-r128",
),
],
)
def test_qwenimage_controlnet(case: Case):
batch_size = 1
true_cfg_scale = 4.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
forward_kwargs = {
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
"controlnet_conditioning_scale": 1.0,
}
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image"
dataset = [
{
"prompt": "Aesthetics art, traditional asian pagoda, elaborate golden accents, sky blue and white color palette, swirling cloud pattern, digital illustration, east asian architecture, ornamental rooftop, intricate detailing on building, cultural representation.",
"negative_prompt": " ",
"filename": "canny",
"control_image": load_image(
"https://huggingface.co/InstantX/Qwen-Image-ControlNet-Union/resolve/main/conds/canny.png"
).convert("RGB"),
},
{
"prompt": "A swanky, minimalist living room with a huge floor-to-ceiling window letting in loads of natural light. A beige couch with white cushions sits on a wooden floor, with a matching coffee table in front. The walls are a soft, warm beige, decorated with two framed botanical prints. A potted plant chills in the corner near the window. Sunlight pours through the leaves outside, casting cool shadows on the floor.",
"negative_prompt": " ",
"filename": "depth",
"control_image": load_image(
"https://huggingface.co/InstantX/Qwen-Image-ControlNet-Union/resolve/main/conds/depth.png"
).convert("RGB"),
},
{
"prompt": "Photograph of a young man with light brown hair and a beard, wearing a beige flat cap, black leather jacket, gray shirt, brown pants, and white sneakers. He's sitting on a concrete ledge in front of a large circular window, with a cityscape reflected in the glass. The wall is cream-colored, and the sky is clear blue. His shadow is cast on the wall.",
"negative_prompt": " ",
"filename": "pose",
"control_image": load_image(
"https://huggingface.co/InstantX/Qwen-Image-ControlNet-Union/resolve/main/conds/pose.png"
).convert("RGB"),
},
]
for item in dataset:
item["width"] = item["control_image"].size[0]
item["height"] = item["control_image"].size[1]
if not already_generate(save_dir_16bit, len(dataset)):
controlnet = QwenImageControlNetModel.from_pretrained(
"InstantX/Qwen-Image-ControlNet-Union", torch_dtype=torch_dtype
)
pipe = QwenImageControlNetPipeline.from_pretrained(repo_id, controlnet=controlnet, torch_dtype=torch_dtype)
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset, batch_size=1, pipeline=pipe, save_dir=save_dir_16bit, forward_kwargs=forward_kwargs
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = f"nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image.safetensors"
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
controlnet = QwenImageControlNetModel.from_pretrained(
"InstantX/Qwen-Image-ControlNet-Union", torch_dtype=torch_dtype
)
pipe = QwenImageControlNetPipeline.from_pretrained(
repo_id, transformer=transformer, controlnet=controlnet, torch_dtype=torch_dtype
)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset, batch_size=batch_size, pipeline=pipe, save_dir=save_dir_nunchaku, forward_kwargs=forward_kwargs
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image-Edit tests
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import pytest
import torch
from diffusers import QwenImageEditPipeline
from diffusers.utils import load_image
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
class Case:
def __init__(self, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = "qwen-image-edit"
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
num_inference_steps=20,
rank=32,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-r32",
),
pytest.param(
Case(
num_inference_steps=20,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-r128",
),
],
)
def test_qwenimage_edit(case: Case):
batch_size = 1
true_cfg_scale = 4.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image-Edit"
dataset = [
{
"prompt": "change the text to read '双截棍 Qwen Image Edit is here'",
"negative_prompt": " ",
"filename": "neon_sign",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/neon_sign.png"
).convert("RGB"),
},
{
"prompt": "Remove all UI text elements from the image. Keep the feeling that the characters and scene are in water. Also, remove the green UI elements at the bottom.",
"negative_prompt": " ",
"filename": "comfy_poster",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/comfy_poster.png"
).convert("RGB"),
},
]
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImageEditPipeline.from_pretrained(repo_id, torch_dtype=torch_dtype)
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = f"nunchaku-tech/nunchaku-qwen-image-edit/svdq-{get_precision()}_r{rank}-qwen-image-edit.safetensors"
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
pipe = QwenImageEditPipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image-Edit Lightning tests
# ---------------------------------------------------------------------------
import gc
import math
import os
from pathlib import Path
import pytest
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImageEditPipeline
from diffusers.utils import load_image
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
model_paths = {
"qwen-image-edit-lightningv1.0-4steps": "nunchaku-tech/nunchaku-qwen-image-edit/svdq-{precision}_r{rank}-qwen-image-edit-lightningv1.0-4steps.safetensors",
"qwen-image-edit-lightningv1.0-8steps": "nunchaku-tech/nunchaku-qwen-image-edit/svdq-{precision}_r{rank}-qwen-image-edit-lightningv1.0-8steps.safetensors",
}
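# The {precision} and {rank} placeholders are filled in with str.format() when
# the checkpoint path is resolved inside the test.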
lora_paths = {
"qwen-image-edit-lightningv1.0-4steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Edit-Lightning-4steps-V1.0-bf16.safetensors",
),
"qwen-image-edit-lightningv1.0-8steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Edit-Lightning-8steps-V1.0-bf16.safetensors",
),
}
class Case:
def __init__(self, model_name: str, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = model_name
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-4steps",
num_inference_steps=4,
rank=32,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-4steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-4steps",
num_inference_steps=4,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-4steps-r128",
),
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-8steps",
num_inference_steps=8,
rank=32,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-8steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-edit-lightningv1.0-8steps",
num_inference_steps=8,
rank=128,
expected_lpips={"int4-bf16": 0.1, "fp4-bf16": 0.1},
),
id="qwen-image-edit-lightningv1.0-8steps-r128",
),
],
)
def test_qwenimage_edit_lightning(case: Case):
batch_size = 1
true_cfg_scale = 1.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image-Edit"
# From https://github.com/ModelTC/Qwen-Image-Lightning/blob/342260e8f5468d2f24d084ce04f55e101007118b/generate_with_diffusers.py#L82C9-L97C10
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3), # We use shift=3 in distillation
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3), # We use shift=3 in distillation
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None, # set shift_terminal to None
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
dataset = [
{
"prompt": "change the text to read '双截棍 Qwen Image Edit is here'",
"filename": "neon_sign",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/neon_sign.png"
).convert("RGB"),
},
{
"prompt": "Remove all UI text elements from the image. Keep the feeling that the characters and scene are in water. Also, remove the green UI elements at the bottom.",
"filename": "comfy_poster",
"image": load_image(
"https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/inputs/comfy_poster.png"
).convert("RGB"),
},
]
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImageEditPipeline.from_pretrained(repo_id, scheduler=scheduler, torch_dtype=torch_dtype)
pipe.load_lora_weights(lora_paths[model_name][0], weight_name=lora_paths[model_name][1])
pipe.fuse_lora()
pipe.unload_lora_weights()
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = model_paths[model_name].format(precision=precision, rank=rank)
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
pipe = QwenImageEditPipeline.from_pretrained(
repo_id, transformer=transformer, scheduler=scheduler, torch_dtype=torch_dtype
)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={"num_inference_steps": num_inference_steps, "true_cfg_scale": true_cfg_scale},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku, batch_size=1)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Qwen-Image Lightning tests
# ---------------------------------------------------------------------------
import gc
import math
import os
from pathlib import Path
import pytest
import torch
from diffusers import FlowMatchEulerDiscreteScheduler, QwenImagePipeline
from nunchaku.models.transformers.transformer_qwenimage import NunchakuQwenImageTransformer2DModel
from nunchaku.utils import get_gpu_memory, get_precision, is_turing
from ...utils import already_generate, compute_lpips
from ..utils import run_pipeline
precision = get_precision()
torch_dtype = torch.float16 if is_turing() else torch.bfloat16
dtype_str = "fp16" if torch_dtype == torch.float16 else "bf16"
model_paths = {
"qwen-image-lightningv1.0-4steps": "nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image-lightningv1.0-{num_inference_steps}steps.safetensors",
"qwen-image-lightningv1.1-8steps": "nunchaku-tech/nunchaku-qwen-image/svdq-{precision}_r{rank}-qwen-image-lightningv1.1-{num_inference_steps}steps.safetensors",
}
lora_paths = {
"qwen-image-lightningv1.0-4steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Lightning-4steps-V1.0-bf16.safetensors",
),
"qwen-image-lightningv1.1-8steps": (
"lightx2v/Qwen-Image-Lightning",
"Qwen-Image-Lightning-8steps-V1.1-bf16.safetensors",
),
}
class Case:
def __init__(self, model_name: str, num_inference_steps: int, rank: int, expected_lpips: dict[str, float]):
self.model_name = model_name
self.num_inference_steps = num_inference_steps
self.rank = rank
self.expected_lpips = expected_lpips
@pytest.mark.parametrize(
"case",
[
pytest.param(
Case(
model_name="qwen-image-lightningv1.0-4steps",
num_inference_steps=4,
rank=32,
expected_lpips={"int4-bf16": 0.35, "fp4-bf16": 0.33},
),
id="qwen-image-lightningv1.0-4steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-lightningv1.0-4steps",
num_inference_steps=4,
rank=128,
expected_lpips={"int4-bf16": 0.32, "fp4-bf16": 0.32},
),
id="qwen-image-lightningv1.0-4steps-r128",
),
pytest.param(
Case(
model_name="qwen-image-lightningv1.1-8steps",
num_inference_steps=8,
rank=32,
expected_lpips={"int4-bf16": 0.33, "fp4-bf16": 0.34},
),
id="qwen-image-lightningv1.1-8steps-r32",
),
pytest.param(
Case(
model_name="qwen-image-lightningv1.1-8steps",
num_inference_steps=8,
rank=128,
expected_lpips={"int4-bf16": 0.31, "fp4-bf16": 0.32},
),
id="qwen-image-lightningv1.1-8steps-r128",
),
],
)
def test_qwenimage_lightning(case: Case):
batch_size = 1
width = 1024
height = 1024
true_cfg_scale = 1.0
rank = case.rank
expected_lpips = case.expected_lpips[f"{precision}-{dtype_str}"]
model_name = case.model_name
num_inference_steps = case.num_inference_steps
ref_root = os.environ.get("NUNCHAKU_TEST_CACHE_ROOT", os.path.join("test_results", "ref"))
folder_name = f"w{width}h{height}t{num_inference_steps}g{true_cfg_scale}"
save_dir_16bit = Path(ref_root) / model_name / dtype_str / folder_name
repo_id = "Qwen/Qwen-Image"
dataset = [
{
"prompt": """Bookstore window display. A sign displays “New Arrivals This Week”. Below, a shelf tag with the text “Best-Selling Novels Here”. To the side, a colorful poster advertises “Author Meet And Greet on Saturday” with a central portrait of the author. There are four books on the bookshelf, namely “The light between worlds” “When stars are scattered” “The slient patient” “The night circus” Ultra HD, 4K, cinematic composition.""",
"filename": "bookstore",
},
{
"prompt": "一副典雅庄重的对联悬挂于厅堂之中,房间是个安静古典的中式布置,桌子上放着一些青花瓷,对联上左书“义本生知人机同道善思新”,右书“通云赋智乾坤启数高志远”, 横批“智启通义”,字体飘逸,中间挂在一着一副中国风的画作,内容是岳阳楼。超清,4K,电影级构图",
"filename": "chinese_room",
},
{
"prompt": '一张企业级高质量PPT页面图像,整体采用科技感十足的星空蓝为主色调,背景融合流动的发光科技线条与微光粒子特效,营造出专业、现代且富有信任感的品牌氛围;页面顶部左侧清晰展示橘红色Alibaba标志,色彩鲜明、辨识度高。主标题位于画面中央偏上位置,使用大号加粗白色或浅蓝色字体写着“通义千问视觉基础模型”,字体现代简洁,突出技术感;主标题下方紧接一行楷体中文文字:“原生中文·复杂场景·自动布局”,字体柔和优雅,形成科技与人文的融合。下方居中排布展示了四张与图片,分别是:一幅写实与水墨风格结合的梅花特写,枝干苍劲、花瓣清雅,背景融入淡墨晕染与飘雪效果,体现坚韧不拔的精神气质;上方写着黑色的楷体"梅傲"。一株生长于山涧石缝中的兰花,叶片修长、花朵素净,搭配晨雾缭绕的自然环境,展现清逸脱俗的文人风骨;上方写着黑色的楷体"兰幽"。一组迎风而立的翠竹,竹叶随风摇曳,光影交错,背景为青灰色山岩与流水,呈现刚柔并济、虚怀若谷的文化意象;上方写着黑色的楷体"竹清"。一片盛开于秋日庭院的菊花丛,花色丰富、层次分明,配以落叶与古亭剪影,传递恬然自适的生活哲学;上方写着黑色的楷体"菊淡"。所有图片采用统一尺寸与边框样式,呈横向排列。页面底部中央用楷体小字写明“2025年8月,敬请期待”,排版工整、结构清晰,整体风格统一且细节丰富,极具视觉冲击力与品牌调性。',
"filename": "ppt",
},
]
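    # Same Lightning scheduler configuration as the edit-lightning test above
    # (from the Qwen-Image-Lightning reference script).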
scheduler_config = {
"base_image_seq_len": 256,
"base_shift": math.log(3), # We use shift=3 in distillation
"invert_sigmas": False,
"max_image_seq_len": 8192,
"max_shift": math.log(3), # We use shift=3 in distillation
"num_train_timesteps": 1000,
"shift": 1.0,
"shift_terminal": None, # set shift_terminal to None
"stochastic_sampling": False,
"time_shift_type": "exponential",
"use_beta_sigmas": False,
"use_dynamic_shifting": True,
"use_exponential_sigmas": False,
"use_karras_sigmas": False,
}
scheduler = FlowMatchEulerDiscreteScheduler.from_config(scheduler_config)
if not already_generate(save_dir_16bit, len(dataset)):
pipe = QwenImagePipeline.from_pretrained(repo_id, scheduler=scheduler, torch_dtype=torch_dtype)
pipe.load_lora_weights(lora_paths[model_name][0], weight_name=lora_paths[model_name][1])
pipe.fuse_lora()
pipe.unload_lora_weights()
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=1,
pipeline=pipe,
save_dir=save_dir_16bit,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del pipe
gc.collect()
torch.cuda.empty_cache()
save_dir_nunchaku = (
Path("test_results")
/ "nunchaku"
/ model_name
/ f"{precision}_r{rank}-{dtype_str}"
/ f"{folder_name}-bs{batch_size}"
)
model_path = model_paths[model_name].format(precision=precision, rank=rank, num_inference_steps=num_inference_steps)
transformer = NunchakuQwenImageTransformer2DModel.from_pretrained(model_path, torch_dtype=torch_dtype)
pipe = QwenImagePipeline.from_pretrained(repo_id, transformer=transformer, torch_dtype=torch_dtype)
if get_gpu_memory() > 18:
pipe.enable_model_cpu_offload()
else:
transformer.set_offload(True, use_pin_memory=True, num_blocks_on_gpu=20)
pipe._exclude_from_cpu_offload.append("transformer")
pipe.enable_sequential_cpu_offload()
run_pipeline(
dataset=dataset,
batch_size=batch_size,
pipeline=pipe,
save_dir=save_dir_nunchaku,
forward_kwargs={
"width": width,
"height": height,
"num_inference_steps": num_inference_steps,
"true_cfg_scale": true_cfg_scale,
},
)
del transformer
del pipe
gc.collect()
torch.cuda.empty_cache()
lpips = compute_lpips(save_dir_16bit, save_dir_nunchaku)
print(f"lpips: {lpips}")
assert lpips < expected_lpips * 1.10
# ---------------------------------------------------------------------------
# Smoke tests for the v1 example scripts
# ---------------------------------------------------------------------------
import subprocess
import sys
from pathlib import Path
import pytest
from nunchaku.utils import get_precision
EXAMPLES_DIR = Path("./examples/v1")
# Sort for a deterministic parametrization order.
example_scripts = sorted(str(f) for f in EXAMPLES_DIR.iterdir() if f.is_file() and f.suffix == ".py")
@pytest.mark.parametrize("script_path", example_scripts)
def test_example_script_runs(script_path):
if "sdxl" in script_path and get_precision() == "fp4":
pytest.skip("Skip FP4 tests for SDXL!")
    # Use the current interpreter so the scripts run in the same environment.
    result = subprocess.run([sys.executable, script_path], text=True)
print(f"Running {script_path} -> Return code: {result.returncode}")
assert result.returncode == 0, f"{script_path} failed with code {result.returncode}"
# ---------------------------------------------------------------------------
# Shared pipeline-running helper for the tests above
# ---------------------------------------------------------------------------
import gc
import os
from pathlib import Path
import torch
from diffusers import DiffusionPipeline
from tqdm import trange
from ..utils import hash_str_to_int
def run_pipeline(
    dataset: list[dict],
    batch_size: int,
    pipeline: DiffusionPipeline,
    save_dir: str | os.PathLike[str],
    forward_kwargs: dict | None = None,
):
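    """Run `pipeline` over `dataset` in batches and save one PNG per item.

    Each dataset entry is a dict of pipeline kwargs plus a "filename" key;
    the filename is hashed into a deterministic per-image seed.
    """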
if isinstance(save_dir, str):
save_dir = Path(save_dir)
assert isinstance(save_dir, Path)
save_dir.mkdir(parents=True, exist_ok=True)
pipeline.set_progress_bar_config(desc="Sampling", leave=False, dynamic_ncols=True, position=1)
for batch_idx in trange(len(dataset) // batch_size, desc="Batch", position=0, leave=False):
start_idx = batch_idx * batch_size
end_idx = start_idx + batch_size
batch = dataset[start_idx:end_idx]
        filenames = [item["filename"] for item in batch]
        # Deterministic per-image seeds derived from the filenames
        generators = [torch.Generator().manual_seed(hash_str_to_int(filename)) for filename in filenames]
        _forward_kwargs = dict(forward_kwargs) if forward_kwargs else {}
        # diffusers expects unbatched arguments (and a single generator) when the batch size is 1
        _forward_kwargs["generator"] = generators if batch_size > 1 else generators[0]
        for k in batch[0].keys():
            if k == "filename":
                continue
            _forward_kwargs[k] = [item[k] for item in batch] if batch_size > 1 else batch[0][k]
images = pipeline(**_forward_kwargs).images
for i, image in enumerate(images):
filename = filenames[i]
image.save(os.path.join(save_dir, f"{filename}.png"))
gc.collect()
torch.cuda.empty_cache()
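# ---------------------------------------------------------------------------
# For reference, a minimal sketch of the shared helpers the tests import from
# the test-utils package (`already_generate`, `compute_lpips`,
# `hash_str_to_int`). The real implementations in the repo may differ; this
# sketch only illustrates the assumed behavior.
# ---------------------------------------------------------------------------
import hashlib
from pathlib import Path

import lpips  # pip install lpips
import torch
import torchvision.transforms.functional as TF
from PIL import Image


def already_generate(save_dir, num_images: int) -> bool:
    """Assumed behavior: True if save_dir already holds num_images PNGs."""
    save_dir = Path(save_dir)
    return save_dir.exists() and len(list(save_dir.glob("*.png"))) >= num_images


def hash_str_to_int(s: str) -> int:
    """Assumed behavior: stable string-to-seed mapping (unlike built-in hash())."""
    return int(hashlib.sha256(s.encode("utf-8")).hexdigest(), 16) % (1 << 31)


def compute_lpips(dir_a, dir_b, batch_size: int = 1) -> float:
    """Assumed behavior: mean LPIPS distance between same-named image pairs.

    batch_size mirrors the real helper's signature; this sketch scores one
    image pair at a time.
    """
    loss_fn = lpips.LPIPS(net="alex")
    scores = []
    for path_a in sorted(Path(dir_a).glob("*.png")):
        path_b = Path(dir_b) / path_a.name
        a = TF.to_tensor(Image.open(path_a).convert("RGB")) * 2 - 1  # scale to [-1, 1]
        b = TF.to_tensor(Image.open(path_b).convert("RGB")) * 2 - 1
        with torch.no_grad():
            scores.append(loss_fn(a.unsqueeze(0), b.unsqueeze(0)).item())
    return sum(scores) / len(scores)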