# test_workflows.py

import gc
import os
import subprocess

import numpy as np
import pytest
import torch
from diffusers.utils import load_image
from PIL import Image
from torchmetrics.image import LearnedPerceptualImagePatchSimilarity, PeakSignalNoiseRatio
from torchmetrics.multimodal import CLIPImageQualityAssessment

from nunchaku.utils import get_precision

# Directory holding the example workflow scripts; it sits next to this test module.
script_dir = os.path.join(os.path.split(__file__)[0], "scripts")


def _image_to_tensor(image: "Image.Image") -> torch.Tensor:
    """Convert an RGB PIL image into a float32 ``(1, 3, H, W)`` tensor on the GPU.

    Pixel values are left in the 0-255 range; callers rescale where a metric
    needs a different range.
    """
    chw = torch.from_numpy(np.array(image)).permute(2, 0, 1)
    return chw.to(torch.float32).unsqueeze(0).to("cuda")


@pytest.mark.parametrize(
    "script_name, expected_clip_iqa, expected_lpips, expected_psnr",
    [
        ("nunchaku-flux1-redux-dev.py", 0.9, 0.137, 18.9),
        ("nunchaku-flux1-dev-controlnet_upscaler.py", 0.9, 0.1, 26),
        ("nunchaku-flux1-dev-controlnet_union_pro2.py", 0.9, 0.1, 26),
        ("nunchaku-flux1-depth-lora.py", 0.59, 0.13, 21),
        ("nunchaku-flux1-canny.py", 0.9, 0.1, 26),
        ("nunchaku-flux1-schnell.py", 0.9, 0.29, 19.3),
        ("nunchaku-flux1-depth.py", 0.9, 0.13, 26),
        ("nunchaku-shuttle-jaguar.py", 0.9, 0.157, 23.9),
        ("nunchaku-flux1-fill.py", 0.9, 0.1, 26),
        ("nunchaku-flux1-fill-removalV2.py", 0.56, 0.13, 26),
        ("nunchaku-flux1-dev.py", 0.9, 0.28, 19.7),
        ("nunchaku-flux1-canny-lora.py", 0.9, 0.1, 25),
        ("nunchaku-flux1-dev-qencoder.py", 0.9, 0.27, 16.3),
        ("nunchaku-flux1-dev-hand_drawn_game.py", 0.92, 0.254, 20),
        ("nunchaku-flux1-dev-pulid.py", 0.9, 0.194, 15.8),
        ("nunchaku-flux1-kontext-dev.py", 0.9, 0.1, 18.3),
        ("nunchaku-flux1-kontext-dev-turbo_lora.py", 0.87, 0.13, 18.8),
        ("nunchaku-flux1-ip-adapter.py", 0.5, 0.36, 14),
    ],
)
@pytest.mark.flaky(reruns=2, reruns_delay=0)
def test_workflows(script_name: str, expected_clip_iqa: float, expected_lpips: float, expected_psnr: float):
    """Run one workflow script and check the quality of the image it produces.

    The script is executed in a subprocess. The path of the generated image is
    then read from ``image_path.txt`` in the current working directory
    (presumably written by the script -- confirm against the scripts). Three
    metrics are computed on the GPU:

    * CLIP-IQA of the generated image (no reference needed),
    * LPIPS between the generated image and a reference image downloaded from
      the ``nunchaku-tech/test-data`` dataset for the current precision,
    * PSNR between the same pair.

    Each metric may deviate by 15% from its expected value before the test
    fails.

    Args:
        script_name: File name of the script inside ``script_dir``.
        expected_clip_iqa: Baseline CLIP-IQA score (higher is better).
        expected_lpips: Baseline LPIPS distance (lower is better).
        expected_psnr: Baseline PSNR (higher is better).
    """
    # Free whatever the previous parametrized case left on the GPU.
    gc.collect()
    torch.cuda.empty_cache()
    script_path = os.path.join(script_dir, script_name)

    # Capture the child's output: without it result.stdout / result.stderr are
    # always None and the diagnostics below would be useless.
    result = subprocess.run(
        ["python", script_path],
        capture_output=True,
        text=True,
        errors="replace",
    )
    print(f"Running {script_path} -> Return code: {result.returncode}")
    if result.returncode != 0:
        print(f"Output: {result.stdout}")
        print(f"Error: {result.stderr}")
    assert result.returncode == 0, f"{script_path} failed with code {result.returncode}"

    with open("image_path.txt", "r") as path_file:
        path = path_file.read().strip()

    # clip_iqa metric
    metric = CLIPImageQualityAssessment(model_name_or_path="openai/clip-vit-large-patch14").to("cuda")
    with Image.open(path) as generated:
        image = generated.convert("RGB")
    gen_tensor = _image_to_tensor(image)
    clip_iqa = metric(gen_tensor).item()
    print(f"CLIP-IQA: {clip_iqa}")

    # lpips metric
    ref_image_url = (
        "https://huggingface.co/datasets/nunchaku-tech/test-data/resolve/main/ComfyUI-nunchaku/ref_images/"
        f"{get_precision()}/{script_name.replace('.py', '.png')}"
    )
    ref_image = load_image(ref_image_url).convert("RGB")
    ref_tensor = _image_to_tensor(ref_image)
    # NOTE(review): torchmetrics LPIPS defaults to normalize=False, which documents
    # inputs in [-1, 1]; the tensors passed here are in [0, 1]. The expected values
    # above were presumably calibrated with this setup -- confirm before changing.
    metric = LearnedPerceptualImagePatchSimilarity().to("cuda")
    lpips = metric(gen_tensor / 255, ref_tensor / 255).item()
    print(f"LPIPS: {lpips}")

    # psnr metric (computed on the raw 0-255 pixel values)
    metric = PeakSignalNoiseRatio(data_range=(0, 255)).to("cuda")
    psnr = metric(gen_tensor, ref_tensor).item()
    print(f"PSNR: {psnr}")

    assert clip_iqa >= expected_clip_iqa * 0.85, f"CLIP-IQA {clip_iqa} < {expected_clip_iqa * 0.85}"
    assert lpips <= expected_lpips * 1.15, f"LPIPS {lpips} > {expected_lpips * 1.15}"
    assert psnr >= expected_psnr * 0.85, f"PSNR {psnr} < {expected_psnr * 0.85}"