test.py 3.08 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import json
import torch
import time
from diffusers import DiffusionPipeline

# Model path is taken from the environment for flexibility, falling back to a
# local SDXL base checkpoint.
MODEL_PATH = os.environ.get("MODEL_PATH", "/workspace/stable-diffusion-xl-base-1.0")

# NOTE(review): the original script called torch.autograd.set_detect_anomaly(True)
# here. Anomaly detection is a debugging aid that adds per-operation bookkeeping
# overhead and would skew the inference timings this benchmark collects, so it
# has been removed. Re-enable it locally only when chasing an autograd NaN.

# Initialize the SDXL pipeline in fp16 on the GPU.
print(f"Loading model from: {MODEL_PATH}")
pipe = DiffusionPipeline.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16,      # half precision halves memory and speeds inference
    use_safetensors=True,
    variant="fp16",                 # load the fp16 weight files shipped with SDXL
)
pipe.to("cuda")

# Compile the heavy submodules with the inductor backend. dynamic=False fixes the
# input shapes, which matches the fixed benchmark resolutions below. Guard each
# submodule with hasattr so the script tolerates pipeline variants that lack one
# (the original guarded only `unet`; the text encoders are now guarded the same way).
for _name in ("text_encoder", "text_encoder_2", "unet"):
    if hasattr(pipe, _name):
        setattr(
            pipe,
            _name,
            torch.compile(getattr(pipe, _name), backend="inductor", dynamic=False),
        )

# Prompt shared by every benchmark run.
prompt = "An astronaut riding a green horse"

# Sweep parameters. Widths and heights are zipped pairwise below, so only the
# square resolutions 1024x1024 and 2048x2048 are exercised.
# NOTE(review): with a single batch size this yields 4 configurations, not 8 —
# presumably batch_sizes was once [1, 2]; confirm with the author.
widths = [1024, 2048]
heights = [1024, 2048]
steps_list = [10, 20]
batch_sizes = [2]

# Warm up: run each (resolution, batch) shape once so torch.compile traces and
# compiles every graph before timing starts. The shapes mirror the benchmark
# sweep below.
print("Warming up...")
for warm_batch in (1, 2):
    for warm_side in (1024, 2048):
        pipe(
            prompt=prompt,
            width=warm_side,
            height=warm_side,
            num_inference_steps=10,
            num_images_per_prompt=warm_batch,
        )

print("Warm up finished. Starting benchmark...")

# Collected timing results, one entry per configuration.
all_results = []

# Iterate the configuration sweep. zip() pairs widths with heights, so only the
# square resolutions are benchmarked (2 resolutions x 2 step counts x 1 batch
# size = 4 configurations; the original comment claimed 8).
for width, height in zip(widths, heights):
    for num_inference_steps in steps_list:
        for batch_size in batch_sizes:
            print(f"\n生成配置: {width}x{height}, steps={num_inference_steps}, batch={batch_size}")
            # Time 5 repetitions; synchronize around the call so the wall-clock
            # window covers the full GPU work, not just kernel launch.
            time_list = []
            for i in range(5):
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                time_start = time.time()
                result = pipe(
                    prompt=prompt,
                    width=width,
                    height=height,
                    num_inference_steps=num_inference_steps,
                    num_images_per_prompt=batch_size
                )
                torch.cuda.synchronize()
                time_end = time.time()
                time_list.append((time_end - time_start) * 1000)  # ms

            avg_time = sum(time_list) / len(time_list)
            print(f"time cost: {time_list}, avg: {avg_time:.2f} ms")

            # Save the images from the last repetition of this configuration.
            for i, image in enumerate(result.images):
                filename = f"output_{width}x{height}_steps{num_inference_steps}_batch{batch_size}_{i}.png"
                image.save(filename)
                # BUG FIX: the f-string previously printed a garbled literal
                # instead of interpolating the saved filename.
                print(f"保存图片: {filename}")

            # Record the averaged timing for this configuration.
            all_results.append({
                "config": f"{width}x{height}_steps{num_inference_steps}_batch{batch_size}",
                "avg_time_ms": round(avg_time, 2)
            })

# Persist all timings to a JSON report.
with open("results.json", "w") as f:
    json.dump(all_results, f, indent=4)

print("\n所有配置组合生成完成!结果已保存到 results.json")