Commit 0c0b0c34 authored by jerrrrry's avatar jerrrrry
Browse files

feat: Add complete SDXL benchmark CI/CD pipeline with caching

parents
# Docker image used by every job in the pipeline (DCU/vLLM base image with PyTorch + DTK).
image: image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-rc5-das1.6-py3.10-20250724

# Pipeline stages, executed in order.
stages:
  - prepare
  - install
  - test

# === Stage 1: prepare — download all large assets (models + native library tarballs) ===
download_assets:
  stage: prepare
  tags:
    - demos
  script:
    - echo "--- 1. Preparing assets ---"
    # Install the ModelScope CLI used to fetch the models.
    - pip install modelscope
    # Download the models.
    - modelscope download --model AI-ModelScope/sdxl-vae-fp16-fix --local_dir ./sdxl-vae-fp16-fix
    - modelscope download --model stabilityai/stable-diffusion-xl-base-1.0 --local_dir ./stable-diffusion-xl-base-1.0
    # Download the native library tarballs (-f fails on HTTP errors, -C - resumes partial downloads).
    - curl -f -C - -o rocblas-install-0626-bug.tar.gz https://wuzh01.hpccube.com:65015/efile/s/d/amVycnJycnk=/a6a7342d017b1748
    - curl -f -C - -o package-miopen-dev-0801-ubuntu20.tar.gz https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/9c2334cb9cdb8b39
    - echo "--- Asset download finished ---"
  # Cache the downloaded assets, keyed by branch name, so later pipelines skip the downloads.
  cache:
    key: ${CI_COMMIT_REF_SLUG}
    paths:
      - sdxl-vae-fp16-fix/
      - stable-diffusion-xl-base-1.0/
      - rocblas-install-0626-bug.tar.gz
      - package-miopen-dev-0801-ubuntu20.tar.gz

# === Stage 2: install — extract libraries, swap VAE weights, install Python packages ===
setup_environment:
  stage: install
  tags:
    - demos
  script:
    - echo "--- 2. Setting up environment ---"
    # Extract the native libraries downloaded in stage 1.
    - tar -xzvf rocblas-install-0626-bug.tar.gz
    - tar -xzvf package-miopen-dev-0801-ubuntu20.tar.gz
    # Replace the base model's VAE weights with the fp16-fix variant.
    - cd stable-diffusion-xl-base-1.0
    - mv vae vae_bak || true  # ignore the error if vae_bak already exists
    - mkdir -p vae
    - cp ../sdxl-vae-fp16-fix/c* ./vae/
    - cp ../sdxl-vae-fp16-fix/s* ./vae/
    - cp ../sdxl-vae-fp16-fix/d* ./vae/
    - cd ..
    # Download and install the required Python wheels.
    - curl -f -C - -o diffusers-0.33.1-py3-none-any.whl https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/63ec0d10ce960f90
    - curl -f -C - -o lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/da522f7e175bb092
    - curl -f -C - -o litserve-0.2.15-py3-none-any.whl https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/36943fb1ca62ac3b
    - pip install diffusers-0.33.1-py3-none-any.whl lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl litserve-0.2.15-py3-none-any.whl
    # Replace transformers with the patched source tree (put on PYTHONPATH in stage 3).
    - curl -f -C - -o transformers.tar.gz https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/f587d939a37d1727
    - tar -xzvf transformers.tar.gz
    - echo "--- Environment setup finished ---"
  # Reuse the branch cache and additionally store the extracted directories.
  # NOTE(review): the tarball entries cached by stage 1 are not listed here, so a
  # cache push from this job drops them — confirm that is intended before a retry
  # of stage 2 needs to re-extract.
  cache:
    key: ${CI_COMMIT_REF_SLUG}
    paths:
      - sdxl-vae-fp16-fix/
      - stable-diffusion-xl-base-1.0/
      - rocblas-install/
      - package-miopen-dev-0801-ubuntu20/
      - transformers/
    policy: pull-push

# === Stage 3: test — run the SDXL benchmark ===
benchmark_sdxl:
  stage: test
  tags:
    - demos
  # Export every environment variable the benchmark needs before the script runs.
  before_script:
    - export LD_LIBRARY_PATH=$CI_PROJECT_DIR/rocblas-install/lib/:$LD_LIBRARY_PATH
    - export LD_LIBRARY_PATH=$CI_PROJECT_DIR/package-miopen-dev-0801-ubuntu20/lib/:$LD_LIBRARY_PATH
    - export PYTORCH_MIOPEN_SUGGEST_NHWC=1
    - export PYTHONPATH="$CI_PROJECT_DIR/transformers:$PYTHONPATH"
    # Model path consumed by test.py.
    - export MODEL_PATH="$CI_PROJECT_DIR/stable-diffusion-xl-base-1.0"
  script:
    - echo "--- 3. Running SDXL benchmark ---"
    # Single-quoted for YAML: a plain scalar containing ": " would be parsed as a mapping.
    - 'echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"'
    - 'echo "PYTHONPATH: $PYTHONPATH"'
    - python test.py
    - echo "--- Benchmark finished ---"
  # Save the generated images and the performance report as job artifacts.
  artifacts:
    name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
    paths:
      # Quoted: an unquoted leading "*" is parsed as a YAML alias, not a glob.
      - "*.png"
      - results.json
    # Keep artifacts for one week.
    expire_in: 1 week
  # Only pull the branch cache; this job never updates it.
  cache:
    key: ${CI_COMMIT_REF_SLUG}
    policy: pull
SDXL Benchmark CI/CD
This project uses GitLab CI/CD to automate the benchmarking of the Stable Diffusion XL model on a DCU environment.
Pipeline Stages
prepare: Downloads the SDXL models and necessary libraries (rocblas, miopen). These files are cached to speed up subsequent pipelines.
install: Extracts libraries, replaces VAE weights, and installs specific Python packages. The installed packages are also cached.
test: Sets up environment variables and runs the test.py benchmark script. The output images and a results.json performance report are saved as artifacts.
How it Works
The pipeline runs on a GitLab Runner with the demos tag.
It uses a specific Docker image that contains the base PyTorch and DCU environment.
All heavy lifting (downloads, installations) is cached based on the branch name (${CI_COMMIT_REF_SLUG}).
The final test results, including generated images and performance metrics, are available for download from the CI/CD job artifacts page.
import os
import json
import torch
import time
from diffusers import DiffusionPipeline

# Benchmark script for SDXL on DCU: loads the pipeline, compiles its heavy
# submodules, runs a fixed configuration matrix, and writes results.json plus
# one PNG per generated image (both collected as CI artifacts).

# Model path comes from the environment so the CI job can point at its cached checkout.
MODEL_PATH = os.environ.get("MODEL_PATH", "/workspace/stable-diffusion-xl-base-1.0")

# Load the SDXL pipeline with fp16 weights.
print(f"Loading model from: {MODEL_PATH}")
pipe = DiffusionPipeline.from_pretrained(
    MODEL_PATH,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe.to("cuda")

# Compile the heavy submodules (inductor backend, static shapes); the warm-up
# below triggers compilation before any timed run.
# NOTE(review): the original enabled torch.autograd.set_detect_anomaly(True);
# that is a debugging aid that adds autograd bookkeeping overhead, so it has
# been removed from this timing benchmark.
pipe.text_encoder = torch.compile(pipe.text_encoder, backend="inductor", dynamic=False)
pipe.text_encoder_2 = torch.compile(pipe.text_encoder_2, backend="inductor", dynamic=False)
if hasattr(pipe, "unet"):
    pipe.unet = torch.compile(pipe.unet, backend="inductor", dynamic=False)

# Fixed prompt used by every benchmark run.
prompt = "An astronaut riding a green horse"

# Benchmark matrix. zip() pairs widths with heights, so only the square
# resolutions 1024x1024 and 2048x2048 are measured: 2 sizes x 2 step counts
# x 1 batch size = 4 configurations.
widths = [1024, 2048]
heights = [1024, 2048]
steps_list = [10, 20]
batch_sizes = [2]

# Warm up: run each resolution/batch combination once so torch.compile
# finishes before timing starts.
print("Warming up...")
pipe(prompt=prompt, width=1024, height=1024, num_inference_steps=10, num_images_per_prompt=1)
pipe(prompt=prompt, width=2048, height=2048, num_inference_steps=10, num_images_per_prompt=1)
pipe(prompt=prompt, width=1024, height=1024, num_inference_steps=10, num_images_per_prompt=2)
pipe(prompt=prompt, width=2048, height=2048, num_inference_steps=10, num_images_per_prompt=2)
print("Warm up finished. Starting benchmark...")

# One entry per configuration: {"config": ..., "avg_time_ms": ...}.
all_results = []

for width, height in zip(widths, heights):
    for num_inference_steps in steps_list:
        for batch_size in batch_sizes:
            print(f"\n生成配置: {width}x{height}, steps={num_inference_steps}, batch={batch_size}")
            time_list = []
            # Time 5 runs; empty_cache + synchronize before starting and
            # synchronize after finishing keep queued GPU work out of the
            # wall-clock measurement.
            for _ in range(5):
                torch.cuda.empty_cache()
                torch.cuda.synchronize()
                time_start = time.time()
                result = pipe(
                    prompt=prompt,
                    width=width,
                    height=height,
                    num_inference_steps=num_inference_steps,
                    num_images_per_prompt=batch_size,
                )
                torch.cuda.synchronize()
                time_list.append((time.time() - time_start) * 1000)
            avg_time = sum(time_list) / len(time_list)
            print(f"time cost: {time_list}, avg: {avg_time:.2f} ms")
            # Save the images from the last timed run of this configuration.
            for idx, image in enumerate(result.images):
                filename = f"output_{width}x{height}_steps{num_inference_steps}_batch{batch_size}_{idx}.png"
                image.save(filename)
                # NOTE(review): this line was garbled in the source; the
                # {filename} placeholder is the reconstructed intent.
                print(f"保存图片: {filename}")
            # Record the averaged latency for this configuration.
            all_results.append({
                "config": f"{width}x{height}_steps{num_inference_steps}_batch{batch_size}",
                "avg_time_ms": round(avg_time, 2),
            })

# Persist the benchmark summary for the CI artifacts.
with open("results.json", "w") as f:
    json.dump(all_results, f, indent=4)

print("\n所有配置组合生成完成!结果已保存到 results.json")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment