# GitLab CI pipeline: prepare the SDXL environment and run the benchmark.

# Docker image used by every job in this pipeline.
image: "image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-rc5-das1.6-py3.10-20250724"

# Only one stage is defined.
stages:
  - test

# === The only job: prepare the environment and run the benchmark ===
benchmark_sdxl:
  stage: test
  tags:
    - demos
  variables:
    # GitLab only caches paths located inside the project directory, so pip's
    # cache is relocated there (the default /root/.cache/pip cannot be cached).
    PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
  script:
    # NOTE: commands containing ": " are single-quoted; otherwise YAML parses
    # the list item as a mapping instead of a command string.
    - 'echo "========================================="'
    - 'echo "Step 1: Checking and downloading assets"'
    - 'echo "All download logs will be saved to setup.log"'
    - 'echo "========================================="'
    # Download the models only when they are not already present (cache miss);
    # all tool output is appended to setup.log.
    - |
      if [ ! -d "stable-diffusion-xl-base-1.0" ]; then
        echo "Models not found in cache, downloading..." | tee -a setup.log
        pip install modelscope -q >> setup.log 2>&1
        modelscope download --model "AI-ModelScope/sdxl-vae-fp16-fix" --local_dir "./sdxl-vae-fp16-fix" >> setup.log 2>&1
        modelscope download --model "stabilityai/stable-diffusion-xl-base-1.0" --local_dir "./stable-diffusion-xl-base-1.0" >> setup.log 2>&1
      else
        echo "Models found in cache, skipping download." | tee -a setup.log
      fi
    # Download the wheels and the transformers tarball only when they are not
    # already present. `-S` keeps curl quiet on success but still writes the
    # failure reason to setup.log; `-f` makes HTTP errors fail the job.
    - |
      if [ ! -f "diffusers-0.33.1-py3-none-any.whl" ]; then
        echo "Libraries/packages not found in cache, downloading..." | tee -a setup.log
        curl -sS -f -C - -o "diffusers-0.33.1-py3-none-any.whl" "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/63ec0d10ce960f90" >> setup.log 2>&1
        curl -sS -f -C - -o "lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl" "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/da522f7e175bb092" >> setup.log 2>&1
        curl -sS -f -C - -o "litserve-0.2.15-py3-none-any.whl" "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/36943fb1ca62ac3b" >> setup.log 2>&1
        curl -sS -f -C - -o "transformers.tar.gz" "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/f587d939a37d1727" >> setup.log 2>&1
      else
        echo "Libraries/packages found in cache, skipping download." | tee -a setup.log
      fi
    # The rocblas and MIOpen tarballs themselves are not cached (only their
    # extracted directories are), so fetch each one only when its extracted
    # directory is missing.
    # NOTE(review): the directory names are taken from the cache paths and
    # LD_LIBRARY_PATH entries below - confirm they match the tarball contents.
    - |
      if [ ! -d "rocblas-install" ]; then
        curl -sS -f -C - -o "rocblas-install-0626-bug.tar.gz" "https://wuzh01.hpccube.com:65015/efile/s/d/amVycnJycnk=/a6a7342d017b1748" >> setup.log 2>&1
      fi
      if [ ! -d "package-miopen-dev-0801-ubuntu20" ]; then
        curl -sS -f -C - -o "package-miopen-dev-0801-ubuntu20.tar.gz" "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/9c2334cb9cdb8b39" >> setup.log 2>&1
      fi

    - 'echo "========================================="'
    - 'echo "Step 2: Setting up the environment"'
    - 'echo "========================================="'
    # Unpack the library archives. rocblas and MIOpen are unpacked only when
    # their directory was not restored from the cache, because in that case the
    # tarball was not downloaded either.
    - |
      if [ ! -d "rocblas-install" ]; then
        tar -xzf "rocblas-install-0626-bug.tar.gz"
      fi
      if [ ! -d "package-miopen-dev-0801-ubuntu20" ]; then
        tar -xzf "package-miopen-dev-0801-ubuntu20.tar.gz"
      fi
      tar -xzf "transformers.tar.gz"
    # Replace the VAE weights with the fp16-fix variant.
    - cd "stable-diffusion-xl-base-1.0"
    # Back up the original VAE only once: on a cached run vae_bak already
    # exists, and a plain `mv` would move the patched vae/ inside it.
    - |
      if [ -d vae ] && [ ! -d vae_bak ]; then
        mv vae vae_bak
      fi
    - mkdir -p vae
    - cp ../sdxl-vae-fp16-fix/c* ./vae/
    - cp ../sdxl-vae-fp16-fix/s* ./vae/
    - cp ../sdxl-vae-fp16-fix/d* ./vae/
    - cd ..

    - 'echo "========================================="'
    - 'echo "Step 3: Installing Python packages"'
    - 'echo "========================================="'
    # Install the Python packages (fast when the pip cache is warm); output
    # is appended to setup.log as well.
    - pip install "diffusers-0.33.1-py3-none-any.whl" "lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl" "litserve-0.2.15-py3-none-any.whl" -q >> setup.log 2>&1

    - 'echo "========================================="'
    - 'echo "Step 4: Running the benchmark"'
    - 'echo "========================================="'
    # Set the environment variables and run the test.
    - export LD_LIBRARY_PATH="$CI_PROJECT_DIR/rocblas-install/lib/:$LD_LIBRARY_PATH"
    - export LD_LIBRARY_PATH="$CI_PROJECT_DIR/package-miopen-dev-0801-ubuntu20/lib/:$LD_LIBRARY_PATH"
    - export PYTORCH_MIOPEN_SUGGEST_NHWC=1
    - export PYTHONPATH="$CI_PROJECT_DIR/transformers:$PYTHONPATH"
    - export MODEL_PATH="$CI_PROJECT_DIR/stable-diffusion-xl-base-1.0"
    - python test.py > benchmark.log 2>&1

  # Log summaries live in after_script so they are printed even when a script
  # step (for example the benchmark itself) fails.
  after_script:
    - 'echo "========================================="'
    - 'echo "Job Finished. Displaying log summaries:"'
    - 'echo "========================================="'
    - 'echo "--- Last 20 lines of setup.log ---"'
    # `|| true` prevents an error when the file does not exist.
    - tail -n 20 setup.log || true
    - 'echo "--- Last 20 lines of benchmark.log ---"'
    - tail -n 20 benchmark.log || true

  # Cache definition: pulled at job start and updated at job end.
  cache:
    key: "sdxl-all-in-one-cache-${CI_COMMIT_REF_SLUG}"
    paths:
      - "sdxl-vae-fp16-fix/"
      - "stable-diffusion-xl-base-1.0/"
      - "rocblas-install/"
      - "package-miopen-dev-0801-ubuntu20/"
      - "transformers/"
      - "diffusers-0.33.1-py3-none-any.whl"
      - "lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl"
      - "litserve-0.2.15-py3-none-any.whl"
      - "transformers.tar.gz"
      # Project-relative pip cache (see PIP_CACHE_DIR above).
      - ".cache/pip/"

  # Artifacts: benchmark outputs plus both log files.
  artifacts:
    name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
    # Upload on failure too, so the logs are available for debugging.
    when: always
    paths:
      - "*.png"
      - "results.json"
      - "benchmark.log"
      # Key point: the setup log is saved as an artifact too.
      - "setup.log"
    expire_in: "1 week"