---
# GitLab CI pipeline for the SDXL benchmark.
#
# Flow: download model assets and vendor libraries (prepare), unpack them and
# install the Python wheels (install), then run test.py (test). Large assets
# travel between jobs through the branch-scoped cache; the small wheel files
# travel as job artifacts.

# Docker image used by every job in the pipeline.
image: image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-rc5-das1.6-py3.10-20250724

# All stages of the pipeline, in execution order.
stages:
  - prepare
  - install
  - test

# === Stage 1: prepare ===
# Downloads the two model repositories and the two library tarballs.
download_assets:
  stage: prepare
  tags:
    - demos
  script:
    - echo "--- 1. Preparing assets ---"
    - pip install modelscope
    # Both model downloads run in parallel, each logging to its own file.
    # The job must wait for them: otherwise it can finish (and the cache can
    # be archived) while the model directories are still incomplete, and a
    # failed download would go unnoticed.
    - |
      modelscope download --model "AI-ModelScope/sdxl-vae-fp16-fix" \
        --local_dir "./sdxl-vae-fp16-fix" \
        > downloadsdxl-vae-fp16-fix.log 2>&1 &
      vae_pid=$!
      modelscope download --model "stabilityai/stable-diffusion-xl-base-1.0" \
        --local_dir "./stable-diffusion-xl-base-1.0" \
        > downloadsdxl-stable-diffusion-xl-base-1.0.log 2>&1 &
      base_pid=$!
      download_status=0
      wait "$vae_pid" || download_status=1
      wait "$base_pid" || download_status=1
      if [ "$download_status" -ne 0 ]; then
        echo "Model download failed; last log lines follow." >&2
        tail -n 50 downloadsdxl-vae-fp16-fix.log downloadsdxl-stable-diffusion-xl-base-1.0.log >&2
        exit 1
      fi
    - >-
      curl -f -C - -o "rocblas-install-0626-bug.tar.gz"
      "https://wuzh01.hpccube.com:65015/efile/s/d/amVycnJycnk=/a6a7342d017b1748"
    - >-
      curl -f -C - -o "package-miopen-dev-0801-ubuntu20.tar.gz"
      "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/9c2334cb9cdb8b39"
    - echo "--- Asset download finished ---"
  cache:
    key: "assets-cache-${CI_COMMIT_REF_SLUG}"
    paths:
      - "sdxl-vae-fp16-fix/"
      - "stable-diffusion-xl-base-1.0/"
      - "rocblas-install-0626-bug.tar.gz"
      - "package-miopen-dev-0801-ubuntu20.tar.gz"

# === Stage 2: install ===
# Unpacks the tarballs, swaps the fp16-fix VAE files into the base model and
# installs the Python wheels.
setup_environment:
  stage: install
  tags:
    - demos
  script:
    - echo "--- 2. Setting up environment ---"
    - tar -xzvf "rocblas-install-0626-bug.tar.gz"
    - tar -xzvf "package-miopen-dev-0801-ubuntu20.tar.gz"
    # Replace the base model's VAE with the files from sdxl-vae-fp16-fix,
    # keeping the original directory as vae_bak when it can be moved.
    - cd "stable-diffusion-xl-base-1.0"
    - mv vae vae_bak || true
    - mkdir -p vae
    - cp ../sdxl-vae-fp16-fix/c* ./vae/
    - cp ../sdxl-vae-fp16-fix/s* ./vae/
    - cp ../sdxl-vae-fp16-fix/d* ./vae/
    - cd ..
    - >-
      curl -f -C - -o "diffusers-0.33.1-py3-none-any.whl"
      "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/63ec0d10ce960f90"
    - >-
      curl -f -C - -o "lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl"
      "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/da522f7e175bb092"
    - >-
      curl -f -C - -o "litserve-0.2.15-py3-none-any.whl"
      "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/36943fb1ca62ac3b"
    - >-
      pip install
      "diffusers-0.33.1-py3-none-any.whl"
      "lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl"
      "litserve-0.2.15-py3-none-any.whl"
    - >-
      curl -f -C - -o "transformers.tar.gz"
      "https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/f587d939a37d1727"
    - tar -xzvf "transformers.tar.gz"
    - echo "--- Environment setup finished ---"
  # The wheels are handed to the test stage as artifacts so that the test job
  # can install them into its own environment.
  artifacts:
    paths:
      - "diffusers-0.33.1-py3-none-any.whl"
      - "lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl"
      - "litserve-0.2.15-py3-none-any.whl"
    expire_in: 1 day
  cache:
    key: "assets-cache-${CI_COMMIT_REF_SLUG}"
    paths:
      - "sdxl-vae-fp16-fix/"
      - "stable-diffusion-xl-base-1.0/"
      - "rocblas-install/"
      - "package-miopen-dev-0801-ubuntu20/"
      - "transformers/"
    policy: pull-push

# === Stage 3: test ===
# Runs the SDXL benchmark against the prepared model and libraries.
benchmark_sdxl:
  stage: test
  tags:
    - demos
  script:
    - echo "--- 3. Running SDXL benchmark ---"
    # Packages installed by setup_environment are not available here when each
    # job runs in a fresh container of the image, so the wheels received as
    # artifacts are installed again before the benchmark starts.
    - >-
      pip install
      "diffusers-0.33.1-py3-none-any.whl"
      "lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl"
      "litserve-0.2.15-py3-none-any.whl"
    # Set environment variables.
    - export LD_LIBRARY_PATH="$CI_PROJECT_DIR/rocblas-install/lib/:$LD_LIBRARY_PATH"
    - export LD_LIBRARY_PATH="$CI_PROJECT_DIR/package-miopen-dev-0801-ubuntu20/lib/:$LD_LIBRARY_PATH"
    - export PYTORCH_MIOPEN_SUGGEST_NHWC=1
    - export PYTHONPATH="$CI_PROJECT_DIR/transformers:$PYTHONPATH"
    - export MODEL_PATH="$CI_PROJECT_DIR/stable-diffusion-xl-base-1.0"
    - echo "Starting benchmark script, full log will be saved to benchmark.log..."
    # Send all output of the Python script (stderr included) to benchmark.log
    # while still showing it in the job log. pipefail keeps the job failing
    # when test.py fails, instead of reporting tee's exit status.
    - |
      set -o pipefail
      python test.py 2>&1 | tee benchmark.log
  artifacts:
    name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
    # Keep the artifacts for failed runs too; the log matters most then.
    when: always
    paths:
      - "*.png"
      - "results.json"
      - "benchmark.log"  # Key point: the log file is stored as an artifact too.
    expire_in: 1 week
  cache:
    key: "assets-cache-${CI_COMMIT_REF_SLUG}"
    # Paths are listed explicitly so the cached directories are restored.
    # NOTE(review): a cache entry without paths appears to be skipped by the
    # runner on restore - confirm against the runner version in use.
    paths:
      - "sdxl-vae-fp16-fix/"
      - "stable-diffusion-xl-base-1.0/"
      - "rocblas-install/"
      - "package-miopen-dev-0801-ubuntu20/"
      - "transformers/"
    policy: pull