---
# GitLab CI pipeline: download SDXL assets, set up the DCU environment,
# then run the SDXL benchmark.
#
# The three jobs hand files to each other through the per-branch cache.
# NOTE(review): cache is best-effort and runner-local unless distributed
# caching is configured; confirm the `demos` runners share a cache.

# Docker image used by every job in this pipeline.
image: image.sourcefind.cn:5000/dcu/admin/base/vllm:0.8.5-ubuntu22.04-dtk25.04.1-rc5-das1.6-py3.10-20250724

# Pipeline stages, executed in order.
stages:
  - prepare
  - install
  - test

# === Stage 1: prepare - download all large files ===
download_assets:
  stage: prepare
  tags:
    - demos
  script:
    - echo "--- 1. Preparing assets ---"
    # Install the ModelScope CLI.
    - pip install modelscope
    # Download the models.
    - >-
      modelscope download --model AI-ModelScope/sdxl-vae-fp16-fix
      --local_dir ./sdxl-vae-fp16-fix
    - >-
      modelscope download --model stabilityai/stable-diffusion-xl-base-1.0
      --local_dir ./stable-diffusion-xl-base-1.0
    # Download the library archives (`-C -` resumes a partial download).
    - >-
      curl -f -C - -o rocblas-install-0626-bug.tar.gz
      https://wuzh01.hpccube.com:65015/efile/s/d/amVycnJycnk=/a6a7342d017b1748
    - >-
      curl -f -C - -o package-miopen-dev-0801-ubuntu20.tar.gz
      https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/9c2334cb9cdb8b39
    - echo "--- Asset download finished ---"
  # Cache the downloaded files; the key is the branch slug.
  cache:
    key: "${CI_COMMIT_REF_SLUG}"
    paths:
      - sdxl-vae-fp16-fix/
      - stable-diffusion-xl-base-1.0/
      - rocblas-install-0626-bug.tar.gz
      - package-miopen-dev-0801-ubuntu20.tar.gz

# === Stage 2: install - unpack, install, configure ===
setup_environment:
  stage: install
  tags:
    - demos
  script:
    - echo "--- 2. Setting up environment ---"
    # Unpack the library archives.
    # NOTE(review): presumably these unpack to rocblas-install/ and
    # package-miopen-dev-0801-ubuntu20/ (the paths cached below) - verify.
    - tar -xzvf rocblas-install-0626-bug.tar.gz
    - tar -xzvf package-miopen-dev-0801-ubuntu20.tar.gz
    # Replace the VAE weights with the fp16-fix variant.
    - cd stable-diffusion-xl-base-1.0
    # Back up the original VAE only once. When vae_bak/ was restored from
    # cache, a plain `mv vae vae_bak` would nest the already-replaced vae/
    # inside the backup instead of failing.
    - if [ -d vae ] && [ ! -e vae_bak ]; then mv vae vae_bak; fi
    - mkdir -p vae
    - cp ../sdxl-vae-fp16-fix/c* ./vae/
    - cp ../sdxl-vae-fp16-fix/s* ./vae/
    - cp ../sdxl-vae-fp16-fix/d* ./vae/
    - cd ..
    # Download and install the Python packages.
    - >-
      curl -f -C - -o diffusers-0.33.1-py3-none-any.whl
      https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/63ec0d10ce960f90
    - >-
      curl -f -C - -o lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl
      https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/da522f7e175bb092
    - >-
      curl -f -C - -o litserve-0.2.15-py3-none-any.whl
      https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/36943fb1ca62ac3b
    - >-
      pip install
      diffusers-0.33.1-py3-none-any.whl
      lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl
      litserve-0.2.15-py3-none-any.whl
    # Replace transformers with the patched source tree.
    - >-
      curl -f -C - -o transformers.tar.gz
      https://ksefile.hpccube.com:65241/efile/s/d/amVycnJycnk=/f587d939a37d1727
    - tar -xzvf transformers.tar.gz
    - echo "--- Environment setup finished ---"
  # Pull the cache written by the previous job and push the prepared tree.
  # The wheel files are cached too so the test job can reinstall them:
  # packages pip-installed here live in this job's environment and are not
  # part of the cache.
  cache:
    key: "${CI_COMMIT_REF_SLUG}"
    paths:
      - sdxl-vae-fp16-fix/
      - stable-diffusion-xl-base-1.0/
      - rocblas-install/
      - package-miopen-dev-0801-ubuntu20/
      - transformers/
      - diffusers-0.33.1-py3-none-any.whl
      - lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl
      - litserve-0.2.15-py3-none-any.whl
    policy: pull-push

# === Stage 3: test - run the benchmark ===
benchmark_sdxl:
  stage: test
  tags:
    - demos
  before_script:
    # `${VAR:+:$VAR}` appends the previous value only when it is non-empty,
    # so an unset variable does not leave a trailing ':' (which would add
    # the current directory to the search path).
    - export LD_LIBRARY_PATH="$CI_PROJECT_DIR/rocblas-install/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    - export LD_LIBRARY_PATH="$CI_PROJECT_DIR/package-miopen-dev-0801-ubuntu20/lib/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
    - export PYTORCH_MIOPEN_SUGGEST_NHWC=1
    - export PYTHONPATH="$CI_PROJECT_DIR/transformers${PYTHONPATH:+:$PYTHONPATH}"
    - export MODEL_PATH="$CI_PROJECT_DIR/stable-diffusion-xl-base-1.0"
    # Reinstall the wheels restored from cache.
    # NOTE(review): assumes each job starts from a clean image - confirm
    # against the runner's executor.
    - >-
      pip install
      diffusers-0.33.1-py3-none-any.whl
      lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl
      litserve-0.2.15-py3-none-any.whl
  script:
    - echo "--- 3. Running SDXL benchmark ---"
    # Single-quoted so YAML does not read the ': ' inside the command as a
    # mapping separator; the shell still expands the variable inside "...".
    - 'echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"'
    - 'echo "PYTHONPATH: $PYTHONPATH"'
    - python test.py
    - echo "--- Benchmark finished ---"
  artifacts:
    name: "$CI_JOB_NAME-$CI_COMMIT_REF_NAME"
    paths:
      - "*.png"
      - results.json
    expire_in: 1 week
  # Read-only use of the cache. `paths` must be listed here as well,
  # otherwise there is nothing for the runner to restore.
  cache:
    key: "${CI_COMMIT_REF_SLUG}"
    paths:
      - sdxl-vae-fp16-fix/
      - stable-diffusion-xl-base-1.0/
      - rocblas-install/
      - package-miopen-dev-0801-ubuntu20/
      - transformers/
      - diffusers-0.33.1-py3-none-any.whl
      - lightop-0.5.0+das.dtk25041-cp310-cp310-linux_x86_64.whl
      - litserve-0.2.15-py3-none-any.whl
    policy: pull