Unverified commit 1752262e, authored by Shengqi Chen, committed by GitHub
Browse files

[CI] refactor release pipeline config into groups (#32833)


Signed-off-by: Shengqi Chen <harry-chen@outlook.com>
parent ea6102b8
steps: steps:
# aarch64 + CUDA builds - input: "Provide Release version here"
id: input-release-version
fields:
- text: "What is the release version?"
key: release-version
- group: "Build Python wheels"
key: "build-wheels"
steps:
- label: "Build wheel - aarch64 - CUDA 12.9" - label: "Build wheel - aarch64 - CUDA 12.9"
depends_on: ~ depends_on: ~
id: build-wheel-arm64-cuda-12-9 id: build-wheel-arm64-cuda-12-9
...@@ -30,7 +38,6 @@ steps: ...@@ -30,7 +38,6 @@ steps:
env: env:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
# aarch64 build
- label: "Build wheel - aarch64 - CPU" - label: "Build wheel - aarch64 - CPU"
depends_on: ~ depends_on: ~
id: build-wheel-arm64-cpu id: build-wheel-arm64-cpu
...@@ -44,7 +51,6 @@ steps: ...@@ -44,7 +51,6 @@ steps:
env: env:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
# x86 + CUDA builds
- label: "Build wheel - x86_64 - CUDA 12.9" - label: "Build wheel - x86_64 - CUDA 12.9"
depends_on: ~ depends_on: ~
id: build-wheel-x86-cuda-12-9 id: build-wheel-x86-cuda-12-9
...@@ -71,7 +77,6 @@ steps: ...@@ -71,7 +77,6 @@ steps:
env: env:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
# x86 CPU wheel build
- label: "Build wheel - x86_64 - CPU" - label: "Build wheel - x86_64 - CPU"
depends_on: ~ depends_on: ~
id: build-wheel-x86-cpu id: build-wheel-x86-cpu
...@@ -85,7 +90,9 @@ steps: ...@@ -85,7 +90,9 @@ steps:
env: env:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
# Build release images (CUDA 12.9) - group: "Build release Docker images"
key: "build-release-images"
steps:
- label: "Build release image - x86_64 - CUDA 12.9" - label: "Build release image - x86_64 - CUDA 12.9"
depends_on: ~ depends_on: ~
id: build-release-image-x86 id: build-release-image-x86
...@@ -109,33 +116,8 @@ steps: ...@@ -109,33 +116,8 @@ steps:
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ." - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ."
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)" - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)"
- label: "Create multi-arch manifest - CUDA 12.9"
depends_on:
- build-release-image-x86
- build-release-image-arm64
id: create-multi-arch-manifest
agents:
queue: small_cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 --amend"
- "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
- label: "Annotate release workflow - CUDA 12.9"
depends_on:
- create-multi-arch-manifest
id: annotate-release-workflow
agents:
queue: small_cpu_queue_postmerge
commands:
- "bash .buildkite/scripts/annotate-release.sh"
- block: "Build CUDA 13.0 release images"
key: block-release-image-build-cuda-13-0
depends_on: ~
- label: "Build release image - x86_64 - CUDA 13.0" - label: "Build release image - x86_64 - CUDA 13.0"
depends_on: block-release-image-build-cuda-13-0 depends_on: ~
id: build-release-image-x86-cuda-13-0 id: build-release-image-x86-cuda-13-0
agents: agents:
queue: cpu_queue_postmerge queue: cpu_queue_postmerge
...@@ -148,7 +130,7 @@ steps: ...@@ -148,7 +130,7 @@ steps:
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130" - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"
- label: "Build release image - aarch64 - CUDA 13.0" - label: "Build release image - aarch64 - CUDA 13.0"
depends_on: block-release-image-build-cuda-13-0 depends_on: ~
id: build-release-image-arm64-cuda-13-0 id: build-release-image-arm64-cuda-13-0
agents: agents:
queue: arm64_cpu_queue_postmerge queue: arm64_cpu_queue_postmerge
...@@ -158,50 +140,14 @@ steps: ...@@ -158,50 +140,14 @@ steps:
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ." - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=13.0.1 --build-arg torch_cuda_arch_list='8.7 8.9 9.0 10.0+PTX 12.0 12.1' --build-arg INSTALL_KV_CONNECTORS=true --build-arg BUILD_BASE_IMAGE=nvidia/cuda:13.0.1-devel-ubuntu22.04 --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130 --target vllm-openai --progress plain -f docker/Dockerfile ."
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130" - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)-cu130"
- label: "Create multi-arch manifest - CUDA 13.0" - block: "Build release image for x86_64 CPU"
depends_on:
- build-release-image-x86-cuda-13-0
- build-release-image-arm64-cuda-13-0
id: create-multi-arch-manifest-cuda-13-0
agents:
queue: small_cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu130 --amend"
- "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"
- input: "Provide Release version here"
id: input-release-version
fields:
- text: "What is the release version?"
key: release-version
- block: "Confirm update release wheels to PyPI (experimental, use with caution)?"
key: block-upload-release-wheels
depends_on:
- input-release-version
- build-wheel-x86-cuda-12-9
- build-wheel-x86-cuda-13-0
- build-wheel-x86-cpu
- build-wheel-arm64-cuda-12-9
- build-wheel-arm64-cuda-13-0
- build-wheel-arm64-cpu
- label: "Upload release wheels to PyPI and GitHub"
depends_on:
- block-upload-release-wheels
id: upload-release-wheels
agents:
queue: small_cpu_queue_postmerge
commands:
- "bash .buildkite/scripts/upload-release-wheels.sh"
- block: "Build CPU release image"
key: block-cpu-release-image-build key: block-cpu-release-image-build
depends_on: ~ depends_on: ~
- label: "Build and publish CPU release image" - label: "Build release image - x86_64 - CPU"
depends_on: block-cpu-release-image-build depends_on:
- block-cpu-release-image-build
- input-release-version
agents: agents:
queue: cpu_queue_postmerge queue: cpu_queue_postmerge
commands: commands:
...@@ -212,12 +158,14 @@ steps: ...@@ -212,12 +158,14 @@ steps:
env: env:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
- block: "Build arm64 CPU release image" - block: "Build release image for arm64 CPU"
key: block-arm64-cpu-release-image-build key: block-arm64-cpu-release-image-build
depends_on: ~ depends_on: ~
- label: "Build and publish arm64 CPU release image" - label: "Build release image - arm64 - CPU"
depends_on: block-arm64-cpu-release-image-build depends_on:
- block-arm64-cpu-release-image-build
- input-release-version
agents: agents:
queue: arm64_cpu_queue_postmerge queue: arm64_cpu_queue_postmerge
commands: commands:
...@@ -228,11 +176,11 @@ steps: ...@@ -228,11 +176,11 @@ steps:
env: env:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
- block: "Build ROCm release image" - block: "Build release image for x86_64 ROCm"
key: block-rocm-release-image-build key: block-rocm-release-image-build
depends_on: ~ depends_on: ~
- label: "Build release image (ROCm)" - label: "Build release image - x86_64 - ROCm"
depends_on: block-rocm-release-image-build depends_on: block-rocm-release-image-build
id: build-release-image-rocm id: build-release-image-rocm
agents: agents:
...@@ -245,7 +193,43 @@ steps: ...@@ -245,7 +193,43 @@ steps:
- "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg BASE_IMAGE=rocm/vllm-dev:base-$BUILDKITE_COMMIT --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-rocm --target vllm-openai --progress plain -f docker/Dockerfile.rocm ." - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg BASE_IMAGE=rocm/vllm-dev:base-$BUILDKITE_COMMIT --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-rocm --target vllm-openai --progress plain -f docker/Dockerfile.rocm ."
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-rocm" - "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-rocm"
- label: "Build and publish nightly multi-arch image to DockerHub" - group: "Publish release images"
key: "publish-release-images"
steps:
- label: "Create multi-arch manifest - CUDA 12.9"
depends_on:
- build-release-image-x86
- build-release-image-arm64
id: create-multi-arch-manifest
agents:
queue: small_cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64 --amend"
- "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT"
- label: "Annotate release workflow - CUDA 12.9"
depends_on:
- create-multi-arch-manifest
id: annotate-release-workflow
agents:
queue: small_cpu_queue_postmerge
commands:
- "bash .buildkite/scripts/annotate-release.sh"
- label: "Create multi-arch manifest - CUDA 13.0"
depends_on:
- build-release-image-x86-cuda-13-0
- build-release-image-arm64-cuda-13-0
id: create-multi-arch-manifest-cuda-13-0
agents:
queue: small_cpu_queue_postmerge
commands:
- "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7"
- "docker manifest create public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-x86_64-cu130 public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-aarch64-cu130 --amend"
- "docker manifest push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-cu130"
- label: "Publish nightly multi-arch image to DockerHub"
depends_on: depends_on:
- create-multi-arch-manifest - create-multi-arch-manifest
if: build.env("NIGHTLY") == "1" if: build.env("NIGHTLY") == "1"
...@@ -263,7 +247,7 @@ steps: ...@@ -263,7 +247,7 @@ steps:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
DOCKERHUB_USERNAME: "vllmbot" DOCKERHUB_USERNAME: "vllmbot"
- label: "Build and publish nightly multi-arch image to DockerHub - CUDA 13.0" - label: "Publish nightly multi-arch image to DockerHub - CUDA 13.0"
depends_on: depends_on:
- create-multi-arch-manifest-cuda-13-0 - create-multi-arch-manifest-cuda-13-0
if: build.env("NIGHTLY") == "1" if: build.env("NIGHTLY") == "1"
...@@ -281,6 +265,23 @@ steps: ...@@ -281,6 +265,23 @@ steps:
DOCKER_BUILDKIT: "1" DOCKER_BUILDKIT: "1"
DOCKERHUB_USERNAME: "vllmbot" DOCKERHUB_USERNAME: "vllmbot"
- group: "Publish wheels"
key: "publish-wheels"
steps:
- block: "Confirm update release wheels to PyPI (experimental, use with caution)?"
key: block-upload-release-wheels
depends_on:
- input-release-version
- build-wheels
- label: "Upload release wheels to PyPI and GitHub"
depends_on:
- block-upload-release-wheels
id: upload-release-wheels
agents:
queue: small_cpu_queue_postmerge
commands:
- "bash .buildkite/scripts/upload-release-wheels.sh"
# ============================================================================= # =============================================================================
# ROCm Release Pipeline (x86_64 only) # ROCm Release Pipeline (x86_64 only)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment