Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
fd2f1054
Unverified
Commit
fd2f1054
authored
Sep 15, 2025
by
Simon Mo
Committed by
GitHub
Sep 15, 2025
Browse files
[ci] fix wheel names for arm wheels (#24898)
Signed-off-by:
simon-mo
<
simon.mo@hey.com
>
parent
e757a629
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
36 additions
and
23 deletions
+36
-23
.buildkite/release-pipeline.yaml
.buildkite/release-pipeline.yaml
+4
-12
.buildkite/scripts/annotate-release.sh
.buildkite/scripts/annotate-release.sh
+22
-7
docker/Dockerfile
docker/Dockerfile
+2
-0
setup.py
setup.py
+2
-4
vllm/envs.py
vllm/envs.py
+6
-0
No files found.
.buildkite/release-pipeline.yaml
View file @
fd2f1054
steps
:
# aarch64 + CUDA builds. PyTorch 2.8 aarch64 + CUDA wheel is only available on CUDA 12.9
-
label
:
"
Build
arm64
wheel
-
CUDA
12.9"
depends_on
:
~
id
:
build-wheel-arm64-cuda-12-9
agents
:
queue
:
arm64_cpu_queue_postmerge
commands
:
# #NOTE: torch_cuda_arch_list is derived from upstream PyTorch build files here:
# https://github.com/pytorch/pytorch/blob/main/.ci/aarch64_linux/aarch64_ci_build.sh#L7
-
"
DOCKER_BUILDKIT=1
docker
build
--build-arg
max_jobs=16
--build-arg
USE_SCCACHE=1
--build-arg
GIT_REPO_CHECK=1
--build-arg
CUDA_VERSION=12.9.1
--build-arg
torch_cuda_arch_list='8.7
9.0
10.0+PTX
12.0'
--tag
vllm-ci:build-image
--target
build
--progress
plain
-f
docker/Dockerfile
."
-
"
DOCKER_BUILDKIT=1
docker
build
--build-arg
max_jobs=16
--build-arg
USE_SCCACHE=1
--build-arg
GIT_REPO_CHECK=1
--build-arg
CUDA_VERSION=12.9.1
--build-arg
VLLM_MAIN_CUDA_VERSION=12.9
--build-arg
torch_cuda_arch_list='8.7
9.0
10.0+PTX
12.0'
--tag
vllm-ci:build-image
--target
build
--progress
plain
-f
docker/Dockerfile
."
-
"
mkdir
artifacts"
-
"
docker
run
--rm
-v
$(pwd)/artifacts:/artifacts_host
vllm-ci:build-image
bash
-c
'cp
-r
dist
/artifacts_host
&&
chmod
-R
a+rw
/artifacts_host'"
-
"
bash
.buildkite/scripts/upload-wheels.sh"
env
:
DOCKER_BUILDKIT
:
"
1"
-
block
:
"
Build
CUDA
12.8
wheel"
key
:
block-build-cu128-wheel
-
label
:
"
Build
wheel
-
CUDA
12.8"
depends_on
:
block-build-cu128-wheel
depends_on
:
~
id
:
build-wheel-cuda-12-8
agents
:
queue
:
cpu_queue_postmerge
...
...
@@ -30,12 +28,8 @@ steps:
env
:
DOCKER_BUILDKIT
:
"
1"
-
block
:
"
Build
CUDA
12.6
wheel"
key
:
block-build-cu126-wheel
depends_on
:
~
-
label
:
"
Build
wheel
-
CUDA
12.6"
depends_on
:
block-build-cu126-wheel
depends_on
:
~
id
:
build-wheel-cuda-12-6
agents
:
queue
:
cpu_queue_postmerge
...
...
@@ -102,8 +96,6 @@ steps:
depends_on
:
-
create-multi-arch-manifest
-
build-wheel-cuda-12-8
-
build-wheel-cuda-12-6
-
build-wheel-cuda-12-9
id
:
annotate-release-workflow
agents
:
queue
:
cpu_queue_postmerge
...
...
.buildkite/scripts/annotate-release.sh
View file @
fd2f1054
...
...
@@ -14,18 +14,33 @@ buildkite-agent annotate --style 'info' --context 'release-workflow' << EOF
To download the wheel:
\`\`\`
aws s3 cp s3://vllm-wheels/
${
RELEASE_VERSION
}
/vllm-
${
RELEASE_VERSION
}
-cp38-abi3-manylinux1_x86_64.whl .
aws s3 cp s3://vllm-wheels/
${
RELEASE_VERSION
}
/vllm-
${
RELEASE_VERSION
}
-cp38-abi3-manylinux2014_aarch64.whl .
aws s3 cp s3://vllm-wheels/
${
RELEASE_VERSION
}
+cu126/vllm-
${
RELEASE_VERSION
}
+cu126-cp38-abi3-manylinux1_x86_64.whl .
aws s3 cp s3://vllm-wheels/
${
RELEASE_VERSION
}
+cu1
18
/vllm-
${
RELEASE_VERSION
}
+cu1
18
-cp38-abi3-manylinux1_x86_64.whl .
aws s3 cp s3://vllm-wheels/
${
RELEASE_VERSION
}
+cu1
29
/vllm-
${
RELEASE_VERSION
}
+cu1
29
-cp38-abi3-manylinux1_x86_64.whl .
\`\`\`
To download and upload the image:
\`\`\`
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:
${
BUILDKITE_COMMIT
}
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:
${
BUILDKITE_COMMIT
}
vllm/vllm-openai
docker tag vllm/vllm-openai vllm/vllm-openai:latest
docker tag vllm/vllm-openai vllm/vllm-openai:v
${
RELEASE_VERSION
}
docker push vllm/vllm-openai:latest
docker push vllm/vllm-openai:v
${
RELEASE_VERSION
}
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:
${
BUILDKITE_COMMIT
}
-x86_64
docker pull public.ecr.aws/q9t5s3a7/vllm-release-repo:
${
BUILDKITE_COMMIT
}
-aarch64
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:
${
BUILDKITE_COMMIT
}
-x86_64 vllm/vllm-openai:x86_64
docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:latest-x86_64
docker tag vllm/vllm-openai:x86_64 vllm/vllm-openai:v
${
RELEASE_VERSION
}
-x86_64
docker push vllm/vllm-openai:latest-x86_64
docker push vllm/vllm-openai:v
${
RELEASE_VERSION
}
-x86_64
docker tag public.ecr.aws/q9t5s3a7/vllm-release-repo:
${
BUILDKITE_COMMIT
}
-aarch64 vllm/vllm-openai:aarch64
docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:latest-aarch64
docker tag vllm/vllm-openai:aarch64 vllm/vllm-openai:v
${
RELEASE_VERSION
}
-aarch64
docker push vllm/vllm-openai:latest-aarch64
docker push vllm/vllm-openai:v
${
RELEASE_VERSION
}
-aarch64
docker manifest create vllm/vllm-openai:latest vllm/vllm-openai:latest-x86_64 vllm/vllm-openai:latest-aarch64 --amend
docker manifest create vllm/vllm-openai:v
${
RELEASE_VERSION
}
vllm/vllm-openai:v
${
RELEASE_VERSION
}
-x86_64 vllm/vllm-openai:v
${
RELEASE_VERSION
}
-aarch64 --amend
docker manifest push vllm/vllm-openai:latest
docker manifest push vllm/vllm-openai:v
${
RELEASE_VERSION
}
\`\`\`
EOF
\ No newline at end of file
docker/Dockerfile
View file @
fd2f1054
...
...
@@ -196,6 +196,7 @@ ARG SCCACHE_S3_NO_CREDENTIALS=0
# Flag to control whether to use pre-built vLLM wheels
ARG
VLLM_USE_PRECOMPILED=""
ARG
VLLM_MAIN_CUDA_VERSION=""
# if USE_SCCACHE is set, use sccache to speed up compilation
RUN
--mount
=
type
=
cache,target
=
/root/.cache/uv
\
...
...
@@ -213,6 +214,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
&&
export
SCCACHE_IDLE_TIMEOUT
=
0
\
&&
export
CMAKE_BUILD_TYPE
=
Release
\
&&
export
VLLM_USE_PRECOMPILED
=
"
${
VLLM_USE_PRECOMPILED
}
"
\
&&
export
VLLM_MAIN_CUDA_VERSION
=
"
${
VLLM_MAIN_CUDA_VERSION
}
"
\
&&
export
VLLM_DOCKER_BUILD_CONTEXT
=
1
\
&&
sccache
--show-stats
\
&&
python3 setup.py bdist_wheel
--dist-dir
=
dist
--py-limited-api
=
cp38
\
...
...
setup.py
View file @
fd2f1054
...
...
@@ -56,8 +56,6 @@ elif (sys.platform.startswith("linux") and torch.version.cuda is None
# fallback to cpu
VLLM_TARGET_DEVICE
=
"cpu"
MAIN_CUDA_VERSION
=
"12.8"
def
is_sccache_available
()
->
bool
:
return
which
(
"sccache"
)
is
not
None
and
\
...
...
@@ -507,7 +505,7 @@ def get_vllm_version() -> str:
version
+=
f
"
{
sep
}
precompiled"
else
:
cuda_version
=
str
(
get_nvcc_cuda_version
())
if
cuda_version
!=
MAIN_CUDA_VERSION
:
if
cuda_version
!=
envs
.
VLLM_
MAIN_CUDA_VERSION
:
cuda_version_str
=
cuda_version
.
replace
(
"."
,
""
)[:
3
]
# skip this for source tarball, required for pypi
if
"sdist"
not
in
sys
.
argv
:
...
...
@@ -515,7 +513,7 @@ def get_vllm_version() -> str:
elif
_is_hip
():
# Get the Rocm Version
rocm_version
=
get_rocm_version
()
or
torch
.
version
.
hip
if
rocm_version
and
rocm_version
!=
MAIN_CUDA_VERSION
:
if
rocm_version
and
rocm_version
!=
envs
.
VLLM_
MAIN_CUDA_VERSION
:
version
+=
f
"
{
sep
}
rocm
{
rocm_version
.
replace
(
'.'
,
''
)[:
3
]
}
"
elif
_is_tpu
():
version
+=
f
"
{
sep
}
tpu"
...
...
vllm/envs.py
View file @
fd2f1054
...
...
@@ -70,6 +70,7 @@ if TYPE_CHECKING:
VLLM_VIDEO_LOADER_BACKEND
:
str
=
"opencv"
VLLM_MM_INPUT_CACHE_GIB
:
int
=
4
VLLM_TARGET_DEVICE
:
str
=
"cuda"
VLLM_MAIN_CUDA_VERSION
:
str
=
"12.8"
MAX_JOBS
:
Optional
[
str
]
=
None
NVCC_THREADS
:
Optional
[
str
]
=
None
VLLM_USE_PRECOMPILED
:
bool
=
False
...
...
@@ -249,6 +250,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_TARGET_DEVICE"
:
lambda
:
os
.
getenv
(
"VLLM_TARGET_DEVICE"
,
"cuda"
).
lower
(),
# Main CUDA version of vLLM, supporting [12.6, 12.8, 12.9],
# 12.8 is the default. This follows PyTorch but can be overridden.
"VLLM_MAIN_CUDA_VERSION"
:
lambda
:
os
.
getenv
(
"VLLM_MAIN_CUDA_VERSION"
,
""
).
lower
()
or
"12.8"
,
# Maximum number of compilation jobs to run in parallel.
# By default this is the number of CPUs
"MAX_JOBS"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment