Unverified commit a6ac22e6, authored by Alec, committed by GitHub
Browse files

fix: aarch64 path to use cu129 (#3624)


Signed-off-by: alec-flowers <aflowers@nvidia.com>
parent ab0da582
...@@ -34,6 +34,18 @@ inputs: ...@@ -34,6 +34,18 @@ inputs:
aws_secret_access_key: aws_secret_access_key:
description: 'AWS Secret Access Key' description: 'AWS Secret Access Key'
required: false required: false
# Optional build overrides. The build step forwards each of these to
# container/build.sh only when its value is non-empty: base_image_tag as
# `--base-image-tag <value>`, the other three as `--build-arg NAME=<value>`.
base_image_tag:
description: 'Optional override for base image tag passed to build.sh'
required: false
runtime_image_tag:
description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
required: false
cuda_version:
description: 'Optional override for CUDA_VERSION build-arg'
required: false
torch_backend:
description: 'Optional override for TORCH_BACKEND build-arg (e.g., cu129)'
required: false
outputs: outputs:
image_tag: image_tag:
...@@ -81,6 +93,21 @@ runs: ...@@ -81,6 +93,21 @@ runs:
echo "BUILD_START_TIME=${BUILD_START_TIME}" >> $GITHUB_ENV echo "BUILD_START_TIME=${BUILD_START_TIME}" >> $GITHUB_ENV
echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
# Translate the optional action inputs into additional build.sh flags.
# An input left empty contributes nothing. EXTRA_ARGS is expanded unquoted on
# the build.sh command line, so override values must not contain whitespace.
EXTRA_ARGS=""
[ -z "${{ inputs.base_image_tag }}" ] \
  || EXTRA_ARGS="${EXTRA_ARGS} --base-image-tag ${{ inputs.base_image_tag }}"
[ -z "${{ inputs.runtime_image_tag }}" ] \
  || EXTRA_ARGS="${EXTRA_ARGS} --build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }}"
[ -z "${{ inputs.cuda_version }}" ] \
  || EXTRA_ARGS="${EXTRA_ARGS} --build-arg CUDA_VERSION=${{ inputs.cuda_version }}"
[ -z "${{ inputs.torch_backend }}" ] \
  || EXTRA_ARGS="${EXTRA_ARGS} --build-arg TORCH_BACKEND=${{ inputs.torch_backend }}"
./container/build.sh --tag "$IMAGE_TAG" \ ./container/build.sh --tag "$IMAGE_TAG" \
--target ${{ inputs.target }} \ --target ${{ inputs.target }} \
--vllm-max-jobs 10 \ --vllm-max-jobs 10 \
...@@ -88,7 +115,7 @@ runs: ...@@ -88,7 +115,7 @@ runs:
--platform ${{ inputs.platform }} \ --platform ${{ inputs.platform }} \
--use-sccache \ --use-sccache \
--sccache-bucket "$SCCACHE_S3_BUCKET" \ --sccache-bucket "$SCCACHE_S3_BUCKET" \
--sccache-region "$AWS_DEFAULT_REGION" --sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS
BUILD_END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ) BUILD_END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
echo "🕐 Build ended at: ${BUILD_END_TIME}" echo "🕐 Build ended at: ${BUILD_END_TIME}"
......
...@@ -58,6 +58,10 @@ jobs: ...@@ -58,6 +58,10 @@ jobs:
framework: vllm framework: vllm
target: runtime target: runtime
platform: 'linux/${{ matrix.platform.arch }}' platform: 'linux/${{ matrix.platform.arch }}'
# arm64-only overrides. `cond && 'value' || ''` acts as a ternary: on arm64 the
# CUDA 12.9 values below are passed to the build action; on any other
# architecture the input is an empty string, so no override is forwarded.
base_image_tag: ${{ matrix.platform.arch == 'arm64' && '25.06-cuda12.9-devel-ubuntu24.04' || '' }}
runtime_image_tag: ${{ matrix.platform.arch == 'arm64' && '12.9.0-runtime-ubuntu24.04' || '' }}
cuda_version: ${{ matrix.platform.arch == 'arm64' && '129' || '' }}
torch_backend: ${{ matrix.platform.arch == 'arm64' && 'cu129' || '' }}
ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }} ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
ci_token: ${{ secrets.CI_TOKEN }} ci_token: ${{ secrets.CI_TOKEN }}
aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }} aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}
...@@ -251,4 +255,4 @@ jobs: ...@@ -251,4 +255,4 @@ jobs:
CONTAINER_INDEX: ${{ secrets.CONTAINER_INDEX }} CONTAINER_INDEX: ${{ secrets.CONTAINER_INDEX }}
run: | run: |
# Upload complete workflow metrics including container metrics # Upload complete workflow metrics including container metrics
python3 .github/workflows/upload_complete_workflow_metrics.py python3 .github/workflows/upload_complete_workflow_metrics.py
\ No newline at end of file
...@@ -88,6 +88,10 @@ RUN apt-get update -y \ ...@@ -88,6 +88,10 @@ RUN apt-get update -y \
&& apt-get clean \ && apt-get clean \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# If libmlx5.so is not shipped with the 24.04 rdma-core packaging, CMake will fail when
# looking for the generic dev-name .so, so we symlink .so.1 -> .so.
# `|| true` keeps the build going when the link cannot be created
# (presumably on non-aarch64 images where this directory does not exist - TODO confirm).
RUN ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so || true
### VIRTUAL ENVIRONMENT SETUP ### ### VIRTUAL ENVIRONMENT SETUP ###
COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/
......
...@@ -136,9 +136,9 @@ git checkout $VLLM_REF ...@@ -136,9 +136,9 @@ git checkout $VLLM_REF
echo "\n=== Installing vLLM & FlashInfer ===" echo "\n=== Installing vLLM & FlashInfer ==="
if [[ $VLLM_REF =~ ^v ]] && [ "$ARCH" = "amd64" ]; then if [[ $VLLM_REF =~ ^v ]] && { [ "$ARCH" = "amd64" ] || { [ "$ARCH" = "arm64" ] && [ "$TORCH_BACKEND" = "cu129" ]; }; }; then
# VLLM_REF starts with 'v' and amd64 - use pip install with version tag # VLLM_REF starts with 'v' and either amd64, or arm64 with cu129 backend - use PyPI install
echo "Installing vLLM $VLLM_REF from PyPI..." echo "Installing vLLM $VLLM_REF from PyPI... (ARCH=$ARCH, TORCH_BACKEND=$TORCH_BACKEND)"
uv pip install vllm[flashinfer]==$VLLM_REF --torch-backend=$TORCH_BACKEND uv pip install vllm[flashinfer]==$VLLM_REF --torch-backend=$TORCH_BACKEND
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment