fix: aarch64 path to use cu129 (#3624)

Signed-off-by: alec-flowers <aflowers@nvidia.com>

fix: aarch64 path to use cu129 (#3624)
Signed-off-by: alec-flowers <aflowers@nvidia.com>
a6ac22e6 · Alec · GitHub · ab0da582 · a6ac22e6 · a6ac22e6
Unverified Commit a6ac22e6 authored Oct 15, 2025 by Alec Committed by GitHub Oct 15, 2025
4 changed files
--- a/.github/actions/docker-build/action.yml
+++ b/.github/actions/docker-build/action.yml
@@ -34,6 +34,18 @@ inputs:
  aws_secret_access_key:
    description: 'AWS Secret Access Key'
    required: false
+  base_image_tag:
+    description: 'Optional override for base image tag passed to build.sh'
+    required: false
+  runtime_image_tag:
+    description: 'Optional override for RUNTIME_IMAGE_TAG build-arg'
+    required: false
+  cuda_version:
+    description: 'Optional override for CUDA_VERSION build-arg'
+    required: false
+  torch_backend:
+    description: 'Optional override for TORCH_BACKEND build-arg (e.g., cu129)'
+    required: false

 outputs:
  image_tag:
@@ -81,6 +93,21 @@ runs:
        echo "BUILD_START_TIME=${BUILD_START_TIME}" >> $GITHUB_ENV

        echo "image_tag=$IMAGE_TAG" >> $GITHUB_OUTPUT
+        # Collect optional overrides provided by the workflow
+        EXTRA_ARGS=""
+        if [ -n "${{ inputs.base_image_tag }}" ]; then
+          EXTRA_ARGS+=" --base-image-tag ${{ inputs.base_image_tag }}"
+        fi
+        if [ -n "${{ inputs.runtime_image_tag }}" ]; then
+          EXTRA_ARGS+=" --build-arg RUNTIME_IMAGE_TAG=${{ inputs.runtime_image_tag }}"
+        fi
+        if [ -n "${{ inputs.cuda_version }}" ]; then
+          EXTRA_ARGS+=" --build-arg CUDA_VERSION=${{ inputs.cuda_version }}"
+        fi
+        if [ -n "${{ inputs.torch_backend }}" ]; then
+          EXTRA_ARGS+=" --build-arg TORCH_BACKEND=${{ inputs.torch_backend }}"
+        fi
+
        ./container/build.sh --tag "$IMAGE_TAG" \
          --target ${{ inputs.target }} \
          --vllm-max-jobs 10 \
@@ -88,7 +115,7 @@ runs:
          --platform ${{ inputs.platform }} \
          --use-sccache \
          --sccache-bucket "$SCCACHE_S3_BUCKET" \
-          --sccache-region "$AWS_DEFAULT_REGION"
+          --sccache-region "$AWS_DEFAULT_REGION" $EXTRA_ARGS

        BUILD_END_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ)
        echo "🕐 Build ended at: ${BUILD_END_TIME}"

--- a/.github/workflows/container-validation-backends.yml
+++ b/.github/workflows/container-validation-backends.yml
@@ -58,6 +58,10 @@ jobs:
          framework: vllm
          target: runtime
          platform: 'linux/${{ matrix.platform.arch }}'
+          base_image_tag: ${{ matrix.platform.arch == 'arm64' && '25.06-cuda12.9-devel-ubuntu24.04' || '' }}
+          runtime_image_tag: ${{ matrix.platform.arch == 'arm64' && '12.9.0-runtime-ubuntu24.04' || '' }}
+          cuda_version: ${{ matrix.platform.arch == 'arm64' && '129' || '' }}
+          torch_backend: ${{ matrix.platform.arch == 'arm64' && 'cu129' || '' }}
          ngc_ci_access_token: ${{ secrets.NGC_CI_ACCESS_TOKEN }}
          ci_token: ${{ secrets.CI_TOKEN }}
          aws_default_region: ${{ secrets.AWS_DEFAULT_REGION }}

--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -88,6 +88,10 @@ RUN apt-get update -y \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

+# If libmlx5.so is not shipped with the 24.04 rdma-core packaging, CMake will fail when looking for
+# the generic dev name (.so), so we symlink .so.1 -> .so
+RUN ln -sf /usr/lib/aarch64-linux-gnu/libmlx5.so.1 /usr/lib/aarch64-linux-gnu/libmlx5.so || true
+
 ### VIRTUAL ENVIRONMENT SETUP ###

 COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/

--- a/container/deps/vllm/install_vllm.sh
+++ b/container/deps/vllm/install_vllm.sh
@@ -136,9 +136,9 @@ git checkout $VLLM_REF

 echo "\n=== Installing vLLM & FlashInfer ==="

-if [[ $VLLM_REF =~ ^v ]] && [ "$ARCH" = "amd64" ]; then
-    # VLLM_REF starts with 'v' and amd64 - use pip install with version tag
-    echo "Installing vLLM $VLLM_REF from PyPI..."
+if [[ $VLLM_REF =~ ^v ]] && { [ "$ARCH" = "amd64" ] || { [ "$ARCH" = "arm64" ] && [ "$TORCH_BACKEND" = "cu129" ]; }; }; then
+    # VLLM_REF starts with 'v' and either amd64, or arm64 with cu129 backend - use PyPI install
+    echo "Installing vLLM $VLLM_REF from PyPI... (ARCH=$ARCH, TORCH_BACKEND=$TORCH_BACKEND)"

    uv pip install vllm[flashinfer]==$VLLM_REF --torch-backend=$TORCH_BACKEND