fix: install torch distribution matching container cuda version (#2027)

20c5daf3 · ptarasiewiczNV · GitHub · 4449f3da · 20c5daf3 · 20c5daf3
Unverified commit 20c5daf3, authored Jul 22, 2025 by ptarasiewiczNV; committed by GitHub on Jul 22, 2025
Show whitespace changes
Inline Side-by-side

Showing with 14 additions and 5 deletions

container/Dockerfile.vllm +4 -2

container/deps/vllm/install_vllm.sh +10 -3

No files found.
--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -11,6 +11,7 @@ ARG RELEASE_BUILD
 ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
 ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
 ARG VLLM_REF="059d4cd"
+ARG TORCH_BACKEND="cu128"
 # After this commit deepgemm API changed
 # 1.0.0 -> 2.0.0
@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64
 FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
-# Redeclare ARCH and ARCH_ALT so they're available in this stage
+# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
 ARG ARCH
 ARG ARCH_ALT
+ARG TORCH_BACKEND
 USER root
 ARG PYTHON_VERSION=3.12
@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
    --mount=type=cache,target=/root/.cache/uv \
    cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
    chmod +x /tmp/install_vllm.sh && \
-    /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
+    /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND
 ENV LD_LIBRARY_PATH=\
 /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\

--- a/container/deps/vllm/install_vllm.sh
+++ b/container/deps/vllm/install_vllm.sh
@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
 ARCH=$(uname -m)
 DEEPGEMM_REF="6c9558e"
 FLASHINF_REF="1d72ed4"
+TORCH_BACKEND="cu128"
 # Convert x86_64 to amd64 for consistency with Docker ARG
 if [ "$ARCH" = "x86_64" ]; then
@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
            FLASHINF_REF="$2"
            shift 2
            ;;
+        --torch-backend)
+            TORCH_BACKEND="$2"
+            shift 2
+            ;;
        -h|--help)
-            echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]"
+            echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND]"
            echo "Options:"
            echo "  --editable        Install vllm in editable mode (default)"
            echo "  --no-editable     Install vllm in non-editable mode"
@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
            echo "  --installation-dir DIR  Directory to install vllm (default: /tmp/vllm)"
            echo "  --deepgemm-ref REF  Git reference for DeepGEMM (default: 6c9558e)"
            echo "  --flashinf-ref REF  Git reference for Flash Infer (default: 1d72ed4)"
+            echo "  --torch-backend BACKEND  Torch backend to use (default: cu128)"
            exit 0
            ;;
        *)
@@ -96,6 +102,7 @@ echo "  EDITABLE: $EDITABLE"
 echo "  VLLM_REF: $VLLM_REF"
 echo "  MAX_JOBS: $MAX_JOBS"
 echo "  ARCH: $ARCH"
+echo "  TORCH_BACKEND: $TORCH_BACKEND"
 # Install common dependencies
 uv pip install pip cuda-python
@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
 else
    echo "Installing vllm for AMD64 architecture"
    if [ "$EDITABLE" = "true" ]; then
-        VLLM_USE_PRECOMPILED=1 uv pip install -e .
+        VLLM_USE_PRECOMPILED=1 uv pip install -e . --torch-backend=$TORCH_BACKEND
    else
-        VLLM_USE_PRECOMPILED=1 uv pip install .
+        VLLM_USE_PRECOMPILED=1 uv pip install . --torch-backend=$TORCH_BACKEND
    fi
 fi