Unverified commit 20c5daf3, authored by ptarasiewiczNV and committed by GitHub
Browse files

fix: install torch distribution matching container cuda version (#2027)

parent 4449f3da
......@@ -11,6 +11,7 @@ ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG VLLM_REF="059d4cd"
ARG TORCH_BACKEND="cu128"
# After this commit deepgemm API changed
# 1.0.0 -> 2.0.0
......@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage
# Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
ARG ARCH
ARG ARCH_ALT
ARG TORCH_BACKEND
USER root
ARG PYTHON_VERSION=3.12
......@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
......
......@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
ARCH=$(uname -m)
DEEPGEMM_REF="6c9558e"
FLASHINF_REF="1d72ed4"
TORCH_BACKEND="cu128"
# Convert x86_64 to amd64 for consistency with Docker ARG
if [ "$ARCH" = "x86_64" ]; then
......@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
FLASHINF_REF="$2"
shift 2
;;
--torch-backend)
TORCH_BACKEND="$2"
shift 2
;;
-h|--help)
echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]"
echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND]"
echo "Options:"
echo " --editable Install vllm in editable mode (default)"
echo " --no-editable Install vllm in non-editable mode"
......@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
echo " --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
echo " --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)"
echo " --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)"
echo " --torch-backend BACKEND Torch backend to use (default: cu128)"
exit 0
;;
*)
......@@ -96,6 +102,7 @@ echo " EDITABLE: $EDITABLE"
echo " VLLM_REF: $VLLM_REF"
echo " MAX_JOBS: $MAX_JOBS"
echo " ARCH: $ARCH"
echo " TORCH_BACKEND: $TORCH_BACKEND"
# Install common dependencies
uv pip install pip cuda-python
......@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
else
echo "Installing vllm for AMD64 architecture"
if [ "$EDITABLE" = "true" ]; then
VLLM_USE_PRECOMPILED=1 uv pip install -e .
VLLM_USE_PRECOMPILED=1 uv pip install -e . --torch-backend=$TORCH_BACKEND
else
VLLM_USE_PRECOMPILED=1 uv pip install .
VLLM_USE_PRECOMPILED=1 uv pip install . --torch-backend=$TORCH_BACKEND
fi
fi
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment