Unverified Commit 20c5daf3, authored by ptarasiewiczNV, committed by GitHub
Browse files

fix: install torch distribution matching container cuda version (#2027)

parent 4449f3da
...@@ -11,6 +11,7 @@ ARG RELEASE_BUILD ...@@ -11,6 +11,7 @@ ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG VLLM_REF="059d4cd" ARG VLLM_REF="059d4cd"
ARG TORCH_BACKEND="cu128"
# After this commit deepgemm API changed # After this commit deepgemm API changed
# 1.0.0 -> 2.0.0 # 1.0.0 -> 2.0.0
...@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64 ...@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH and ARCH_ALT so they're available in this stage # Redeclare ARCH, ARCH_ALT, TORCH_BACKEND so they're available in this stage
ARG ARCH ARG ARCH
ARG ARCH_ALT ARG ARCH_ALT
ARG TORCH_BACKEND
USER root USER root
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.12
...@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \ ...@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/uv \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \ cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \ chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF /tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF --torch-backend $TORCH_BACKEND
ENV LD_LIBRARY_PATH=\ ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\ /opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
......
...@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp ...@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
ARCH=$(uname -m) ARCH=$(uname -m)
DEEPGEMM_REF="6c9558e" DEEPGEMM_REF="6c9558e"
FLASHINF_REF="1d72ed4" FLASHINF_REF="1d72ed4"
TORCH_BACKEND="cu128"
# Convert x86_64 to amd64 for consistency with Docker ARG # Convert x86_64 to amd64 for consistency with Docker ARG
if [ "$ARCH" = "x86_64" ]; then if [ "$ARCH" = "x86_64" ]; then
...@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do ...@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
FLASHINF_REF="$2" FLASHINF_REF="$2"
shift 2 shift 2
;; ;;
--torch-backend)
TORCH_BACKEND="$2"
shift 2
;;
-h|--help) -h|--help)
echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]" echo "Usage: $0 [--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF] [--torch-backend BACKEND]"
echo "Options:" echo "Options:"
echo " --editable Install vllm in editable mode (default)" echo " --editable Install vllm in editable mode (default)"
echo " --no-editable Install vllm in non-editable mode" echo " --no-editable Install vllm in non-editable mode"
...@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do ...@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
echo " --installation-dir DIR Directory to install vllm (default: /tmp/vllm)" echo " --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
echo " --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)" echo " --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)"
echo " --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)" echo " --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)"
echo " --torch-backend BACKEND Torch backend to use (default: cu128)"
exit 0 exit 0
;; ;;
*) *)
...@@ -96,6 +102,7 @@ echo " EDITABLE: $EDITABLE" ...@@ -96,6 +102,7 @@ echo " EDITABLE: $EDITABLE"
echo " VLLM_REF: $VLLM_REF" echo " VLLM_REF: $VLLM_REF"
echo " MAX_JOBS: $MAX_JOBS" echo " MAX_JOBS: $MAX_JOBS"
echo " ARCH: $ARCH" echo " ARCH: $ARCH"
echo " TORCH_BACKEND: $TORCH_BACKEND"
# Install common dependencies # Install common dependencies
uv pip install pip cuda-python uv pip install pip cuda-python
...@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then ...@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
else else
echo "Installing vllm for AMD64 architecture" echo "Installing vllm for AMD64 architecture"
if [ "$EDITABLE" = "true" ]; then if [ "$EDITABLE" = "true" ]; then
VLLM_USE_PRECOMPILED=1 uv pip install -e . VLLM_USE_PRECOMPILED=1 uv pip install -e . --torch-backend=$TORCH_BACKEND
else else
VLLM_USE_PRECOMPILED=1 uv pip install . VLLM_USE_PRECOMPILED=1 uv pip install . --torch-backend=$TORCH_BACKEND
fi fi
fi fi
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment