Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
20c5daf3
Unverified
Commit
20c5daf3
authored
Jul 22, 2025
by
ptarasiewiczNV
Committed by
GitHub
Jul 22, 2025
Browse files
fix: install torch distribution matching container cuda version (#2027)
parent
4449f3da
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
5 deletions
+14
-5
container/Dockerfile.vllm
container/Dockerfile.vllm
+4
-2
container/deps/vllm/install_vllm.sh
container/deps/vllm/install_vllm.sh
+10
-3
No files found.
container/Dockerfile.vllm
View file @
20c5daf3
...
@@ -11,6 +11,7 @@ ARG RELEASE_BUILD
...
@@ -11,6 +11,7 @@ ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG VLLM_REF="059d4cd"
ARG VLLM_REF="059d4cd"
ARG TORCH_BACKEND="cu128"
# After this commit deepgemm API changed
# After this commit deepgemm API changed
# 1.0.0 -> 2.0.0
# 1.0.0 -> 2.0.0
...
@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64
...
@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH
and
ARCH_ALT so they're available in this stage
# Redeclare ARCH
,
ARCH_ALT
, TORCH_BACKEND
so they're available in this stage
ARG ARCH
ARG ARCH
ARG ARCH_ALT
ARG ARCH_ALT
ARG TORCH_BACKEND
USER root
USER root
ARG PYTHON_VERSION=3.12
ARG PYTHON_VERSION=3.12
...
@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
...
@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
--mount=type=cache,target=/root/.cache/uv \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
--torch-backend $TORCH_BACKEND
ENV LD_LIBRARY_PATH=\
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
...
...
container/deps/vllm/install_vllm.sh
View file @
20c5daf3
...
@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
...
@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
ARCH
=
$(
uname
-m
)
ARCH
=
$(
uname
-m
)
DEEPGEMM_REF
=
"6c9558e"
DEEPGEMM_REF
=
"6c9558e"
FLASHINF_REF
=
"1d72ed4"
FLASHINF_REF
=
"1d72ed4"
TORCH_BACKEND
=
"cu128"
# Convert x86_64 to amd64 for consistency with Docker ARG
# Convert x86_64 to amd64 for consistency with Docker ARG
if
[
"
$ARCH
"
=
"x86_64"
]
;
then
if
[
"
$ARCH
"
=
"x86_64"
]
;
then
...
@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
...
@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
FLASHINF_REF
=
"
$2
"
FLASHINF_REF
=
"
$2
"
shift
2
shift
2
;;
;;
--torch-backend
)
TORCH_BACKEND
=
"
$2
"
shift
2
;;
-h
|
--help
)
-h
|
--help
)
echo
"Usage:
$0
[--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]"
echo
"Usage:
$0
[--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]
[--torch-backend BACKEND]
"
echo
"Options:"
echo
"Options:"
echo
" --editable Install vllm in editable mode (default)"
echo
" --editable Install vllm in editable mode (default)"
echo
" --no-editable Install vllm in non-editable mode"
echo
" --no-editable Install vllm in non-editable mode"
...
@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
...
@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
echo
" --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
echo
" --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
echo
" --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)"
echo
" --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)"
echo
" --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)"
echo
" --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)"
echo
" --torch-backend BACKEND Torch backend to use (default: cu128)"
exit
0
exit
0
;;
;;
*
)
*
)
...
@@ -96,6 +102,7 @@ echo " EDITABLE: $EDITABLE"
...
@@ -96,6 +102,7 @@ echo " EDITABLE: $EDITABLE"
echo
" VLLM_REF:
$VLLM_REF
"
echo
" VLLM_REF:
$VLLM_REF
"
echo
" MAX_JOBS:
$MAX_JOBS
"
echo
" MAX_JOBS:
$MAX_JOBS
"
echo
" ARCH:
$ARCH
"
echo
" ARCH:
$ARCH
"
echo
" TORCH_BACKEND:
$TORCH_BACKEND
"
# Install common dependencies
# Install common dependencies
uv pip
install
pip cuda-python
uv pip
install
pip cuda-python
...
@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
...
@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
else
else
echo
"Installing vllm for AMD64 architecture"
echo
"Installing vllm for AMD64 architecture"
if
[
"
$EDITABLE
"
=
"true"
]
;
then
if
[
"
$EDITABLE
"
=
"true"
]
;
then
VLLM_USE_PRECOMPILED
=
1 uv pip
install
-e
.
VLLM_USE_PRECOMPILED
=
1 uv pip
install
-e
.
--torch-backend
=
$TORCH_BACKEND
else
else
VLLM_USE_PRECOMPILED
=
1 uv pip
install
.
VLLM_USE_PRECOMPILED
=
1 uv pip
install
.
--torch-backend
=
$TORCH_BACKEND
fi
fi
fi
fi
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment