Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
20c5daf3
"docs/vscode:/vscode.git/clone" did not exist on "24cb926ef507a20b09dbe7065783c1339db4d3e5"
Unverified
Commit
20c5daf3
authored
Jul 22, 2025
by
ptarasiewiczNV
Committed by
GitHub
Jul 22, 2025
Browse files
fix: install torch distribution matching container cuda version (#2027)
parent
4449f3da
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
14 additions
and
5 deletions
+14
-5
container/Dockerfile.vllm
container/Dockerfile.vllm
+4
-2
container/deps/vllm/install_vllm.sh
container/deps/vllm/install_vllm.sh
+10
-3
No files found.
container/Dockerfile.vllm
View file @
20c5daf3
...
...
@@ -11,6 +11,7 @@ ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG VLLM_REF="059d4cd"
ARG TORCH_BACKEND="cu128"
# After this commit deepgemm API changed
# 1.0.0 -> 2.0.0
...
...
@@ -38,9 +39,10 @@ ARG ARCH_ALT=x86_64
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS base
# Redeclare ARCH
and
ARCH_ALT so they're available in this stage
# Redeclare ARCH
,
ARCH_ALT
, TORCH_BACKEND
so they're available in this stage
ARG ARCH
ARG ARCH_ALT
ARG TORCH_BACKEND
USER root
ARG PYTHON_VERSION=3.12
...
...
@@ -192,7 +194,7 @@ RUN --mount=type=bind,source=./container/deps/,target=/tmp/deps \
--mount=type=cache,target=/root/.cache/uv \
cp /tmp/deps/vllm/install_vllm.sh /tmp/install_vllm.sh && \
chmod +x /tmp/install_vllm.sh && \
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
/tmp/install_vllm.sh --editable --vllm-ref $VLLM_REF --max-jobs $MAX_JOBS --arch $ARCH --installation-dir /opt --deepgemm-ref $DEEPGEMM_REF --flashinf-ref $FLASHINF_REF
--torch-backend $TORCH_BACKEND
ENV LD_LIBRARY_PATH=\
/opt/vllm/tools/ep_kernels/ep_kernels_workspace/nvshmem_install/lib:\
...
...
container/deps/vllm/install_vllm.sh
View file @
20c5daf3
...
...
@@ -26,6 +26,7 @@ INSTALLATION_DIR=/tmp
ARCH
=
$(
uname
-m
)
DEEPGEMM_REF
=
"6c9558e"
FLASHINF_REF
=
"1d72ed4"
TORCH_BACKEND
=
"cu128"
# Convert x86_64 to amd64 for consistency with Docker ARG
if
[
"
$ARCH
"
=
"x86_64"
]
;
then
...
...
@@ -68,8 +69,12 @@ while [[ $# -gt 0 ]]; do
FLASHINF_REF
=
"
$2
"
shift
2
;;
--torch-backend
)
TORCH_BACKEND
=
"
$2
"
shift
2
;;
-h
|
--help
)
echo
"Usage:
$0
[--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]"
echo
"Usage:
$0
[--editable|--no-editable] [--vllm-ref REF] [--max-jobs NUM] [--arch ARCH] [--deepgemm-ref REF] [--flashinf-ref REF]
[--torch-backend BACKEND]
"
echo
"Options:"
echo
" --editable Install vllm in editable mode (default)"
echo
" --no-editable Install vllm in non-editable mode"
...
...
@@ -79,6 +84,7 @@ while [[ $# -gt 0 ]]; do
echo
" --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
echo
" --deepgemm-ref REF Git reference for DeepGEMM (default: 6c9558e)"
echo
" --flashinf-ref REF Git reference for Flash Infer (default: 1d72ed4)"
echo
" --torch-backend BACKEND Torch backend to use (default: cu128)"
exit
0
;;
*
)
...
...
@@ -96,6 +102,7 @@ echo " EDITABLE: $EDITABLE"
echo
" VLLM_REF:
$VLLM_REF
"
echo
" MAX_JOBS:
$MAX_JOBS
"
echo
" ARCH:
$ARCH
"
echo
" TORCH_BACKEND:
$TORCH_BACKEND
"
# Install common dependencies
uv pip
install
pip cuda-python
...
...
@@ -128,9 +135,9 @@ if [ "$ARCH" = "arm64" ]; then
else
echo
"Installing vllm for AMD64 architecture"
if
[
"
$EDITABLE
"
=
"true"
]
;
then
VLLM_USE_PRECOMPILED
=
1 uv pip
install
-e
.
VLLM_USE_PRECOMPILED
=
1 uv pip
install
-e
.
--torch-backend
=
$TORCH_BACKEND
else
VLLM_USE_PRECOMPILED
=
1 uv pip
install
.
VLLM_USE_PRECOMPILED
=
1 uv pip
install
.
--torch-backend
=
$TORCH_BACKEND
fi
fi
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment