Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5f57ea5f
Unverified
Commit
5f57ea5f
authored
Aug 19, 2025
by
Dmitry Tokarev
Committed by
GitHub
Aug 19, 2025
Browse files
chore: Finish vllm upgrade to 0.10.1 + cleanup (#2528)
parent
07cfc3a1
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
18 additions
and
15 deletions
+18
-15
components/backends/vllm/src/dynamo/vllm/args.py
components/backends/vllm/src/dynamo/vllm/args.py
+1
-1
container/Dockerfile.vllm
container/Dockerfile.vllm
+4
-4
container/deps/vllm/install_vllm.sh
container/deps/vllm/install_vllm.sh
+12
-9
pyproject.toml
pyproject.toml
+1
-1
No files found.
components/backends/vllm/src/dynamo/vllm/args.py
View file @
5f57ea5f
...
@@ -170,7 +170,7 @@ async def configure_ports_with_etcd(config: Config, etcd_client):
...
@@ -170,7 +170,7 @@ async def configure_ports_with_etcd(config: Config, etcd_client):
logger
.
info
(
f
"Allocated ZMQ KV events port:
{
kv_port
}
(worker_id=
{
worker_id
}
)"
)
logger
.
info
(
f
"Allocated ZMQ KV events port:
{
kv_port
}
(worker_id=
{
worker_id
}
)"
)
# Allocate side channel ports
# Allocate side channel ports
# https://github.com/vllm-project/vllm/blob/releases/v0.10.
0
/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py#L
372
# https://github.com/vllm-project/vllm/blob/releases/v0.10.
1
/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py#L
443
# NIXL calculates ports as: base_port + (dp_rank * tp_size) + tp_rank
# NIXL calculates ports as: base_port + (dp_rank * tp_size) + tp_rank
# For dp_rank, we need to reserve tp_size consecutive ports
# For dp_rank, we need to reserve tp_size consecutive ports
tp_size
=
config
.
engine_args
.
tensor_parallel_size
or
1
tp_size
=
config
.
engine_args
.
tensor_parallel_size
or
1
...
...
container/Dockerfile.vllm
View file @
5f57ea5f
...
@@ -13,15 +13,15 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
...
@@ -13,15 +13,15 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="
77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
ARG VLLM_REF="
aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
ARG TORCH_BACKEND="cu128"
ARG TORCH_BACKEND="cu128"
# Match 0.10.
0
vLLM release
# Match 0.10.
1
vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.
0
# https://github.com/vllm-project/vllm/releases/tag/v0.10.
1
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
ARG DEEPGEMM_REF="f85ec64"
ARG DEEPGEMM_REF="f85ec64"
ARG FLASHINF_REF="v0.2.
8rc
1"
ARG FLASHINF_REF="v0.2.
1
1"
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
...
container/deps/vllm/install_vllm.sh
View file @
5f57ea5f
...
@@ -20,13 +20,16 @@ set -euo pipefail
...
@@ -20,13 +20,16 @@ set -euo pipefail
# Parse arguments
# Parse arguments
EDITABLE
=
true
EDITABLE
=
true
VLLM_REF
=
"77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
VLLM_REF
=
"aab549870df50edf0512f0a59b574f692f546465"
# from v0.10.1
# When updating above VLLM_REF make sure precompiled wheel file URL is correct. Run this command:
# aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
VLLM_PRECOMPILED_WHEEL_LOCATION
=
"https://vllm-wheels.s3.us-west-2.amazonaws.com/
${
VLLM_REF
}
/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
VLLM_GIT_URL
=
"https://github.com/vllm-project/vllm.git"
VLLM_GIT_URL
=
"https://github.com/vllm-project/vllm.git"
MAX_JOBS
=
16
MAX_JOBS
=
16
INSTALLATION_DIR
=
/tmp
INSTALLATION_DIR
=
/tmp
ARCH
=
$(
uname
-m
)
ARCH
=
$(
uname
-m
)
DEEPGEMM_REF
=
"f85ec64"
DEEPGEMM_REF
=
"f85ec64"
FLASHINF_REF
=
"v0.2.
8rc
1"
FLASHINF_REF
=
"v0.2.
1
1"
TORCH_BACKEND
=
"cu128"
TORCH_BACKEND
=
"cu128"
# Convert x86_64 to amd64 for consistency with Docker ARG
# Convert x86_64 to amd64 for consistency with Docker ARG
...
@@ -83,13 +86,13 @@ while [[ $# -gt 0 ]]; do
...
@@ -83,13 +86,13 @@ while [[ $# -gt 0 ]]; do
echo
"Options:"
echo
"Options:"
echo
" --editable Install vllm in editable mode (default)"
echo
" --editable Install vllm in editable mode (default)"
echo
" --no-editable Install vllm in non-editable mode"
echo
" --no-editable Install vllm in non-editable mode"
echo
" --vllm-ref REF Git reference to checkout (default:
f4135232b9a8c4845f8961fb1cd17581c56ae2ce
)"
echo
f
" --vllm-ref REF Git reference to checkout (default:
${
VLLM_REF
}
)"
echo
" --max-jobs NUM Maximum number of parallel jobs (default:
16
)"
echo
f
" --max-jobs NUM Maximum number of parallel jobs (default:
${
MAX_JOBS
}
)"
echo
" --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
echo
" --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
echo
" --installation-dir DIR Directory to install vllm (default:
/tmp/vllm
)"
echo
f
" --installation-dir DIR Directory to install vllm (default:
${
INSTALLATION_DIR
}
)"
echo
" --deepgemm-ref REF Git reference for DeepGEMM (default:
1876566
)"
echo
f
" --deepgemm-ref REF Git reference for DeepGEMM (default:
${
DEEPGEMM_REF
}
)"
echo
" --flashinf-ref REF Git reference for Flash Infer (default:
v0.2.8rc1
)"
echo
f
" --flashinf-ref REF Git reference for Flash Infer (default:
${
FLASHINF_REF
}
)"
echo
" --torch-backend BACKEND Torch backend to use (default:
cu128
)"
echo
f
" --torch-backend BACKEND Torch backend to use (default:
${
TORCH_BACKEND
}
)"
exit
0
exit
0
;;
;;
*
)
*
)
...
@@ -154,7 +157,7 @@ else
...
@@ -154,7 +157,7 @@ else
exit
1
exit
1
fi
fi
export
VLLM_PRECOMPILED_WHEEL_LOCATION
=
https://vllm-wheels.s3.us-west-2.amazonaws.com/
${
VLLM_RE
F
}
/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
export
VLLM_PRECOMPILED_WHEEL_LOCATION
=
"
${
VLLM_
P
RE
COMPILED_WHEEL_LOCATION
}
"
if
[
"
$EDITABLE
"
=
"true"
]
;
then
if
[
"
$EDITABLE
"
=
"true"
]
;
then
uv pip
install
-e
.
--torch-backend
=
$TORCH_BACKEND
uv pip
install
-e
.
--torch-backend
=
$TORCH_BACKEND
...
...
pyproject.toml
View file @
5f57ea5f
...
@@ -56,7 +56,7 @@ trtllm =[
...
@@ -56,7 +56,7 @@ trtllm =[
vllm
=
[
vllm
=
[
"uvloop"
,
"uvloop"
,
"nixl<=0.4.1"
,
"nixl<=0.4.1"
,
"vllm==0.10.
0
"
,
"vllm
[flashinfer]
==0.10.
1
"
,
]
]
sglang
=
[
sglang
=
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment