Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
5f57ea5f
"...git@developer.sourcefind.cn:2222/OpenDAS/vllm_cscc.git" did not exist on "0730414999343e722590ace615d5814c7e5b6827"
Unverified
Commit
5f57ea5f
authored
Aug 19, 2025
by
Dmitry Tokarev
Committed by
GitHub
Aug 19, 2025
Browse files
chore: Finish vllm upgrade to 0.10.1 + cleanup (#2528)
parent
07cfc3a1
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing 4 changed files with 18 additions and 15 deletions
+18
-15
components/backends/vllm/src/dynamo/vllm/args.py
components/backends/vllm/src/dynamo/vllm/args.py
+1
-1
container/Dockerfile.vllm
container/Dockerfile.vllm
+4
-4
container/deps/vllm/install_vllm.sh
container/deps/vllm/install_vllm.sh
+12
-9
pyproject.toml
pyproject.toml
+1
-1
No files found.
components/backends/vllm/src/dynamo/vllm/args.py
View file @
5f57ea5f
...
@@ -170,7 +170,7 @@ async def configure_ports_with_etcd(config: Config, etcd_client):
...
@@ -170,7 +170,7 @@ async def configure_ports_with_etcd(config: Config, etcd_client):
logger.info(f"Allocated ZMQ KV events port: {kv_port} (worker_id={worker_id})")
logger.info(f"Allocated ZMQ KV events port: {kv_port} (worker_id={worker_id})")
# Allocate side channel ports
# Allocate side channel ports
# https://github.com/vllm-project/vllm/blob/releases/v0.10.0/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py#L372
# https://github.com/vllm-project/vllm/blob/releases/v0.10.1/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py#L443
# NIXL calculates ports as: base_port + (dp_rank * tp_size) + tp_rank
# NIXL calculates ports as: base_port + (dp_rank * tp_size) + tp_rank
# For dp_rank, we need to reserve tp_size consecutive ports
# For dp_rank, we need to reserve tp_size consecutive ports
tp_size = config.engine_args.tensor_parallel_size or 1
tp_size = config.engine_args.tensor_parallel_size or 1
...
...
container/Dockerfile.vllm
View file @
5f57ea5f
...
@@ -13,15 +13,15 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
...
@@ -13,15 +13,15 @@ ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# Make sure to update the dependency version in pyproject.toml when updating this
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
ARG VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
ARG TORCH_BACKEND="cu128"
ARG TORCH_BACKEND="cu128"
# Match 0.10.0 vLLM release
# Match 0.10.1 vLLM release
# https://github.com/vllm-project/vllm/releases/tag/v0.10.0
# https://github.com/vllm-project/vllm/releases/tag/v0.10.1
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
# Pinned to commit before https://github.com/deepseek-ai/DeepGEMM/pull/112 for DeepGEMM which seems to break on H100:
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
# "RuntimeError: Failed: CUDA runtime error csrc/jit/kernel_runtime.hpp:108 '98'"
ARG DEEPGEMM_REF="f85ec64"
ARG DEEPGEMM_REF="f85ec64"
ARG FLASHINF_REF="v0.2.8rc1"
ARG FLASHINF_REF="v0.2.11"
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
...
container/deps/vllm/install_vllm.sh
View file @
5f57ea5f
...
@@ -20,13 +20,16 @@ set -euo pipefail
...
@@ -20,13 +20,16 @@ set -euo pipefail
# Parse arguments
# Parse arguments
EDITABLE=true
EDITABLE=true
VLLM_REF="77a6bf07aedf132aad2b6719f6d87abc5d3311ab"
VLLM_REF="aab549870df50edf0512f0a59b574f692f546465" # from v0.10.1
# When updating above VLLM_REF make sure precompiled wheel file URL is correct. Run this command:
# aws s3 ls s3://vllm-wheels/${VLLM_REF}/ --region us-west-2 --no-sign-request
VLLM_PRECOMPILED_WHEEL_LOCATION="https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-0.10.1-cp38-abi3-manylinux1_x86_64.whl"
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
VLLM_GIT_URL="https://github.com/vllm-project/vllm.git"
MAX_JOBS=16
MAX_JOBS=16
INSTALLATION_DIR=/tmp
INSTALLATION_DIR=/tmp
ARCH=$(uname -m)
ARCH=$(uname -m)
DEEPGEMM_REF="f85ec64"
DEEPGEMM_REF="f85ec64"
FLASHINF_REF="v0.2.8rc1"
FLASHINF_REF="v0.2.11"
TORCH_BACKEND="cu128"
TORCH_BACKEND="cu128"
# Convert x86_64 to amd64 for consistency with Docker ARG
# Convert x86_64 to amd64 for consistency with Docker ARG
...
@@ -83,13 +86,13 @@ while [[ $# -gt 0 ]]; do
...
@@ -83,13 +86,13 @@ while [[ $# -gt 0 ]]; do
echo "Options:"
echo "Options:"
echo " --editable Install vllm in editable mode (default)"
echo " --editable Install vllm in editable mode (default)"
echo " --no-editable Install vllm in non-editable mode"
echo " --no-editable Install vllm in non-editable mode"
echo " --vllm-ref REF Git reference to checkout (default: f4135232b9a8c4845f8961fb1cd17581c56ae2ce)"
echo f" --vllm-ref REF Git reference to checkout (default: ${VLLM_REF})"
echo " --max-jobs NUM Maximum number of parallel jobs (default: 16)"
echo f" --max-jobs NUM Maximum number of parallel jobs (default: ${MAX_JOBS})"
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
echo " --arch ARCH Architecture (amd64|arm64, default: auto-detect)"
echo " --installation-dir DIR Directory to install vllm (default: /tmp/vllm)"
echo f" --installation-dir DIR Directory to install vllm (default: ${INSTALLATION_DIR})"
echo " --deepgemm-ref REF Git reference for DeepGEMM (default: 1876566)"
echo f" --deepgemm-ref REF Git reference for DeepGEMM (default: ${DEEPGEMM_REF})"
echo " --flashinf-ref REF Git reference for Flash Infer (default: v0.2.8rc1)"
echo f" --flashinf-ref REF Git reference for Flash Infer (default: ${FLASHINF_REF})"
echo " --torch-backend BACKEND Torch backend to use (default: cu128)"
echo f" --torch-backend BACKEND Torch backend to use (default: ${TORCH_BACKEND})"
exit 0
exit 0
;;
;;
*)
*)
...
@@ -154,7 +157,7 @@ else
...
@@ -154,7 +157,7 @@ else
exit 1
exit 1
fi
fi
export VLLM_PRECOMPILED_WHEEL_LOCATION=https://vllm-wheels.s3.us-west-2.amazonaws.com/${VLLM_REF}/vllm-1.0.0.dev-cp38-abi3-manylinux1_x86_64.whl
export VLLM_PRECOMPILED_WHEEL_LOCATION="${VLLM_PRECOMPILED_WHEEL_LOCATION}"
if [ "$EDITABLE" = "true" ]; then
if [ "$EDITABLE" = "true" ]; then
uv pip install -e . --torch-backend=$TORCH_BACKEND
uv pip install -e . --torch-backend=$TORCH_BACKEND
...
...
pyproject.toml
View file @
5f57ea5f
...
@@ -56,7 +56,7 @@ trtllm =[
...
@@ -56,7 +56,7 @@ trtllm =[
vllm = [
vllm = [
"uvloop",
"uvloop",
"nixl<=0.4.1",
"nixl<=0.4.1",
"vllm==0.10.0",
"vllm[flashinfer]==0.10.1",
]
]
sglang = [
sglang = [
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment