"tests/vscode:/vscode.git/clone" did not exist on "a629b86abf0d3ba132b78221b3a12dcdf482be53"
Unverified commit d7c11b65, authored by Karen Chung, committed by GitHub
Browse files

chore: bump vLLM to 0.12.0 (#4736)


Signed-off-by: alec-flowers <aflowers@nvidia.com>
Signed-off-by: Karen Chung <karenc@nvidia.com>
Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
Co-authored-by: alec-flowers <aflowers@nvidia.com>
Co-authored-by: jthomson04 <jwillthomson19@gmail.com>
parent c0992522
......@@ -15,14 +15,13 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.9"
# Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="v0.11.2"
ARG VLLM_REF="v0.12.0"
# FlashInfer Ref used to install flashinfer-cubin and flashinfer-jit-cache
ARG FLASHINF_REF="v0.5.2"
ARG FLASHINF_REF="v0.5.3"
# If left blank, then we will fallback to vLLM defaults
ARG DEEPGEMM_REF=""
# LMCache version - 0.3.9+ required for vLLM 0.11.2 compatibility
ARG LMCACHE_REF="0.3.9.post2"
ARG LMCACHE_REF="0.3.10"
# sccache configuration - inherit from base build
ARG USE_SCCACHE
......
......@@ -11,7 +11,7 @@
set -euo pipefail
VLLM_REF="v0.11.2"
VLLM_REF="v0.12.0"
# Basic Configurations
ARCH=$(uname -m)
......@@ -22,9 +22,9 @@ INSTALLATION_DIR=/tmp
TORCH_CUDA_ARCH_LIST="9.0;10.0" # For EP Kernels
DEEPGEMM_REF=""
CUDA_VERSION="12.9"
FLASHINF_REF="v0.5.2"
FLASHINF_REF="v0.5.3"
# LMCache version - 0.3.9+ required for vLLM 0.11.2 compatibility
LMCACHE_REF="0.3.9.post2"
LMCACHE_REF="0.3.10"
while [[ $# -gt 0 ]]; do
case $1 in
......
......@@ -56,7 +56,7 @@ trtllm =[
vllm = [
"uvloop",
"nixl[cu12]<=0.7.1",
"vllm[flashinfer]==0.11.2",
"vllm[flashinfer]==0.12.0",
]
sglang = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment