"...git@developer.sourcefind.cn:2222/renzhc/diffusers_dcu.git" did not exist on "687bc2772721af584d649129f8d2a28ca56a9ad8"
Unverified Commit d7c11b65, authored by Karen Chung, committed by GitHub
Browse files

chore: bump vLLM to 0.12.0 (#4736)


Signed-off-by: alec-flowers <aflowers@nvidia.com>
Signed-off-by: Karen Chung <karenc@nvidia.com>
Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
Co-authored-by: alec-flowers <aflowers@nvidia.com>
Co-authored-by: jthomson04 <jwillthomson19@gmail.com>
parent c0992522
...@@ -15,14 +15,13 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04" ...@@ -15,14 +15,13 @@ ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
ARG CUDA_VERSION="12.9" ARG CUDA_VERSION="12.9"
# Make sure to update the dependency version in pyproject.toml when updating this # Make sure to update the dependency version in pyproject.toml when updating this
ARG VLLM_REF="v0.11.2" ARG VLLM_REF="v0.12.0"
# FlashInfer Ref used to install flashinfer-cubin and flashinfer-jit-cache # FlashInfer Ref used to install flashinfer-cubin and flashinfer-jit-cache
ARG FLASHINF_REF="v0.5.2" ARG FLASHINF_REF="v0.5.3"
# If left blank, then we will fallback to vLLM defaults # If left blank, then we will fallback to vLLM defaults
ARG DEEPGEMM_REF="" ARG DEEPGEMM_REF=""
# LMCache version - 0.3.9+ required for vLLM 0.11.2 compatibility ARG LMCACHE_REF="0.3.10"
ARG LMCACHE_REF="0.3.9.post2"
# sccache configuration - inherit from base build # sccache configuration - inherit from base build
ARG USE_SCCACHE ARG USE_SCCACHE
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
set -euo pipefail set -euo pipefail
VLLM_REF="v0.11.2" VLLM_REF="v0.12.0"
# Basic Configurations # Basic Configurations
ARCH=$(uname -m) ARCH=$(uname -m)
...@@ -22,9 +22,9 @@ INSTALLATION_DIR=/tmp ...@@ -22,9 +22,9 @@ INSTALLATION_DIR=/tmp
TORCH_CUDA_ARCH_LIST="9.0;10.0" # For EP Kernels TORCH_CUDA_ARCH_LIST="9.0;10.0" # For EP Kernels
DEEPGEMM_REF="" DEEPGEMM_REF=""
CUDA_VERSION="12.9" CUDA_VERSION="12.9"
FLASHINF_REF="v0.5.2" FLASHINF_REF="v0.5.3"
# LMCache version - 0.3.9+ required for vLLM 0.11.2 compatibility # LMCache version - 0.3.9+ required for vLLM 0.11.2 compatibility
LMCACHE_REF="0.3.9.post2" LMCACHE_REF="0.3.10"
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
......
...@@ -56,7 +56,7 @@ trtllm =[ ...@@ -56,7 +56,7 @@ trtllm =[
vllm = [ vllm = [
"uvloop", "uvloop",
"nixl[cu12]<=0.7.1", "nixl[cu12]<=0.7.1",
"vllm[flashinfer]==0.11.2", "vllm[flashinfer]==0.12.0",
] ]
sglang = [ sglang = [
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment