Unverified commit 60feb955, authored by Karen Chung and committed by GitHub
Browse files

chore: bump vLLM to 0.11.2 (#4476)

parent c5e8c4c2
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
ARG LOCAL_VLLM_IMAGE="vllm-elastic-ep:latest_all2all_buffer_input" ARG LOCAL_VLLM_IMAGE="vllm-elastic-ep:latest_all2all_buffer_input"
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none" ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
# Other build arguments # Other build arguments
ARG PYTHON_VERSION=3.12 ARG PYTHON_VERSION=3.12
...@@ -57,7 +57,7 @@ RUN apt-get update && \ ...@@ -57,7 +57,7 @@ RUN apt-get update && \
# prometheus dependencies # prometheus dependencies
ca-certificates \ ca-certificates \
# DeepGemm uses 'cuobjdump' which does not come with CUDA image # DeepGemm uses 'cuobjdump' which does not come with CUDA image
cuda-command-line-tools-12-8 && \ cuda-command-line-tools-12-9 && \
rm -rf /var/lib/apt/lists/* rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools from vLLM image (for JIT compilation) # Copy CUDA development tools from vLLM image (for JIT compilation)
......
...@@ -60,7 +60,6 @@ spec: ...@@ -60,7 +60,6 @@ spec:
- --model - --model
- deepseek-ai/DeepSeek-V2-Lite - deepseek-ai/DeepSeek-V2-Lite
- --trust-remote-code - --trust-remote-code
- --disable-log-requests
- --tensor-parallel-size - --tensor-parallel-size
- "1" - "1"
- --data-parallel-size - --data-parallel-size
......
...@@ -63,7 +63,6 @@ spec: ...@@ -63,7 +63,6 @@ spec:
- --model - --model
- deepseek-ai/DeepSeek-V2-Lite - deepseek-ai/DeepSeek-V2-Lite
- --trust-remote-code - --trust-remote-code
- --disable-log-requests
- --tensor-parallel-size - --tensor-parallel-size
- "1" - "1"
- --data-parallel-size - --data-parallel-size
...@@ -130,7 +129,6 @@ spec: ...@@ -130,7 +129,6 @@ spec:
- --model - --model
- deepseek-ai/DeepSeek-V2-Lite - deepseek-ai/DeepSeek-V2-Lite
- --trust-remote-code - --trust-remote-code
- --disable-log-requests
- --is-prefill-worker - --is-prefill-worker
- --tensor-parallel-size - --tensor-parallel-size
- "1" - "1"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment