"examples/backends/vllm/deploy/agg_router_kv_approx.yaml" did not exist on "5bf23d54f3e46a15ff5000773a32d8829befa919"
Unverified commit 60feb955, authored by Karen Chung and committed by GitHub
Browse files

chore: bump vLLM to 0.11.2 (#4476)

parent c5e8c4c2
......@@ -9,7 +9,7 @@
ARG LOCAL_VLLM_IMAGE="vllm-elastic-ep:latest_all2all_buffer_input"
ARG DYNAMO_BASE_IMAGE="dynamo:latest-none"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
-ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
+ARG RUNTIME_IMAGE_TAG="12.9.0-runtime-ubuntu24.04"
# Other build arguments
ARG PYTHON_VERSION=3.12
......@@ -57,7 +57,7 @@ RUN apt-get update && \
# prometheus dependencies
ca-certificates \
# DeepGemm uses 'cuobjdump' which does not come with CUDA image
-cuda-command-line-tools-12-8 && \
+cuda-command-line-tools-12-9 && \
rm -rf /var/lib/apt/lists/*
# Copy CUDA development tools from vLLM image (for JIT compilation)
......
......@@ -60,7 +60,6 @@ spec:
- --model
- deepseek-ai/DeepSeek-V2-Lite
- --trust-remote-code
-- --disable-log-requests
- --tensor-parallel-size
- "1"
- --data-parallel-size
......
......@@ -63,7 +63,6 @@ spec:
- --model
- deepseek-ai/DeepSeek-V2-Lite
- --trust-remote-code
-- --disable-log-requests
- --tensor-parallel-size
- "1"
- --data-parallel-size
......@@ -130,7 +129,6 @@ spec:
- --model
- deepseek-ai/DeepSeek-V2-Lite
- --trust-remote-code
-- --disable-log-requests
- --is-prefill-worker
- --tensor-parallel-size
- "1"
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment