Unverified commit 2d7f5fc5, authored by Yan Ru Pei, committed by GitHub
Browse files

fix: vllm disagg launch script should not enable kv routing (#4063)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 3555b72f
...@@ -5,7 +5,7 @@ set -e ...@@ -5,7 +5,7 @@ set -e
trap 'echo Cleaning up...; kill 0' EXIT trap 'echo Cleaning up...; kill 0' EXIT
# run ingress # run ingress
python -m dynamo.frontend --router-mode kv --http-port=8000 & python -m dynamo.frontend --http-port=8000 &
# --enforce-eager is added for quick deployment. for production use, need to remove this flag # --enforce-eager is added for quick deployment. for production use, need to remove this flag
CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager & CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager &
......
...@@ -4,8 +4,8 @@ ...@@ -4,8 +4,8 @@
set -e set -e
trap 'echo Cleaning up...; kill 0' EXIT trap 'echo Cleaning up...; kill 0' EXIT
# run ingress with KV router # run ingress
python -m dynamo.frontend --router-mode kv --http-port=8000 & python -m dynamo.frontend --http-port=8000 &
# run decode worker on GPU 0, without enabling KVBM # run decode worker on GPU 0, without enabling KVBM
# NOTE: remove --enforce-eager for production use # NOTE: remove --enforce-eager for production use
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment