Commit 2d7f5fc5 (unverified), authored by Yan Ru Pei, committed by GitHub
Browse files

fix: vllm disagg launch script should not enable kv routing (#4063)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 3555b72f
......@@ -5,7 +5,7 @@ set -e
trap 'echo Cleaning up...; kill 0' EXIT
# run ingress
-python -m dynamo.frontend --router-mode kv --http-port=8000 &
+python -m dynamo.frontend --http-port=8000 &
# --enforce-eager is added for quick deployment. for production use, need to remove this flag
CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager &
......
......@@ -4,8 +4,8 @@
set -e
trap 'echo Cleaning up...; kill 0' EXIT
-# run ingress with KV router
-python -m dynamo.frontend --router-mode kv --http-port=8000 &
+# run ingress
+python -m dynamo.frontend --http-port=8000 &
# run decode worker on GPU 0, without enabling KVBM
# NOTE: remove --enforce-eager for production use
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment