"docs/vscode:/vscode.git/clone" did not exist on "ae53d7c4dbf9ffbb1560fb590f50678bd7c60284"
Unverified Commit 35323da2 authored by Alec, committed by GitHub
Browse files

fix: vllm launch script errors for disagg and spec decoding (#6562)


Signed-off-by: alec-flowers <aflowers@nvidia.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent 44d43d0c
...@@ -23,6 +23,6 @@ CUDA_VISIBLE_DEVICES=0 python -m dynamo.vllm \ ...@@ -23,6 +23,6 @@ CUDA_VISIBLE_DEVICES=0 python -m dynamo.vllm \
"model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", "model": "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B",
"draft_tensor_parallel_size": 1, "draft_tensor_parallel_size": 1,
"num_speculative_tokens": 2, "num_speculative_tokens": 2,
"method": "eagle" "method": "eagle3"
}' \ }' \
--gpu-memory-utilization 0.8 --gpu-memory-utilization 0.8
\ No newline at end of file
...@@ -20,13 +20,15 @@ python -m dynamo.frontend \ ...@@ -20,13 +20,15 @@ python -m dynamo.frontend \
CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm \ CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm \
--model $MODEL \ --model $MODEL \
--enforce-eager \ --enforce-eager \
--disaggregation-mode decode & --disaggregation-mode decode \
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' &
VLLM_NIXL_SIDE_CHANNEL_PORT=20096 \ VLLM_NIXL_SIDE_CHANNEL_PORT=20096 \
CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.vllm \ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.vllm \
--model $MODEL \ --model $MODEL \
--enforce-eager \ --enforce-eager \
--disaggregation-mode decode & --disaggregation-mode decode \
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' &
# two prefill workers with KVBM enabled # two prefill workers with KVBM enabled
# Each worker needs unique ZMQ ports to avoid KVBM coordination conflicts # Each worker needs unique ZMQ ports to avoid KVBM coordination conflicts
......
...@@ -57,7 +57,8 @@ python3 -m dynamo.vllm \ ...@@ -57,7 +57,8 @@ python3 -m dynamo.vllm \
--enforce-eager \ --enforce-eager \
--disaggregation-mode decode \ --disaggregation-mode decode \
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' \ --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' \
--gpu-memory-utilization ${GPU_MEM_FRACTION} & --gpu-memory-utilization ${GPU_MEM_FRACTION} \
--max-model-len 16384 &
DECODE_PID=$! DECODE_PID=$!
# Wait for decode worker to initialize before starting prefill worker # Wait for decode worker to initialize before starting prefill worker
...@@ -79,5 +80,6 @@ python3 -m dynamo.vllm \ ...@@ -79,5 +80,6 @@ python3 -m dynamo.vllm \
--disaggregation-mode prefill \ --disaggregation-mode prefill \
--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' \ --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' \
--gpu-memory-utilization ${GPU_MEM_FRACTION} \ --gpu-memory-utilization ${GPU_MEM_FRACTION} \
--max-model-len 16384 \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081","enable_kv_cache_events":true}' --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081","enable_kv_cache_events":true}'
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment