Unverified commit 761f67e0, authored by Hongkuan Zhou and committed by GitHub
Browse files

fix: update kv-router usage (#1238)

parent cc40af70
......@@ -48,7 +48,7 @@ To measure the performance of dynamo with planner, we start from a 1p1d deployme
```bash
cd examples/llm
-dynamo serve graphs.disagg:Frontend -f disagg_1p1d.yml
+dynamo serve graphs.disagg_router:Frontend -f disagg_1p1d.yml
# in terminal 2
genai-perf profile \
......@@ -82,7 +82,7 @@ In this example, we use a fixed 2p2d engine as baseline. Planner provides a `--n
```bash
# in terminal 1
-dynamo serve graphs.disagg:Frontend -f disagg_2p2d.yml
+dynamo serve graphs.disagg_router:Frontend -f disagg_2p2d.yml
# in terminal 2
genai-perf profile --tokenizer deepseek-ai/DeepSeek-R1-Distill-Llama-8B -m deepseek-ai/DeepSeek-R1-Distill-Llama-8B --service-kind openai --endpoint-type chat --url http://localhost:8000 --streaming --input-file payload:sin_b512_t600_rr5.0-20.0-150.0_io3000150-3000150-0.2-0.8-10.jsonl
......
......@@ -17,6 +17,7 @@ Common:
block-size: 64
max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
router: kv-load
Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......@@ -24,8 +25,11 @@ Frontend:
port: 8000
Processor:
router: kv-load
common-configs: [model, block-size]
common-configs: [model, block-size, router]
Router:
min-workers: 1
common-configs: [model, block-size, router]
VllmWorker:
remote-prefill: true
......
......@@ -17,6 +17,7 @@ Common:
block-size: 64
max-model-len: 16384
kv-transfer-config: '{"kv_connector":"DynamoNixlConnector"}'
router: kv-load
Frontend:
served_model_name: deepseek-ai/DeepSeek-R1-Distill-Llama-8B
......@@ -25,7 +26,11 @@ Frontend:
Processor:
router: kv-load
common-configs: [model, block-size]
common-configs: [model, block-size, router]
Router:
min-workers: 1
common-configs: [model, block-size, router]
VllmWorker:
remote-prefill: true
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment