Unverified commit 869562da, authored by Biswa Panda and committed by GitHub
Browse files

feat: add examples for kv state approximation based routing (#5320)

parent 648e1cd2
...@@ -13,16 +13,22 @@ trap cleanup EXIT INT TERM ...@@ -13,16 +13,22 @@ trap cleanup EXIT INT TERM
# Parse command line arguments # Parse command line arguments
ENABLE_OTEL=false ENABLE_OTEL=false
APPROX_MODE=false
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case $1 in case $1 in
--enable-otel) --enable-otel)
ENABLE_OTEL=true ENABLE_OTEL=true
shift shift
;; ;;
--approx)
APPROX_MODE=true
shift
;;
-h|--help) -h|--help)
echo "Usage: $0 [OPTIONS]" echo "Usage: $0 [OPTIONS]"
echo "Options:" echo "Options:"
echo " --enable-otel Enable OpenTelemetry tracing" echo " --enable-otel Enable OpenTelemetry tracing"
echo " --approx Enable approximate KV routing (no KV events)"
echo " -h, --help Show this help message" echo " -h, --help Show this help message"
echo "" echo ""
echo "Note: System metrics are enabled by default on ports 8081 (worker-1), 8082 (worker-2)" echo "Note: System metrics are enabled by default on ports 8081 (worker-1), 8082 (worker-2)"
...@@ -47,11 +53,23 @@ fi ...@@ -47,11 +53,23 @@ fi
# run ingress # run ingress
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000) # dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
FRONTEND_ARGS=(--router-mode kv)
if [ "$APPROX_MODE" = true ]; then
FRONTEND_ARGS+=(--no-kv-events)
fi
OTEL_SERVICE_NAME=dynamo-frontend \ OTEL_SERVICE_NAME=dynamo-frontend \
python3 -m dynamo.frontend --router-mode kv & python3 -m dynamo.frontend "${FRONTEND_ARGS[@]}" &
DYNAMO_PID=$! DYNAMO_PID=$!
# run worker # run worker
# Build KV events args conditionally (only when not in approx mode)
KV_EVENTS_ARGS_1=()
KV_EVENTS_ARGS_2=()
if [ "$APPROX_MODE" = false ]; then
KV_EVENTS_ARGS_1=(--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}')
KV_EVENTS_ARGS_2=(--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}')
fi
OTEL_SERVICE_NAME=dynamo-worker-1 DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_WORKER1:-8081} \ OTEL_SERVICE_NAME=dynamo-worker-1 DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT_WORKER1:-8081} \
python3 -m dynamo.sglang \ python3 -m dynamo.sglang \
--model-path Qwen/Qwen3-0.6B \ --model-path Qwen/Qwen3-0.6B \
...@@ -59,7 +77,7 @@ python3 -m dynamo.sglang \ ...@@ -59,7 +77,7 @@ python3 -m dynamo.sglang \
--page-size 16 \ --page-size 16 \
--tp 1 \ --tp 1 \
--trust-remote-code \ --trust-remote-code \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5557"}' \ "${KV_EVENTS_ARGS_1[@]}" \
--enable-metrics \ --enable-metrics \
"${TRACE_ARGS[@]}" & "${TRACE_ARGS[@]}" &
WORKER_PID=$! WORKER_PID=$!
...@@ -71,6 +89,6 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \ ...@@ -71,6 +89,6 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.sglang \
--page-size 16 \ --page-size 16 \
--tp 1 \ --tp 1 \
--trust-remote-code \ --trust-remote-code \
--kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:5558"}' \ "${KV_EVENTS_ARGS_2[@]}" \
--enable-metrics \ --enable-metrics \
"${TRACE_ARGS[@]}" "${TRACE_ARGS[@]}"
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
# Configuration knobs. Any value the caller left unset (or empty) falls back to
# the default below; all four are then exported for the processes launched later.
: "${DYNAMO_HOME:=/workspace}"
: "${MODEL_PATH:=Qwen/Qwen3-0.6B}"
: "${SERVED_MODEL_NAME:=Qwen/Qwen3-0.6B}"
# The engine config path default is derived from DYNAMO_HOME, so it must be
# resolved after DYNAMO_HOME has received its own default.
: "${AGG_ENGINE_ARGS:=${DYNAMO_HOME}/examples/backends/trtllm/engine_configs/qwen3/agg.yaml}"
export DYNAMO_HOME MODEL_PATH SERVED_MODEL_NAME AGG_ENGINE_ARGS
# Setup cleanup trap
#######################################
# Stop the background frontend process started by this script.
# Globals:
#   DYNAMO_PID (read) - PID of the backgrounded frontend; may be unset or
#                       empty if this runs before the frontend was launched.
# Outputs:
#   Progress messages on stdout.
# Returns:
#   kill/wait failures are swallowed, so a missing or already-exited process
#   does not make cleanup fail.
#######################################
cleanup() {
    echo "Cleaning up background processes..."
    # Only act when a PID was actually recorded: with an empty PID, `kill` has
    # no target and a bare `wait` would block on every background job of the
    # shell instead of just the frontend.
    if [[ -n "${DYNAMO_PID:-}" ]]; then
        # Best effort: the process may already be gone, so errors are ignored.
        kill "$DYNAMO_PID" 2>/dev/null || true
        wait "$DYNAMO_PID" 2>/dev/null || true
    fi
    echo "Cleanup complete."
}
# Invoke cleanup when the script exits, is interrupted, or is terminated.
trap cleanup EXIT INT TERM

# Frontend: KV router in approximate mode (i.e. no KV events). It is started in
# the background and its PID is saved in DYNAMO_PID, which cleanup reads.
frontend_cmd=(python3 -m dynamo.frontend --router-mode kv --no-kv-events)
"${frontend_cmd[@]}" &
DYNAMO_PID=$!

# Worker: runs in the foreground. No event publishing options are passed; the
# frontend handles routing with its predictive approximate KV mode.
worker_cmd=(
    python3 -m dynamo.trtllm
    --model-path "$MODEL_PATH"
    --served-model-name "$SERVED_MODEL_NAME"
    --extra-engine-args "$AGG_ENGINE_ARGS"
)
"${worker_cmd[@]}"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment