fix: Set enable_kv_cache_events=true for user-specified vLLM KVEventsConfig in scripts (#4418)

Signed-off-by: Karen Chung <karenc@nvidia.com>

fix: Set enable_kv_cache_events=true for user-specified vLLM KVEventsConfig in scripts (#4418)
Signed-off-by: Karen Chung <karenc@nvidia.com>
7b967c77 · Karen Chung · GitHub · 2f1778c1 · 7b967c77 · 7b967c77
Unverified commit 7b967c77, authored Nov 18, 2025 by Karen Chung; committed by GitHub on Nov 18, 2025
4 changed files
--- a/components/src/dynamo/vllm/args.py
+++ b/components/src/dynamo/vllm/args.py
@@ -357,6 +357,12 @@ def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
    # If user provided their own config, use that
    if c := getattr(config.engine_args, "kv_events_config"):
+        # Warn user that enable_kv_cache_events probably should be True (user may have omitted it from JSON)
+        if not c.enable_kv_cache_events:
+            logger.warning(
+                "User provided --kv_events_config which set enable_kv_cache_events to False (default). "
+                "To publish events, explicitly set enable_kv_cache_events to True."
+            )
        logger.info(f"Using user-provided kv_events_config {c}")
        return c

--- a/examples/backends/vllm/launch/agg_kvbm_router.sh
+++ b/examples/backends/vllm/launch/agg_kvbm_router.sh
@@ -27,7 +27,7 @@ CUDA_VISIBLE_DEVICES=0 DYN_KVBM_CPU_CACHE_GB=2 \
    --enforce-eager \
    --connector kvbm \
    --gpu-memory-utilization 0.4 \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20080"}' &
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20080","enable_kv_cache_events":true}' &
 DYN_KVBM_LEADER_ZMQ_PUB_PORT=56003 \
 DYN_KVBM_LEADER_ZMQ_ACK_PORT=56004 \
@@ -38,4 +38,4 @@ CUDA_VISIBLE_DEVICES=0 DYN_KVBM_CPU_CACHE_GB=2 \
    --enforce-eager \
    --connector kvbm \
    --gpu-memory-utilization 0.4 \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081"}'
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081","enable_kv_cache_events":true}'
--- a/examples/backends/vllm/launch/agg_router.sh
+++ b/examples/backends/vllm/launch/agg_router.sh
@@ -24,7 +24,7 @@ CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm \
    --block-size $BLOCK_SIZE \
    --enforce-eager \
    --connector none \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20080"}' &
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20080","enable_kv_cache_events":true}' &
 VLLM_NIXL_SIDE_CHANNEL_PORT=20097 \
 CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.vllm \
@@ -32,4 +32,4 @@ CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.vllm \
    --block-size $BLOCK_SIZE \
    --enforce-eager \
    --connector none \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081"}'
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081","enable_kv_cache_events":true}'
--- a/examples/backends/vllm/launch/disagg_router.sh
+++ b/examples/backends/vllm/launch/disagg_router.sh
@@ -24,14 +24,14 @@ CUDA_VISIBLE_DEVICES=0 python3 -m dynamo.vllm \
    --model $MODEL \
    --block-size $BLOCK_SIZE \
    --enforce-eager \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20080"}'&
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20080","enable_kv_cache_events":true}'&
 VLLM_NIXL_SIDE_CHANNEL_PORT=20097 \
 CUDA_VISIBLE_DEVICES=1 python3 -m dynamo.vllm \
    --model $MODEL \
    --block-size $BLOCK_SIZE \
    --enforce-eager \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081"}' &
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20081","enable_kv_cache_events":true}' &
 # two prefill workers
 # When registered with --is-prefill-worker, these workers are automatically detected
@@ -42,7 +42,7 @@ CUDA_VISIBLE_DEVICES=2 python3 -m dynamo.vllm \
    --block-size $BLOCK_SIZE \
    --enforce-eager \
    --is-prefill-worker \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20082"}'&
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20082","enable_kv_cache_events":true}'&
 VLLM_NIXL_SIDE_CHANNEL_PORT=20099 \
 CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.vllm \
@@ -50,4 +50,4 @@ CUDA_VISIBLE_DEVICES=3 python3 -m dynamo.vllm \
    --block-size $BLOCK_SIZE \
    --enforce-eager \
    --is-prefill-worker \
-    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20083"}'
+    --kv-events-config '{"publisher":"zmq","topic":"kv-events","endpoint":"tcp://*:20083","enable_kv_cache_events":true}'