feat: disable kv events in vLLM when lora is enabled (#4128)

7802f969 · Biswa Panda · GitHub · 5c46fa4c · 7802f969 · 7802f969
Unverified Commit 7802f969 authored Nov 10, 2025 by Biswa Panda Committed by GitHub Nov 10, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 0 deletions

components/src/dynamo/vllm/args.py components/src/dynamo/vllm/args.py +18 -0

components/src/dynamo/vllm/main.py components/src/dynamo/vllm/main.py +3 -0

No files found.
--- a/components/src/dynamo/vllm/args.py
+++ b/components/src/dynamo/vllm/args.py
@@ -326,6 +326,24 @@ def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
    if not config.engine_args.enable_prefix_caching:
        return None
+    # There is a bug with KV events publishing when LoRA is enabled.
+    # It is fixed in https://github.com/vllm-project/vllm/pull/27728, but that fix has not been released yet.
+    # Remove the check below once a vLLM version containing the fix is released.
+    if config.engine_args.enable_lora:
+        if config.engine_args.kv_events_config is None:
+            # No explicit kv events config was provided by the user, so we disable KV events because LoRA is enabled and the combination is not supported yet.
+            return None
+        else:
+            # The user provided their own kv events config, which will not work while LoRA is enabled.
+            message = (
+                "KV events doesn't work when LoRA is enabled due to upstream vLLM bug. "
+                "Please see https://github.com/vllm-project/vllm/pull/27728."
+                "For now, either disable lora or dont use explicit kv envents config."
+                "Dont set both --kv-events-config and --enable-lora in vllm command line args."
+            )
+            logger.error(message)
+            raise ValueError(message)
    # If user provided their own config, use that
    if c := getattr(config.engine_args, "kv_events_config"):
        logger.info(f"Using user-provided kv_events_config {c}")

--- a/components/src/dynamo/vllm/main.py
+++ b/components/src/dynamo/vllm/main.py
@@ -153,6 +153,9 @@ def setup_kv_event_publisher(
        logger.info("Skipping KV event publisher setup for decode worker")
        return None
+    if config.engine_args.kv_events_config is None:
+        return None
    # Get data_parallel_size to create publishers for all dp_ranks
    data_parallel_size = getattr(vllm_config.parallel_config, "data_parallel_size", 1)
    kv_publishers = []