Unverified Commit 7802f969 authored by Biswa Panda's avatar Biswa Panda Committed by GitHub
Browse files

feat: disable kv events in vLLM when lora is enabled (#4128)

parent 5c46fa4c
...@@ -326,6 +326,24 @@ def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]: ...@@ -326,6 +326,24 @@ def create_kv_events_config(config: Config) -> Optional[KVEventsConfig]:
if not config.engine_args.enable_prefix_caching: if not config.engine_args.enable_prefix_caching:
return None return None
# There is a bug with KV events publishing when LoRA is enabled.
# It is fixed in https://github.com/vllm-project/vllm/pull/27728, but the fix has not been released yet.
# Remove the check below once a vLLM version containing the fix is released.
if config.engine_args.enable_lora:
if config.engine_args.kv_events_config is None:
# No explicit KV events config was provided by the user, so we disable KV events: they are not supported yet when LoRA is enabled.
return None
else:
# The user provided their own KV events config, which will not work while LoRA is enabled.
message = (
"KV events doesn't work when LoRA is enabled due to upstream vLLM bug. "
"Please see https://github.com/vllm-project/vllm/pull/27728."
"For now, either disable lora or dont use explicit kv envents config."
"Dont set both --kv-events-config and --enable-lora in vllm command line args."
)
logger.error(message)
raise ValueError(message)
# If user provided their own config, use that # If user provided their own config, use that
if c := getattr(config.engine_args, "kv_events_config"): if c := getattr(config.engine_args, "kv_events_config"):
logger.info(f"Using user-provided kv_events_config {c}") logger.info(f"Using user-provided kv_events_config {c}")
......
...@@ -153,6 +153,9 @@ def setup_kv_event_publisher( ...@@ -153,6 +153,9 @@ def setup_kv_event_publisher(
logger.info("Skipping KV event publisher setup for decode worker") logger.info("Skipping KV event publisher setup for decode worker")
return None return None
if config.engine_args.kv_events_config is None:
return None
# Get data_parallel_size to create publishers for all dp_ranks # Get data_parallel_size to create publishers for all dp_ranks
data_parallel_size = getattr(vllm_config.parallel_config, "data_parallel_size", 1) data_parallel_size = getattr(vllm_config.parallel_config, "data_parallel_size", 1)
kv_publishers = [] kv_publishers = []
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment