Commit 7cf75628 authored by tlipoca9, committed by GitHub
Browse files

chore: check prefix caching while using kv router (#592)

Neelay approved merge
parent 7662de1c
......@@ -80,6 +80,12 @@ class VllmWorker:
self.engine_args.pipeline_parallel_size = 1
if self.engine_args.router == "kv":
if not self.engine_args.enable_prefix_caching:
logger.info(
"When using KV router, prefix caching must be enabled, setting to True"
)
self.engine_args.enable_prefix_caching = True
VLLM_WORKER_ID = dynamo_context["endpoints"][0].lease_id()
os.environ["VLLM_WORKER_ID"] = str(VLLM_WORKER_ID)
os.environ["VLLM_KV_NAMESPACE"] = "dynamo"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment