Unverified commit 208ce622, authored by Roger Wang, committed by GitHub
Browse files

[V1]Enable APC by default only for text models (#10148)


Signed-off-by: Roger Wang <ywang@roblox.com>
parent 1ff4aed5
...@@ -65,7 +65,10 @@ class LLMEngine: ...@@ -65,7 +65,10 @@ class LLMEngine:
elif usage_context == UsageContext.OPENAI_API_SERVER: elif usage_context == UsageContext.OPENAI_API_SERVER:
scheduler_config.max_num_seqs = 1024 scheduler_config.max_num_seqs = 1024
scheduler_config.max_num_batched_tokens = 2048 scheduler_config.max_num_batched_tokens = 2048
cache_config.enable_prefix_caching = True
# TODO (ywang96): Enable APC by default when VLM supports it.
if not model_config.is_multimodal_model:
cache_config.enable_prefix_caching = True
logger.info( logger.info(
"Initializing an LLM engine (v%s) with config: " "Initializing an LLM engine (v%s) with config: "
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment