Unverified commit b1f3e189, authored by Cody Yu, committed by GitHub
Browse files

[MISC] Keep chunked prefill enabled by default with long context when prefix...

[MISC] Keep chunked prefill enabled by default with long context when prefix caching is enabled (#8342)
parent 04e7c4e7
...@@ -878,7 +878,6 @@ class EngineArgs: ...@@ -878,7 +878,6 @@ class EngineArgs:
if (is_gpu and not use_sliding_window and not use_spec_decode if (is_gpu and not use_sliding_window and not use_spec_decode
and not self.enable_lora and not self.enable_lora
and not self.enable_prompt_adapter and not self.enable_prompt_adapter
and not self.enable_prefix_caching
and not has_seqlen_agnostic_layers): and not has_seqlen_agnostic_layers):
self.enable_chunked_prefill = True self.enable_chunked_prefill = True
logger.warning( logger.warning(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment