Commit ea0ccfe6 authored by zhuwenwen's avatar zhuwenwen
Browse files

support --no-enable-prefix-caching

parent 7a5df8f7
......@@ -470,6 +470,9 @@ class ModelConfig:
logits_processors: Optional[list[Union[str, type[LogitsProcessor]]]] = None
"""One or more logits processors' fully-qualified class names or class
definitions"""
enable_chunked_prefill: Optional[bool] = None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
def compute_hash(self) -> str:
"""
......@@ -500,6 +503,7 @@ class ModelConfig:
factors.append(self.rope_theta)
# hf_config can control how the model looks!
factors.append(self.hf_config.to_json_string())
factors.append(self.enable_chunked_prefill)
str_factors = str(factors)
assert_hashable(str_factors)
return hashlib.sha256(str(factors).encode()).hexdigest()
......
......@@ -1590,6 +1590,9 @@ class EngineArgs:
# For pooling tasks the default is False
if model_config.runner_type != "pooling":
self.enable_chunked_prefill = True
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
if self.enable_prefix_caching is None:
self.enable_prefix_caching = True
else:
......@@ -1603,6 +1606,10 @@ class EngineArgs:
action = "Enabling" if \
incremental_prefill_supported else "Disabling"
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
if self.enable_chunked_prefill is None:
self.enable_chunked_prefill = incremental_prefill_supported
logger.info("(%s) chunked prefill by default", action)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment