Commit 573531eb authored by zhuwenwen
Browse files

support --no-enable-chunked-prefill of v1

parent 33f37e9f
......@@ -276,6 +276,9 @@ class ModelConfig:
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
"""[DEPRECATED] Use `pooler_config` instead. This field will be removed in
v0.12.0 or v1.0.0, whichever is sooner."""
enable_chunked_prefill: Optional[bool] = None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
# Multimodal config and init vars
multimodal_config: Optional[MultiModalConfig] = None
......@@ -320,6 +323,7 @@ class ModelConfig:
factors.append(self.rope_scaling)
factors.append(self.rope_theta)
factors.append(self.video_pruning_rate)
factors.append(self.enable_chunked_prefill)
# hf_config can control how the model looks!
try:
......
......@@ -1055,6 +1055,7 @@ class EngineArgs:
logits_processors=self.logits_processors,
video_pruning_rate=self.video_pruning_rate,
io_processor_plugin=self.io_processor_plugin,
enable_chunked_prefill=self.enable_chunked_prefill,
)
def validate_tensorizer_args(self):
......@@ -1561,6 +1562,10 @@ class EngineArgs:
if model_config.runner_type != "pooling":
self.enable_chunked_prefill = True
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
# TODO: When prefix caching supports prompt embeds inputs, this
# check can be removed.
if (self.enable_prompt_embeds
......@@ -1584,6 +1589,10 @@ class EngineArgs:
action = "Enabling" if \
incremental_prefill_supported else "Disabling"
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
if self.enable_chunked_prefill is None:
self.enable_chunked_prefill = incremental_prefill_supported
logger.info("(%s) chunked prefill by default", action)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment