"vllm/vscode:/vscode.git/clone" did not exist on "4f3676e72628ac067330e3acbf769d92afc2f7ea"
Commit 573531eb authored by zhuwenwen
Browse files

Support `--no-enable-chunked-prefill` for the V1 engine

parent 33f37e9f
......@@ -276,6 +276,9 @@ class ModelConfig:
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
"""[DEPRECATED] Use `pooler_config` instead. This field will be removed in
v0.12.0 or v1.0.0, whichever is sooner."""
# Tri-state flag filled in from EngineArgs.enable_chunked_prefill.
# None (the default) leaves EngineArgs' own defaulting untouched; only an
# explicit False is checked for there, and it forces chunked prefill off.
enable_chunked_prefill: Optional[bool] = None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
# Multimodal config and init vars
multimodal_config: Optional[MultiModalConfig] = None
......@@ -320,6 +323,7 @@ class ModelConfig:
factors.append(self.rope_scaling)
factors.append(self.rope_theta)
factors.append(self.video_pruning_rate)
factors.append(self.enable_chunked_prefill)
# hf_config can control how the model looks!
try:
......
......@@ -1055,6 +1055,7 @@ class EngineArgs:
logits_processors=self.logits_processors,
video_pruning_rate=self.video_pruning_rate,
io_processor_plugin=self.io_processor_plugin,
enable_chunked_prefill=self.enable_chunked_prefill,
)
def validate_tensorizer_args(self):
......@@ -1560,6 +1561,10 @@ class EngineArgs:
# For pooling tasks the default is False
if model_config.runner_type != "pooling":
self.enable_chunked_prefill = True
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
# TODO: When prefix caching supports prompt embeds inputs, this
# check can be removed.
......@@ -1583,6 +1588,10 @@ class EngineArgs:
action = "Enabling" if \
incremental_prefill_supported else "Disabling"
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
if self.enable_chunked_prefill is None:
self.enable_chunked_prefill = incremental_prefill_supported
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment