Commit 2cf181fd authored by zhuwenwen
Browse files

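Remove the --no-enable-chunked-prefill override for v1 (drops `enable_chunked_prefill` from ModelConfig and the EngineArgs checks that forced it to False)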

parent 6791e80c
...@@ -276,9 +276,6 @@ class ModelConfig: ...@@ -276,9 +276,6 @@ class ModelConfig:
override_pooler_config: Optional[Union[dict, PoolerConfig]] = None override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
"""[DEPRECATED] Use `pooler_config` instead. This field will be removed in """[DEPRECATED] Use `pooler_config` instead. This field will be removed in
v0.12.0 or v1.0.0, whichever is sooner.""" v0.12.0 or v1.0.0, whichever is sooner."""
enable_chunked_prefill: Optional[bool] = None
"""If True, prefill requests can be chunked based
on the remaining max_num_batched_tokens."""
# Multimodal config and init vars # Multimodal config and init vars
multimodal_config: Optional[MultiModalConfig] = None multimodal_config: Optional[MultiModalConfig] = None
...@@ -323,7 +320,6 @@ class ModelConfig: ...@@ -323,7 +320,6 @@ class ModelConfig:
factors.append(self.rope_scaling) factors.append(self.rope_scaling)
factors.append(self.rope_theta) factors.append(self.rope_theta)
factors.append(self.video_pruning_rate) factors.append(self.video_pruning_rate)
factors.append(self.enable_chunked_prefill)
# hf_config can control how the model looks! # hf_config can control how the model looks!
try: try:
......
...@@ -1055,7 +1055,6 @@ class EngineArgs: ...@@ -1055,7 +1055,6 @@ class EngineArgs:
logits_processors=self.logits_processors, logits_processors=self.logits_processors,
video_pruning_rate=self.video_pruning_rate, video_pruning_rate=self.video_pruning_rate,
io_processor_plugin=self.io_processor_plugin, io_processor_plugin=self.io_processor_plugin,
enable_chunked_prefill=self.enable_chunked_prefill,
) )
def validate_tensorizer_args(self): def validate_tensorizer_args(self):
...@@ -1562,11 +1561,6 @@ class EngineArgs: ...@@ -1562,11 +1561,6 @@ class EngineArgs:
if model_config.runner_type != "pooling": if model_config.runner_type != "pooling":
self.enable_chunked_prefill = True self.enable_chunked_prefill = True
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
# TODO: When prefix caching supports prompt embeds inputs, this # TODO: When prefix caching supports prompt embeds inputs, this
# check can be removed. # check can be removed.
if (self.enable_prompt_embeds if (self.enable_prompt_embeds
...@@ -1589,10 +1583,6 @@ class EngineArgs: ...@@ -1589,10 +1583,6 @@ class EngineArgs:
action = "Enabling" if \ action = "Enabling" if \
incremental_prefill_supported else "Disabling" incremental_prefill_supported else "Disabling"
if model_config.enable_chunked_prefill is not None and \
model_config.enable_chunked_prefill is False:
self.enable_chunked_prefill = False
if self.enable_chunked_prefill is None: if self.enable_chunked_prefill is None:
self.enable_chunked_prefill = incremental_prefill_supported self.enable_chunked_prefill = incremental_prefill_supported
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment