Remove support for --no-enable-chunked-prefill in v1

2cf181fd · zhuwenwen · 6791e80c · 2cf181fd · 2cf181fd
Commit 2cf181fd authored Oct 13, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 0 additions and 14 deletions

vllm/config/model.py +0 -4

vllm/engine/arg_utils.py +0 -10

No files found.
--- a/vllm/config/model.py
+++ b/vllm/config/model.py
@@ -276,9 +276,6 @@ class ModelConfig:
    override_pooler_config: Optional[Union[dict, PoolerConfig]] = None
    """[DEPRECATED] Use `pooler_config` instead. This field will be removed in
    v0.12.0 or v1.0.0, whichever is sooner."""
-    enable_chunked_prefill: Optional[bool] = None
-    """If True, prefill requests can be chunked based
-    on the remaining max_num_batched_tokens."""

    # Multimodal config and init vars
    multimodal_config: Optional[MultiModalConfig] = None
@@ -323,7 +320,6 @@ class ModelConfig:
        factors.append(self.rope_scaling)
        factors.append(self.rope_theta)
        factors.append(self.video_pruning_rate)
-        factors.append(self.enable_chunked_prefill)

        # hf_config can control how the model looks!
        try:

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1055,7 +1055,6 @@ class EngineArgs:
            logits_processors=self.logits_processors,
            video_pruning_rate=self.video_pruning_rate,
            io_processor_plugin=self.io_processor_plugin,
-            enable_chunked_prefill=self.enable_chunked_prefill,
        )

    def validate_tensorizer_args(self):
@@ -1562,11 +1561,6 @@ class EngineArgs:
        if model_config.runner_type != "pooling":
            self.enable_chunked_prefill = True

-            if model_config.enable_chunked_prefill is not None and \
-                model_config.enable_chunked_prefill is False:
-                self.enable_chunked_prefill = False
-
-
            # TODO: When prefix caching supports prompt embeds inputs, this
            # check can be removed.
            if (self.enable_prompt_embeds
@@ -1590,10 +1584,6 @@ class EngineArgs:
            action = "Enabling" if \
                incremental_prefill_supported else "Disabling"

-            if model_config.enable_chunked_prefill is not None and \
-                model_config.enable_chunked_prefill is False:
-                self.enable_chunked_prefill = False
-
            if self.enable_chunked_prefill is None:
                self.enable_chunked_prefill = incremental_prefill_supported
                logger.info("(%s) chunked prefill by default", action)