Remove --no-enable-chunked-prefill handling for V1

bdaaf39d · zhuwenwen · 06a1bee2 · bdaaf39d · bdaaf39d
Commit bdaaf39d authored Oct 13, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 0 additions and 13 deletions

vllm/config.py +0 -5

vllm/engine/arg_utils.py +0 -8

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -419,10 +419,6 @@ class ModelConfig:
    override_attention_dtype: Optional[str] = None
    """Override dtype for attention"""

-    enable_chunked_prefill: Optional[bool] = None
-    """If True, prefill requests can be chunked based
-    on the remaining max_num_batched_tokens."""
-
    def compute_hash(self) -> str:
        """
        WARNING: Whenever a new field is added to this config,
@@ -452,7 +448,6 @@ class ModelConfig:
        factors.append(self.rope_theta)
        # hf_config can control how the model looks!
        factors.append(self.hf_config.to_json_string())
-        factors.append(self.enable_chunked_prefill)
        str_factors = str(factors)
        assert_hashable(str_factors)
        return hashlib.sha256(str(factors).encode()).hexdigest()

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1004,7 +1004,6 @@ class EngineArgs:
            enable_sleep_mode=self.enable_sleep_mode,
            model_impl=self.model_impl,
            override_attention_dtype=self.override_attention_dtype,
-            enable_chunked_prefill=self.enable_chunked_prefill,
        )

    def create_load_config(self) -> LoadConfig:
@@ -1594,9 +1593,6 @@ class EngineArgs:
        # For pooling tasks the default is False
        if model_config.runner_type != "pooling":
            self.enable_chunked_prefill = True
-            if model_config.enable_chunked_prefill is not None and \
-                model_config.enable_chunked_prefill is False:
-                self.enable_chunked_prefill = False
            if self.enable_prefix_caching is None:
                self.enable_prefix_caching = True
        else:
@@ -1611,10 +1607,6 @@ class EngineArgs:
            action = "Enabling" if \
                incremental_prefill_supported else "Disabling"

-            if model_config.enable_chunked_prefill is not None and \
-                model_config.enable_chunked_prefill is False:
-                self.enable_chunked_prefill = False
-
            if self.enable_chunked_prefill is None:
                self.enable_chunked_prefill = incremental_prefill_supported
                logger.info("(%s) chunked prefill by default", action)