[Cleanup] Remove no-longer-used `SpeculativeConfig.enable_chunked_prefill` (#27826)

Signed-off-by: Nick Hill <nhill@redhat.com>

[Cleanup] Remove no-longer-used `SpeculativeConfig.enable_chunked_prefill` (#27826)
Signed-off-by: Nick Hill <nhill@redhat.com>
9e5bd307 · Nick Hill · GitHub · fc16f1c4 · 9e5bd307 · 9e5bd307
Unverified commit 9e5bd307, authored Oct 31, 2025 by Nick Hill; committed by GitHub on Oct 31, 2025.
Showing 3 changed files with 1 addition and 16 deletions

vllm/config/speculative.py +0 -10

vllm/engine/arg_utils.py +0 -6

vllm/entrypoints/openai/api_server.py +1 -0

No files found.
--- a/vllm/config/speculative.py
+++ b/vllm/config/speculative.py
@@ -78,10 +78,6 @@ class SpeculativeConfig:
    draft_tensor_parallel_size: int | None = Field(default=None, ge=1)
    """The degree of the tensor parallelism for the draft model. Can only be 1
    or the same as the target model's tensor parallel size."""
-    disable_logprobs: bool = True
-    """If set to True, token log probabilities are not returned during
-    speculative decoding. If set to False, token log probabilities are returned
-    according to the log probability settings in SamplingParams."""

    # Draft model configuration
    quantization: me_quant.QuantizationMethods | None = None
@@ -126,12 +122,6 @@ class SpeculativeConfig:
    """The configuration of the target model."""
    target_parallel_config: SkipValidation[ParallelConfig] = None  # type: ignore
    """The parallel configuration for the target model."""
-    enable_chunked_prefill: SkipValidation[bool] = None  # type: ignore
-    """Whether vLLM is configured to use chunked prefill or not. Used for
-    raising an error since it's not yet compatible with speculative decode."""
-    disable_log_stats: SkipValidation[bool] = None  # type: ignore
-    """Whether to disable the periodic printing of stage times in speculative
-    decoding."""

    # params generated in the post-init stage
    draft_model_config: SkipValidation[ModelConfig] = None  # type: ignore

--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -1246,8 +1246,6 @@ class EngineArgs:
        self,
        target_model_config: ModelConfig,
        target_parallel_config: ParallelConfig,
-        enable_chunked_prefill: bool,
-        disable_log_stats: bool,
    ) -> SpeculativeConfig | None:
        """Initializes and returns a SpeculativeConfig object based on
        `speculative_config`.
@@ -1267,8 +1265,6 @@ class EngineArgs:
            {
                "target_model_config": target_model_config,
                "target_parallel_config": target_parallel_config,
-                "enable_chunked_prefill": enable_chunked_prefill,
-                "disable_log_stats": disable_log_stats,
            }
        )
        return SpeculativeConfig(**self.speculative_config)
@@ -1561,8 +1557,6 @@ class EngineArgs:
        speculative_config = self.create_speculative_config(
            target_model_config=model_config,
            target_parallel_config=parallel_config,
-            enable_chunked_prefill=self.enable_chunked_prefill,
-            disable_log_stats=self.disable_log_stats,
        )

        # make sure num_lookahead_slots is set appropriately depending on

--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -241,6 +241,7 @@ async def build_async_engine_client_from_engine_args(
        )

        # Don't keep the dummy data in memory
+        assert async_llm is not None
        await async_llm.reset_mm_cache()

        yield async_llm