[Deprecation] Deprecate code in 0.17 as scheduled (#35441)

Signed-off-by: yewentao256 <zhyanwentao@126.com> Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>

[Deprecation] Deprecate code in 0.17 as scheduled (#35441)
Signed-off-by: yewentao256 <zhyanwentao@126.com> Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
e113a301 · Wentao Ye · GitHub · 1dafb29f · e113a301 · e113a301
Unverified Commit e113a301 authored Feb 28, 2026 by Wentao Ye Committed by GitHub Feb 28, 2026
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 28 deletions

vllm/sampling_params.py vllm/sampling_params.py +1 -17

vllm/v1/engine/input_processor.py vllm/v1/engine/input_processor.py +0 -11

No files found.
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -7,7 +7,7 @@ import json as json_mod
 from dataclasses import field
 from enum import Enum, IntEnum
 from functools import cached_property
-from typing import Annotated, Any
+from typing import Any
 import msgspec
 from pydantic.dataclasses import dataclass
@@ -209,10 +209,6 @@ class SamplingParams(
    """Whether to add spaces between special tokens in the output."""
    include_stop_str_in_output: bool = False
    """Whether to include the stop strings in output text."""
-    truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None
-    """If set to -1, will use the truncation size supported by the model. If
-    set to an integer k, will use only the last k tokens from the prompt
-    (i.e., left truncation). If set to `None`, truncation is disabled."""
    output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE
    skip_clone: bool = False
    """Internal flag indicating that this SamplingParams instance is safe to
@@ -273,7 +269,6 @@ class SamplingParams(
        detokenize: bool = True,
        skip_special_tokens: bool = True,
        spaces_between_special_tokens: bool = True,
-        truncate_prompt_tokens: Annotated[int, msgspec.Meta(ge=-1)] | None = None,
        output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE,
        structured_outputs: StructuredOutputsParams | None = None,
        logit_bias: dict[int, float] | dict[str, float] | None = None,
@@ -313,7 +308,6 @@ class SamplingParams(
            detokenize=detokenize,
            skip_special_tokens=skip_special_tokens,
            spaces_between_special_tokens=spaces_between_special_tokens,
-            truncate_prompt_tokens=truncate_prompt_tokens,
            output_kind=output_kind,
            structured_outputs=structured_outputs,
            logit_bias=logit_bias,
@@ -449,15 +443,6 @@ class SamplingParams(
                parameter="prompt_logprobs",
                value=self.prompt_logprobs,
            )
-        if self.truncate_prompt_tokens is not None and (
-            self.truncate_prompt_tokens == 0 or self.truncate_prompt_tokens < -1
-        ):
-            raise VLLMValidationError(
-                f"truncate_prompt_tokens must be an integer >= 1 or -1, "
-                f"got {self.truncate_prompt_tokens}",
-                parameter="truncate_prompt_tokens",
-                value=self.truncate_prompt_tokens,
-            )
        assert isinstance(self.stop_token_ids, list)
        if not all(isinstance(st_id, int) for st_id in self.stop_token_ids):
            raise ValueError(
@@ -835,7 +820,6 @@ class SamplingParams(
            f"skip_special_tokens={self.skip_special_tokens}, "
            "spaces_between_special_tokens="
            f"{self.spaces_between_special_tokens}, "
-            f"truncate_prompt_tokens={self.truncate_prompt_tokens}, "
            f"structured_outputs={self.structured_outputs}, "
            f"extra_args={self.extra_args})"
        )

--- a/vllm/v1/engine/input_processor.py
+++ b/vllm/v1/engine/input_processor.py
@@ -2,7 +2,6 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import time
-import warnings
 from collections.abc import Mapping
 from typing import Any, Literal
@@ -114,16 +113,6 @@ class InputProcessor:
        supported_tasks: tuple[SupportedTask, ...],
    ) -> None:
        """Raise `ValueError` if SamplingParams or PoolingParams is not valid."""
-        if params.truncate_prompt_tokens is not None:
-            params_type = type(params).__name__
-            warnings.warn(
-                f"The `truncate_prompt_tokens` parameter in `{params_type}` "
-                "is deprecated and will be removed in v0.17. "
-                "Please pass it via `tokenization_kwargs` instead.",
-                DeprecationWarning,
-                stacklevel=2,
-            )
        if isinstance(params, SamplingParams):
            supported_generation_tasks = [
                task for task in supported_tasks if task in GENERATION_TASKS