[Bugfix] Fix negative max_tokens when input prompt is too long (#36789)

Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>

[Bugfix] Fix negative max_tokens when input prompt is too long (#36789)
Signed-off-by: Isotr0py <mozf@mail2.sysu.edu.cn>
c84b519c · Isotr0py · GitHub · 741ecf06 · c84b519c · c84b519c
Unverified Commit c84b519c authored Mar 12, 2026 by Isotr0py Committed by GitHub Mar 11, 2026
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 19 additions and 0 deletions

tests/entrypoints/test_utils.py +14 -0

vllm/entrypoints/utils.py +5 -0

No files found.
--- a/tests/entrypoints/test_utils.py
+++ b/tests/entrypoints/test_utils.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import pytest
 from vllm.entrypoints.utils import get_max_tokens, sanitize_message
@@ -80,3 +82,15 @@ class TestGetMaxTokens:
            default_sampling_params={"max_tokens": 2048},
        )
        assert result == 512
+    def test_input_length_exceeds_max_model_len(self):
+        # input_length (150) is larger than max_model_len (100), so
+        # get_max_tokens is expected to raise ValueError up front instead of
+        # going on to compute a negative `max_model_len - input_length`.
+        with pytest.raises(
+            ValueError,
+            # `match` is a regex searched in the error text; each `.*` stands
+            # in for the parenthesized number embedded in the message.
+            match="Input length .* exceeds model's maximum context length .*",
+        ):
+            get_max_tokens(
+                max_model_len=100,
+                max_tokens=50,
+                input_length=150,
+                default_sampling_params={"max_tokens": 2048},
+            )
--- a/vllm/entrypoints/utils.py
+++ b/vllm/entrypoints/utils.py
@@ -178,6 +178,11 @@ def get_max_tokens(
    default_sampling_params: dict,
    override_max_tokens: int | None = None,
 ) -> int:
+    if max_model_len < input_length:
+        raise ValueError(
+            f"Input length ({input_length}) exceeds model's maximum "
+            f"context length ({max_model_len})."
+        )
    model_max_tokens = max_model_len - input_length
    platform_max_tokens = current_platform.get_max_output_tokens(input_length)
    fallback_max_tokens = (