[Bugfix] Fix the max_seq_len limit of 16384 for DeepSeek models (#20322)

Signed-off-by: Wang Huaqiang <huaqiang.wang@intel.com>

[Bugfix] Fix the max_seq_len limit of 16384 for DeepSeek models (#20322)
Signed-off-by: Wang Huaqiang <huaqiang.wang@intel.com>
ccbfb1d1 · WangHuaqiang · GitHub · 9e5552aa · ccbfb1d1 · ccbfb1d1
Unverified commit ccbfb1d1, authored Jul 02, 2025 by WangHuaqiang; committed by GitHub on Jul 02, 2025
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 13 additions and 4 deletions

tests/test_config.py tests/test_config.py +2 -0

vllm/config.py vllm/config.py +11 -4

No files found.
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -412,6 +412,8 @@ def test_load_config_pt_load_map_location(pt_load_map_location):
        ("BAAI/bge-reranker-base", None, 512, False),
        ("BAAI/bge-reranker-base", 256, 256, False),
        ("BAAI/bge-reranker-base", 513, 512, True),
+        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", None, 131072, False),
+        ("deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", 131073, 131072, True),
    ])
 def test_get_and_verify_max_len(model_id, max_model_len, expected_max_len,
                                should_raise):

--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1442,6 +1442,13 @@ class ModelConfig:
        return getattr(self.hf_config, "matryoshka_dimensions", None)

    def get_and_verify_max_len(self, max_model_len: int):
+        # For pooling models, the tokenizer's `model_max_length` is often a
+        # reliable source for the maximum sequence length. However, for
+        # generative models, this can be incorrect and unduly limit the
+        # context window (e.g., DeepSeek-R1). Therefore, we only consider
+        # tokenizer_config for pooling models.
+        tokenizer_config = None
+        if self.runner_type == "pooling":
            tokenizer_config = try_get_tokenizer_config(
                self.tokenizer,
                trust_remote_code=self.trust_remote_code,