[Frontend] Support GLM-4.5 / GLM-4.7 with enable_thinking: false (#31788)

Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>

[Frontend] Support GLM-4.5 / GLM-4.7 with enable_thinking: false (#31788)
Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
0202971a · Chauncey · GitHub · 2c1a4f24 · 0202971a · 0202971a
Unverified commit 0202971a, authored Jan 06, 2026 by Chauncey; committed by GitHub on Jan 06, 2026
3 changed files
--- a/vllm/reasoning/deepseek_v3_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_v3_reasoning_parser.py
@@ -24,9 +24,9 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
    def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
        super().__init__(tokenizer, *args, **kwargs)
-        chat_kwargs = kwargs.pop("chat_template_kwargs", {}) or {}
+        chat_kwargs = kwargs.get("chat_template_kwargs", {}) or {}
-        thinking = bool(chat_kwargs.pop("thinking", False))
+        thinking = bool(chat_kwargs.get("thinking", False))
-        enable_thinking = bool(chat_kwargs.pop("enable_thinking", False))
+        enable_thinking = bool(chat_kwargs.get("enable_thinking", False))
        thinking = thinking or enable_thinking
        if thinking:

--- a/vllm/reasoning/glm4_moe_reasoning_parser.py
+++ b/vllm/reasoning/glm4_moe_reasoning_parser.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
+from vllm.reasoning.holo2_reasoning_parser import Holo2ReasoningParser
-class Glm4MoeModelReasoningParser(DeepSeekR1ReasoningParser):
+class Glm4MoeModelReasoningParser(Holo2ReasoningParser):
    """
-    Reasoning parser for the Glm4MoeModel model is same as DeepSeekR1ReasoningParser.
+    Reasoning parser for the Glm4MoeModel model, which inherits from
+    `Holo2ReasoningParser`.
    """
    pass
--- a/vllm/reasoning/holo2_reasoning_parser.py
+++ b/vllm/reasoning/holo2_reasoning_parser.py
@@ -46,9 +46,10 @@ class Holo2ReasoningParser(ReasoningParser):
        # all requests in the structured output manager. So it is important that without
        # user specified chat template args, the default thinking is True.
-        enable_thinking = bool(chat_kwargs.get("thinking", True))
+        thinking = bool(chat_kwargs.get("thinking", True))
+        enable_thinking = bool(chat_kwargs.get("enable_thinking", True))
-        if enable_thinking:
+        thinking = thinking and enable_thinking
+        if thinking:
            self._parser = DeepSeekR1ReasoningParser(tokenizer, *args, **kwargs)
        else:
            self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs)