Unverified commit 0202971a, authored by Chauncey, committed by GitHub
Browse files

[Frontend] Support GLM-4.5 / GLM-4.7 with enable_thinking: false (#31788)


Signed-off-by: chaunceyjiang <chaunceyjiang@gmail.com>
parent 2c1a4f24
...@@ -24,9 +24,9 @@ class DeepSeekV3ReasoningParser(ReasoningParser): ...@@ -24,9 +24,9 @@ class DeepSeekV3ReasoningParser(ReasoningParser):
def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
super().__init__(tokenizer, *args, **kwargs) super().__init__(tokenizer, *args, **kwargs)
chat_kwargs = kwargs.pop("chat_template_kwargs", {}) or {} chat_kwargs = kwargs.get("chat_template_kwargs", {}) or {}
thinking = bool(chat_kwargs.pop("thinking", False)) thinking = bool(chat_kwargs.get("thinking", False))
enable_thinking = bool(chat_kwargs.pop("enable_thinking", False)) enable_thinking = bool(chat_kwargs.get("enable_thinking", False))
thinking = thinking or enable_thinking thinking = thinking or enable_thinking
if thinking: if thinking:
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser from vllm.reasoning.holo2_reasoning_parser import Holo2ReasoningParser
class Glm4MoeModelReasoningParser(DeepSeekR1ReasoningParser): class Glm4MoeModelReasoningParser(Holo2ReasoningParser):
""" """
Reasoning parser for the Glm4MoeModel model is same as DeepSeekR1ReasoningParser. Reasoning parser for the Glm4MoeModel model, which inherits from
`Holo2ReasoningParser`.
""" """
pass pass
...@@ -46,9 +46,10 @@ class Holo2ReasoningParser(ReasoningParser): ...@@ -46,9 +46,10 @@ class Holo2ReasoningParser(ReasoningParser):
# all requests in the structured output manager. So it is important that without # all requests in the structured output manager. So it is important that without
# user specified chat template args, the default thinking is True. # user specified chat template args, the default thinking is True.
enable_thinking = bool(chat_kwargs.get("thinking", True)) thinking = bool(chat_kwargs.get("thinking", True))
enable_thinking = bool(chat_kwargs.get("enable_thinking", True))
if enable_thinking: thinking = thinking and enable_thinking
if thinking:
self._parser = DeepSeekR1ReasoningParser(tokenizer, *args, **kwargs) self._parser = DeepSeekR1ReasoningParser(tokenizer, *args, **kwargs)
else: else:
self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs) self._parser = IdentityReasoningParser(tokenizer, *args, **kwargs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment