Unverified commit 3713eb61 authored by Jimmy, committed by GitHub

feat(reasoning): improve enable thinking from request (#10875)

parent 5937a56d
@@ -64,6 +64,7 @@ class OpenAIServingChat(OpenAIServingBase):
         super().__init__(tokenizer_manager)
         self.template_manager = template_manager
         self.tool_call_parser = self.tokenizer_manager.server_args.tool_call_parser
+        self.reasoning_parser = self.tokenizer_manager.server_args.reasoning_parser
 
     def _request_id_prefix(self) -> str:
         return "chatcmpl-"
@@ -563,10 +564,7 @@ class OpenAIServingChat(OpenAIServingBase):
                 stream_buffers[index] = stream_buffer + delta
 
                 # Handle reasoning content
-                if (
-                    self.tokenizer_manager.server_args.reasoning_parser
-                    and request.separate_reasoning
-                ):
+                if self.reasoning_parser and request.separate_reasoning:
                     reasoning_text, delta = self._process_reasoning_stream(
                         index, delta, reasoning_parser_dict, content, request
                     )
@@ -756,7 +754,7 @@ class OpenAIServingChat(OpenAIServingBase):
         # Handle reasoning content
         reasoning_text = None
-        reasoning_parser = self.tokenizer_manager.server_args.reasoning_parser
+        reasoning_parser = self.reasoning_parser
         if reasoning_parser and request.separate_reasoning:
             is_force_reasoning = (
                 self.template_manager.force_reasoning
@@ -1010,7 +1008,7 @@ class OpenAIServingChat(OpenAIServingBase):
                 or self._get_enable_thinking_from_request(request)
             )
             reasoning_parser_dict[index] = ReasoningParser(
-                self.tokenizer_manager.server_args.reasoning_parser,
+                self.reasoning_parser,
                 request.stream_reasoning,
                 is_force_reasoning,
             )
@@ -1050,11 +1048,11 @@ class OpenAIServingChat(OpenAIServingBase):
         """
         if hasattr(request, "chat_template_kwargs") and request.chat_template_kwargs:
             # For Qwen3 models, `enable_thinking` is supported.
-            if request.chat_template_kwargs.get("enable_thinking") is not None:
-                return request.chat_template_kwargs.get("enable_thinking")
+            if self.reasoning_parser in ["qwen3", "glm45"]:
+                return request.chat_template_kwargs.get("enable_thinking", False)
             # For DeepSeek-V3.1 models, `thinking` is supported.
-            elif request.chat_template_kwargs.get("thinking") is not None:
-                return request.chat_template_kwargs.get("thinking")
+            elif self.reasoning_parser in ["deepseek-v3"]:
+                return request.chat_template_kwargs.get("thinking", False)
             else:
                 return False
         return False
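For reference, here is a self-contained sketch of the updated helper's behavior. `resolve_enable_thinking` is a hypothetical free-function mirror of `_get_enable_thinking_from_request` (the real method reads the parser name from `self.reasoning_parser`), and the `SimpleNamespace` requests stand in for real chat completion requests:

# Sketch only: a free-function mirror of the updated helper, so the new
# behavior can be exercised without the full OpenAIServingChat class.
from types import SimpleNamespace


def resolve_enable_thinking(reasoning_parser, request):
    """The parser name now decides which chat_template_kwargs key is read;
    a missing key defaults to False instead of falling through."""
    if hasattr(request, "chat_template_kwargs") and request.chat_template_kwargs:
        # For Qwen3 models, `enable_thinking` is supported.
        if reasoning_parser in ["qwen3", "glm45"]:
            return request.chat_template_kwargs.get("enable_thinking", False)
        # For DeepSeek-V3.1 models, `thinking` is supported.
        elif reasoning_parser in ["deepseek-v3"]:
            return request.chat_template_kwargs.get("thinking", False)
        else:
            return False
    return False


req = SimpleNamespace(chat_template_kwargs={"enable_thinking": True})
assert resolve_enable_thinking("qwen3", req) is True
# The same request against a deepseek-v3 parser ignores `enable_thinking`.
assert resolve_enable_thinking("deepseek-v3", req) is False
# No chat_template_kwargs at all still resolves to False.
assert resolve_enable_thinking("qwen3", SimpleNamespace(chat_template_kwargs=None)) is False

Compared with the old version, which returned whichever of `enable_thinking` or `thinking` the client happened to send, the choice of key is now tied to the server-configured reasoning parser, so for example an `enable_thinking` flag sent to a deployment using the deepseek-v3 parser no longer toggles thinking.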