Unverified commit ceade195, authored by Jared Wen and committed by GitHub
Browse files

[BugFix] Support custom tool parsers when tool_choice is `required` and named function (#39870)


Signed-off-by: JaredforReal <w13431838023@gmail.com>
Signed-off-by: sfeng33 <4florafeng@gmail.com>
Co-authored-by: sfeng33 <4florafeng@gmail.com>
parent 747256bb
......@@ -557,6 +557,20 @@ class OpenAIServingChat(OpenAIServing):
and self._should_stream_with_auto_tool_parsing(request)
)
# Determine whether required/named tool_choice should fall back to
# the auto tool_parser path instead of the standard JSON-based parsing.
# This happens when the parser declares supports_required_and_named=False
# (e.g. GLM models that output XML instead of JSON).
tool_choice_uses_parser = (
self.tool_parser is not None
and not self.tool_parser.supports_required_and_named
and request.tools
and (
request.tool_choice == "required"
or isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
)
)
all_previous_token_ids: list[list[int]] | None
function_name_returned = [False] * num_choices
if self.tool_call_id_type == "kimi_k2":
......@@ -569,7 +583,12 @@ class OpenAIServingChat(OpenAIServing):
# Only one of these will be used, thus previous_texts and
# all_previous_token_ids will not be used twice in the same iteration.
if is_mistral_grammar_path or tool_choice_auto or reasoning_parser:
if (
is_mistral_grammar_path
or tool_choice_auto
or tool_choice_uses_parser
or reasoning_parser
):
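# These are required whenever one of the conditions above holds: the
# Mistral grammar path, "auto" tool choice, the required/named
# tool_parser fallback, or an active reasoning parser.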
all_previous_token_ids = [[] for _ in range(num_choices)]
reasoning_end_arr = [False] * num_choices
......@@ -764,7 +783,12 @@ class OpenAIServingChat(OpenAIServing):
delta_message: DeltaMessage | None
# just update previous_texts and previous_token_ids
if is_mistral_grammar_path or tool_choice_auto or reasoning_parser:
if (
is_mistral_grammar_path
or tool_choice_auto
or tool_choice_uses_parser
or reasoning_parser
):
assert previous_texts is not None
assert all_previous_token_ids is not None
previous_text = previous_texts[i]
......@@ -813,7 +837,9 @@ class OpenAIServingChat(OpenAIServing):
if result.tools_called:
tools_streamed[i] = True
# handle streaming deltas for tools with named tool_choice
elif tool_choice_function_name:
# Skip when tool_choice_uses_parser so it falls through
# to the auto tool_parser branches below.
elif tool_choice_function_name and not tool_choice_uses_parser:
# When encountering think end id in prompt_token_ids
# i.e {"enable_thinking": False},
# check BEFORE calling the parser to avoid a spurious
......@@ -851,7 +877,6 @@ class OpenAIServingChat(OpenAIServing):
):
reasoning_end_arr[i] = True
if delta_message and delta_message.content:
# This need to be added to next `delta_text`
current_text = delta_message.content
delta_message.content = None
else:
......@@ -896,7 +921,12 @@ class OpenAIServingChat(OpenAIServing):
)
tools_streamed[i] = True
elif request.tool_choice == "required":
# Skip when tool_choice_uses_parser so it falls through
# to the auto tool_parser branches below.
elif (
request.tool_choice == "required"
and not tool_choice_uses_parser
):
assert previous_texts is not None
previous_text = previous_texts[i]
current_text = previous_text + delta_text
......@@ -966,7 +996,10 @@ class OpenAIServingChat(OpenAIServing):
# update the previous values for the next iteration
if (
is_mistral_grammar_path or tool_choice_auto or reasoning_parser
is_mistral_grammar_path
or tool_choice_auto
or tool_choice_uses_parser
or reasoning_parser
) and not self.use_harmony:
assert previous_texts is not None
assert all_previous_token_ids is not None
......
......@@ -627,7 +627,7 @@ class OpenAIServing:
and isinstance(request.tool_choice, ToolChoiceFunction)
):
assert content is not None
# Forced Function Call
# Forced Function Call (Responses API)
function_calls.append(
FunctionCall(name=request.tool_choice.name, arguments=content)
)
......@@ -636,14 +636,20 @@ class OpenAIServing:
not use_mistral_tool_parser
and request.tool_choice
and isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam)
and (tool_parser_cls is None or tool_parser_cls.supports_required_and_named)
):
# Named function with standard JSON-based parsing
assert content is not None
# Forced Function Call
function_calls.append(
FunctionCall(name=request.tool_choice.function.name, arguments=content)
)
content = None # Clear content since tool is called.
elif not use_mistral_tool_parser and request.tool_choice == "required":
elif (
not use_mistral_tool_parser
and request.tool_choice == "required"
and (tool_parser_cls is None or tool_parser_cls.supports_required_and_named)
):
# "required" with standard JSON-based parsing
tool_calls = []
with contextlib.suppress(ValidationError):
content = content or ""
......@@ -662,15 +668,30 @@ class OpenAIServing:
use_mistral_tool_parser
or (
enable_auto_tools
and (request.tool_choice == "auto" or request.tool_choice is None)
and (
request.tool_choice == "auto"
or request.tool_choice is None
or (
not tool_parser_cls.supports_required_and_named
and request.tools
and (
request.tool_choice == "required"
or isinstance(
request.tool_choice,
ChatCompletionNamedToolChoiceParam,
)
)
)
)
)
):
# Automatic Tool Call Parsing (also used as fallback for
# required/named when supports_required_and_named=False)
if tokenizer is None:
raise ValueError(
"Tokenizer not available when `skip_tokenizer_init=True`"
)
# Automatic Tool Call Parsing
try:
tool_parser = tool_parser_cls(tokenizer, request.tools)
except RuntimeError as e:
......
......@@ -44,6 +44,17 @@ class ToolParser:
derived classes.
"""
# When True (default), the serving layer uses the standard JSON-based
# parsing for tool_choice="required" and named function tool_choice,
# which works for models where guided decoding produces well-formed
# JSON output (e.g. Hermes).
# Subclasses set False when the standard parsing does not work for
# their model's output format (e.g. GLM models that use XML). When
# False, the serving layer falls back to the tool_parser's
# extract_tool_calls / extract_tool_calls_streaming methods for
# required/named tool_choice, treating them the same as "auto".
supports_required_and_named: bool = True
def __init__(
self,
tokenizer: TokenizerLike,
......
......@@ -23,6 +23,8 @@ logger = init_logger(__name__)
class Glm47MoeModelToolParser(Glm4MoeModelToolParser):
supports_required_and_named = False
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)
# GLM-4.7 format: <tool_call>func_name[<arg_key>...]*</tool_call>
......
......@@ -20,6 +20,7 @@ import regex as re
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionNamedToolChoiceParam,
ChatCompletionRequest,
)
from vllm.entrypoints.openai.engine.protocol import (
......@@ -50,6 +51,8 @@ class Glm4MoeModelToolParser(ToolParser):
call, and diffs against what was previously sent to emit only new content.
"""
supports_required_and_named = False
def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
super().__init__(tokenizer, tools)
# Stateful streaming fields
......@@ -156,7 +159,25 @@ class Glm4MoeModelToolParser(ToolParser):
def adjust_request(
self, request: ChatCompletionRequest | ResponsesRequest
) -> ChatCompletionRequest | ResponsesRequest:
"""Adjust request parameters for tool call token handling."""
"""Adjust request parameters for tool call token handling.
For required/named tool_choice, skip setting structured_outputs
because GLM models output tool calls in XML format (per chat
template). Guided decoding would force JSON output, conflicting
with the XML format and causing parsing failures.
"""
if request.tools:
tc = request.tool_choice
if tc == "required" or isinstance(tc, ChatCompletionNamedToolChoiceParam):
# Do NOT call super().adjust_request() for required/named,
# because it would set structured_outputs and force JSON
# output via guided decoding. GLM models use XML tool-call
# syntax (defined in the chat template), so guided decoding
# must be skipped to let the model output XML freely.
# The tool_parser handles extraction from XML output.
if request.tool_choice != "none":
request.skip_special_tokens = False
return request
request = super().adjust_request(request)
if request.tools and request.tool_choice != "none":
# Ensure tool call tokens (<tool_call>, </tool_call>) are not skipped
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment