Unverified commit 0e39202c, authored by Flora Feng and committed by GitHub
Browse files

[Bugfix] Fix GLM tool parser streaming with MTP or stream interval (#39253)


Signed-off-by: sfeng33 <4florafeng@gmail.com>
parent 9dd5ee01
...@@ -117,28 +117,24 @@ class TestGlm47ExtractToolCalls: ...@@ -117,28 +117,24 @@ class TestGlm47ExtractToolCalls:
def _reset(parser): def _reset(parser):
parser._buffer = ""
parser._in_tool_call = False
parser.current_tool_name_sent = False parser.current_tool_name_sent = False
parser._current_tool_name = None
parser._pending_key = None
parser._streaming_string_value = False
parser.prev_tool_call_arr = [] parser.prev_tool_call_arr = []
parser.current_tool_id = -1 parser.current_tool_id = -1
parser.streamed_args_for_tool = [] parser.streamed_args_for_tool = []
parser._tool_call_ids = [] parser._tool_call_ids = []
parser._args_started = [] parser._sent_content_idx = 0
parser._args_closed = []
parser._seen_keys = []
class TestGlm47Streaming: class TestGlm47Streaming:
def test_no_args(self, glm47_tool_parser, mock_request): def test_no_args(self, glm47_tool_parser, mock_request):
_reset(glm47_tool_parser) _reset(glm47_tool_parser)
for chunk in ["<tool_call>", "get_current_date", "</tool_call>"]: chunks = ["<tool_call>", "get_current_date", "</tool_call>"]
current_text = ""
for chunk in chunks:
current_text += chunk
glm47_tool_parser.extract_tool_calls_streaming( glm47_tool_parser.extract_tool_calls_streaming(
previous_text="", previous_text="",
current_text="", current_text=current_text,
delta_text=chunk, delta_text=chunk,
previous_token_ids=[], previous_token_ids=[],
current_token_ids=[], current_token_ids=[],
...@@ -149,10 +145,7 @@ class TestGlm47Streaming: ...@@ -149,10 +145,7 @@ class TestGlm47Streaming:
def test_with_args(self, glm47_tool_parser, mock_request): def test_with_args(self, glm47_tool_parser, mock_request):
_reset(glm47_tool_parser) _reset(glm47_tool_parser)
# Split chunks so that the incremental string streaming path chunks = [
# processes the value, its closing tag, and the tool-call closing
# tag in separate calls.
for chunk in [
"<tool_call>", "<tool_call>",
"get_weather\n", "get_weather\n",
"<arg_key>city</arg_key>", "<arg_key>city</arg_key>",
...@@ -160,14 +153,18 @@ class TestGlm47Streaming: ...@@ -160,14 +153,18 @@ class TestGlm47Streaming:
"Beijing", "Beijing",
"</arg_value>", "</arg_value>",
"</tool_call>", "</tool_call>",
]: ]
current_text = ""
for chunk in chunks:
current_text += chunk
glm47_tool_parser.extract_tool_calls_streaming( glm47_tool_parser.extract_tool_calls_streaming(
previous_text="", previous_text="",
current_text="", current_text=current_text,
delta_text=chunk, delta_text=chunk,
previous_token_ids=[], previous_token_ids=[],
current_token_ids=[], current_token_ids=[],
delta_token_ids=[], delta_token_ids=[],
request=mock_request, request=mock_request,
) )
assert glm47_tool_parser.prev_tool_call_arr[0]["arguments"]["city"] == "Beijing" args = json.loads(glm47_tool_parser.prev_tool_call_arr[0]["arguments"])
assert args["city"] == "Beijing"
This diff is collapsed.
...@@ -31,6 +31,19 @@ Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool ...@@ -31,6 +31,19 @@ Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool
logger = init_logger(__name__) logger = init_logger(__name__)
def partial_tag_overlap(text: str, tag: str) -> int:
    """Return the length of the longest partial *tag* that ends *text*.

    Only prefixes of *tag* that are shorter than the whole tag are
    considered, and the longest one that is also a suffix of *text* wins.
    E.g. text ending in ``"<tool_"`` returns 6 when tag is ``"<tool_call>"``.

    Args:
        text: The string whose tail is inspected.
        tag: The marker whose leading characters are matched against that tail.

    Returns:
        The number of matching characters, or 0 when there is no overlap
        (including when *text* or *tag* is empty).
    """
    # A candidate prefix can be neither the complete tag nor longer than text.
    max_check = min(len(tag) - 1, len(text))
    # Try the longest candidate first so the first hit is the answer.
    for k in range(max_check, 0, -1):
        if text.endswith(tag[:k]):
            return k
    return 0
def find_common_prefix(s1: str, s2: str) -> str: def find_common_prefix(s1: str, s2: str) -> str:
""" """
Finds a common prefix that is shared between two strings, if there is one. Finds a common prefix that is shared between two strings, if there is one.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment