Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d734445f
Unverified
Commit
d734445f
authored
Apr 08, 2026
by
yoke
Committed by
GitHub
Apr 08, 2026
Browse files
[Bugfix][Frontend] Fix Gemma4 streaming HTML duplication after tool calls (#38909)
Signed-off-by:
yoke233
<
yoke2012@gmail.com
>
parent
927975ea
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
64 additions
and
2 deletions
+64
-2
tests/tool_parsers/test_gemma4_tool_parser.py
tests/tool_parsers/test_gemma4_tool_parser.py
+60
-0
vllm/tool_parsers/gemma4_tool_parser.py
vllm/tool_parsers/gemma4_tool_parser.py
+4
-2
No files found.
tests/tool_parsers/test_gemma4_tool_parser.py
View file @
d734445f
...
@@ -531,3 +531,63 @@ class TestStreamingExtraction:
...
@@ -531,3 +531,63 @@ class TestStreamingExtraction:
assert
"<|"
not
in
args_text
,
(
assert
"<|"
not
in
args_text
,
(
f
"Partial delimiter leaked into JSON:
{
args_text
!
r
}
"
f
"Partial delimiter leaked into JSON:
{
args_text
!
r
}
"
)
)
def test_streaming_does_not_duplicate_plain_text_after_tool_call(
    self, parser, mock_request, monkeypatch
):
    """Plain text following a finished tool call is streamed exactly once.

    The parser's ``_extract_streaming`` is wrapped so that every
    ``current_text`` it receives is recorded. The stream ends the tool call
    in the same chunk that carries the ``<`` opening an HTML tag; the test
    then checks both the emitted content and the last recorded
    ``current_text`` for a doubled ``<``.
    """
    seen_current_texts: list[str] = []
    real_extract_streaming = parser._extract_streaming

    def recording_extract_streaming(previous_text, current_text, delta_text):
        # Record what the parser was handed, then defer to the real method.
        seen_current_texts.append(current_text)
        return real_extract_streaming(previous_text, current_text, delta_text)

    monkeypatch.setattr(
        parser, "_extract_streaming", recording_extract_streaming
    )

    # The fourth chunk glues the closing delimiter to the first character
    # of the trailing "<div>" text.
    chunks = [
        "<|tool_call>",
        "call:get_weather{",
        'location:<|"|>Paris<|"|>}',
        "<tool_call|><",
        "div>",
    ]
    results = self._simulate_streaming(parser, mock_request, chunks)

    # Only deltas that exist and carry non-empty content contribute.
    emitted_content = "".join(
        delta.content
        for delta, _ in results
        if delta is not None and delta.content
    )
    assert emitted_content == "<div>"

    final_current_text = seen_current_texts[-1]
    assert final_current_text.endswith("<tool_call|><div>")
    assert not final_current_text.endswith("<tool_call|><<div>")
def test_streaming_html_argument_does_not_duplicate_tag_prefixes(
    self, parser, mock_request
):
    """HTML content inside tool arguments must not be duplicated.

    Streams a ``write_file`` call whose ``content`` argument is an HTML
    document split so that several chunks end on a bare ``<``. The
    collected argument text must parse as JSON and carry the HTML
    exactly as it was streamed.
    """
    expected_content = (
        "<!DOCTYPE html>\n"
        '<html lang="zh-CN">\n'
        "<head>\n"
        ' <meta charset="UTF-8">\n'
        ' <meta name="viewport" content="width=device-width">\n'
    )
    chunks = [
        "<|tool_call>",
        "call:write_file{",
        'path:<|"|>index.html<|"|>,',
        'content:<|"|><!DOCTYPE html>\n<',
        'html lang="zh-CN">\n<',
        # The indentation in front of each <meta> tag belongs to the chunk
        # that ends on "<"; without it the streamed text could never equal
        # the expected content asserted below.
        "head>\n <",
        'meta charset="UTF-8">\n <',
        'meta name="viewport" content="width=device-width">\n',
        '<|"|>}',
        "<tool_call|>",
    ]
    # Fixture sanity check: the input really contains the expected HTML.
    assert expected_content in "".join(chunks)

    results = self._simulate_streaming(parser, mock_request, chunks)
    args_text = self._collect_arguments(results)

    # An empty argument string would make json.loads fail less readably.
    assert args_text
    parsed_args = json.loads(args_text)
    assert parsed_args["path"] == "index.html"
    assert parsed_args["content"] == expected_content
vllm/tool_parsers/gemma4_tool_parser.py
View file @
d734445f
...
@@ -436,8 +436,10 @@ class Gemma4ToolParser(ToolParser):
...
@@ -436,8 +436,10 @@ class Gemma4ToolParser(ToolParser):
)
->
DeltaMessage
|
None
:
)
->
DeltaMessage
|
None
:
# Buffer delta text to handle multi-token special sequences
# Buffer delta text to handle multi-token special sequences
delta_text
=
self
.
_buffer_delta_text
(
delta_text
)
delta_text
=
self
.
_buffer_delta_text
(
delta_text
)
# Reconstruct current_text after buffering to stay in sync
# Keep current_text from the upstream stream state. The buffered delta
current_text
=
previous_text
+
delta_text
# is only for emission, and must not be stitched back into the
# accumulated model text or normal content like "<div>" can be
# duplicated into "<<div>" when a tool call just ended.
# If no tool call token seen yet, emit as content
# If no tool call token seen yet, emit as content
if
self
.
tool_call_start_token
not
in
current_text
:
if
self
.
tool_call_start_token
not
in
current_text
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment