Commit 8ebd872f (unverified), authored by Martin Vit and committed by GitHub
Browse files

[Tool Parser] Fix Qwen3Coder streaming parameter loss with speculative decode (#35615)


Signed-off-by: Martin Vit <martin@voipmonitor.org>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
parent 168ee03e
@@ -1249,13 +1249,23 @@ class OpenAIServingChat(OpenAIServing):
 )
 # get the expected call based on partial JSON
-# parsing which "autocompletes" the JSON
-expected_call = json.dumps(
-    tool_parser.prev_tool_call_arr[index].get(
-        "arguments", {}
-    ),
-    ensure_ascii=False,
-)
+# parsing which "autocompletes" the JSON.
+# Tool parsers (e.g. Qwen3Coder) store
+# arguments as a JSON string in
+# prev_tool_call_arr. Calling json.dumps()
+# on an already-serialized string would
+# double-serialize it (e.g. '{"k":1}' becomes
+# '"{\\"k\\":1}"'), which then causes the
+# replace() below to fail and append the
+# entire double-serialized string as a
+# spurious final delta.
+args = tool_parser.prev_tool_call_arr[index].get(
+    "arguments", {}
+)
+if isinstance(args, str):
+    expected_call = args
+else:
+    expected_call = json.dumps(args, ensure_ascii=False)
 # get what we've streamed so far for arguments
 # for the current tool
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment