Unverified Commit 35fa7129 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat(v4): cherry-pick #8665 onto release/deepseekv4 (#8709)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent 01002df7
{
"request_id": "deepseek-v4-fragmented-tokens-test",
"expected_output": {"normal_content": "", "reasoning_content": "Break tokens apart aggressively.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "ping", "arguments": "{\"host\": \"example.com\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"<think>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"B","role":"assistant","reasoning_content":"B"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant","reasoning_content":"r"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant","reasoning_content":"e"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant","reasoning_content":"a"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"k","role":"assistant","reasoning_content":"k"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":" ","role":"assistant","reasoning_content":" "}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant","reasoning_content":"t"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant","reasoning_content":"o"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"k","role":"assistant","reasoning_content":"k"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant","reasoning_content":"e"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"n","role":"assistant","reasoning_content":"n"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"s","role":"assistant","reasoning_content":"s"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":" ","role":"assistant","reasoning_content":" "}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant","reasoning_content":"a"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"p","role":"assistant","reasoning_content":"p"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant","reasoning_content":"a"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant","reasoning_content":"r"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant","reasoning_content":"t"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":" ","role":"assistant","reasoning_content":" "}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant","reasoning_content":"a"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"g","role":"assistant","reasoning_content":"g"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"g","role":"assistant","reasoning_content":"g"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant","reasoning_content":"r"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant","reasoning_content":"e"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"s","role":"assistant","reasoning_content":"s"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"s","role":"assistant","reasoning_content":"s"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"i","role":"assistant","reasoning_content":"i"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"v","role":"assistant","reasoning_content":"v"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant","reasoning_content":"e"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant","reasoning_content":"l"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"y","role":"assistant","reasoning_content":"y"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":".","role":"assistant","reasoning_content":"."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"</think>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"<","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"D","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"S","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"M","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"L","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"_","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"c","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"s","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"<","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"D","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"S","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"M","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"L","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"i","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"v","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"k","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":" ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"m","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"=","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\"","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"p","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"i","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"g","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\"","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"<","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"D","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"S","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"M","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"L","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"p","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"m","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":" ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"m","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"=","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\"","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"h","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"s","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\"","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":" ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"s","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"i","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"g","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"=","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\"","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"u","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\"","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"x","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"m","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"p","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":".","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"c","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"m","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"<","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"/","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"D","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"S","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"M","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"L","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"p","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"m","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"r","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"<","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"/","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"D","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"S","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"M","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"L","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"i","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"v","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"k","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"e","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"<","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"/","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"D","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"S","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"M","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"L","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"|","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"t","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"o","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"_","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"c","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"a","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"l","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":"s","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-fragmented-tokens","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
{
"request_id": "deepseek-v4-mixed-param-types-test",
"expected_output": {"normal_content": "", "reasoning_content": "The user asked me to send a high-priority overdue-billing notification. I'll call send_notification with the appropriate parameters.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "send_notification", "arguments": "{\"recipient\": \"user@example.com\", \"priority\": 3, \"urgent\": true, \"tags\": [\"billing\", \"overdue\"], \"metadata\": {\"ticket\": \"T-42\", \"retries\": 2}}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<think>The user asked me to send a high-priority overdue-billing notification. I'll call send_notification with the appropriate parameters.</think>","role":"assistant","reasoning_content":"The user asked me to send a high-priority overdue-billing notification. I'll call send_notification with the appropriate parameters."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"send_notification\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"recipient\" string=\"true\">user@example.com</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"priority\" string=\"false\">3</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"urgent\" string=\"false\">true</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"tags\" string=\"false\">[\"billing\", \"overdue\"]</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"metadata\" string=\"false\">{\"ticket\": \"T-42\", \"retries\": 2}</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
{
"request_id": "deepseek-v4-multi-tool-test",
"expected_output": {"normal_content": "", "reasoning_content": "The user wants to check the weather in Beijing and Shanghai, I need to call the get_current_weather tool to get this information.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Beijing\", \"format\": \"celsius\"}"}}, {"id": "call_2", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Shanghai\", \"format\": \"celsius\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<think>The user wants to check the weather in Beijing and Shanghai, I need to call the get_current_weather tool to get this information.</think>","role":"assistant","reasoning_content":"The user wants to check the weather in Beijing and Shanghai, I need to call the get_current_weather tool to get this information."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_current_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Beijing</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"format\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_current_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Shanghai</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"format\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
{
"request_id": "deepseek-v4-no-tool-test",
"expected_output": {"normal_content": "Hi! I'm here to help — what would you like to work on today?", "reasoning_content": "User greeted me politely. A short friendly reply is appropriate; no tools needed.", "tool_calls": []},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":"<think>User greeted me politely. A short friendly reply is appropriate; no tools needed.</think>","role":"assistant","reasoning_content":"User greeted me politely. A short friendly reply is appropriate; no tools needed."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":"Hi! I'm here to help — ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":"what would you like to work on today?","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"stop"}]}}
]
}
{
"request_id": "deepseek-v4-special-chars-test",
"expected_output": {"normal_content": "", "reasoning_content": "The user wants me to save a multiline note with special characters, quotes, unicode, and emoji. I'll call save_note.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "save_note", "arguments": "{\"note\": \"He said \\\"hello\\\".\\n\\t'world' `backtick` — 中文测试 — 🚀✨ <not_a_sentinel> & </not_a_sentinel>.\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<think>The user wants me to save a multiline note with special characters, quotes, unicode, and emoji. I'll call save_note.</think>","role":"assistant","reasoning_content":"The user wants me to save a multiline note with special characters, quotes, unicode, and emoji. I'll call save_note."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"save_note\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"note\" string=\"true\">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"He said \"hello\".\n\t'world' `backtick` ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"— 中文测试 — 🚀✨ ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<not_a_sentinel> & </not_a_sentinel>.</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
{
"request_id": "deepseek-v4-tool-call-test",
"expected_output": {"normal_content": "", "reasoning_content": "User wants the current weather in Beijing. I'll call get_current_weather with celsius units.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Beijing\", \"format\": \"celsius\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<think>User wants the current weather in Beijing. I'll call get_current_weather with celsius units.</think>","role":"assistant","reasoning_content":"User wants the current weather in Beijing. I'll call get_current_weather with celsius units."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_current_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Beijing</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"format\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Tests for DeepSeek V4 encoding against official test data
//!
//! These tests use the official test files from:
//! https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/tree/main/encoding
use dynamo_llm::preprocessor::prompt::deepseek_v4::{ThinkingMode, encode_messages};
use serde_json::Value as JsonValue;
use std::fs;
use std::path::PathBuf;
fn get_test_data_path() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/data/deepseek-v4")
}
/// Load an input fixture. V4 fixtures come in two shapes:
/// 1. `{"tools": [...], "messages": [...]}` — tools injected on first (system) message
/// 2. bare `[...]` — just the messages array
fn load_messages(path: &PathBuf) -> Vec<JsonValue> {
let raw: JsonValue = serde_json::from_str(
&fs::read_to_string(path).unwrap_or_else(|_| panic!("Failed to read {:?}", path)),
)
.unwrap_or_else(|_| panic!("Failed to parse {:?}", path));
if let Some(messages) = raw.get("messages").and_then(|m| m.as_array()) {
let mut messages = messages.clone();
if let Some(tools) = raw.get("tools")
&& let Some(first) = messages.get_mut(0)
&& let Some(obj) = first.as_object_mut()
{
obj.insert("tools".to_string(), tools.clone());
}
messages
} else if let Some(arr) = raw.as_array() {
arr.clone()
} else {
panic!("Unexpected input shape in {:?}", path);
}
}
fn run_official_test(input_file: &str, output_file: &str, thinking_mode: ThinkingMode) {
let test_dir = get_test_data_path();
let messages = load_messages(&test_dir.join(input_file));
let expected = fs::read_to_string(test_dir.join(output_file))
.unwrap_or_else(|_| panic!("Failed to read {}", output_file));
let actual = encode_messages(&messages, thinking_mode, true)
.unwrap_or_else(|e| panic!("encode_messages failed for {}: {:?}", input_file, e));
let exp = expected.trim_end();
let act = actual.trim_end();
if exp != act {
println!("=== Test: {} ===", input_file);
let exp_lines: Vec<&str> = exp.lines().collect();
let act_lines: Vec<&str> = act.lines().collect();
for (i, (el, al)) in exp_lines.iter().zip(act_lines.iter()).enumerate() {
if el != al {
println!("Line {} differs:", i + 1);
println!(" Expected: {:?}", el);
println!(" Actual: {:?}", al);
break;
}
}
if exp_lines.len() != act_lines.len() {
println!(
"\nLine count mismatch: expected {} lines, got {} lines",
exp_lines.len(),
act_lines.len()
);
}
panic!("Output does not match expected for {}", input_file);
}
}
/// Case 1 — thinking mode, single tool, tool result round-trip.
#[test]
fn test_official_thinking_with_tools() {
run_official_test(
"test_input_1.json",
"test_output_1.txt",
ThinkingMode::Thinking,
);
}
/// Case 2 — thinking mode, no tools, multi-turn (drop_thinking strips earlier reasoning).
#[test]
fn test_official_thinking_no_tools_multiturn() {
run_official_test(
"test_input_2.json",
"test_output_2.txt",
ThinkingMode::Thinking,
);
}
/// Case 3 — thinking mode, developer role with tools + latest_reminder + tool result.
#[test]
fn test_official_developer_with_tools_and_reminder() {
run_official_test(
"test_input_3.json",
"test_output_3.txt",
ThinkingMode::Thinking,
);
}
/// Case 4 — chat mode, latest_reminder + task="action" + mask preservation.
#[test]
fn test_official_chat_mode_action_task() {
run_official_test("test_input_4.json", "test_output_4.txt", ThinkingMode::Chat);
}
......@@ -1106,6 +1106,167 @@ mod tests {
);
}
// ---- DeepSeek V4 (DSML format) streaming parser tests ----
//
// V4 emits tool calls inside a DSML block:
// <|DSML|tool_calls>
// <|DSML|invoke name="fn">
// <|DSML|parameter name="k" string="true|false">v</|DSML|parameter>
// </|DSML|invoke>
// </|DSML|tool_calls>
// Fixtures live under tests/data/vllm/deepseek-v4/.
/// Shared harness for DeepSeek V4 e2e fixtures that end in a tool call.
async fn run_deepseek_v4_tool_call_fixture(file_path: &str) {
let test_data = load_test_data(file_path);
let input_stream = stream::iter(test_data.stream_chunks);
let output_chunks = parse_response_stream(
input_stream,
true,
true,
Some("deepseek_v4".to_string()),
Some("deepseek_v4".to_string()),
)
.await;
assert!(!output_chunks.is_empty(), "Should have output chunks");
let aggregated = aggregate_content_from_chunks(&output_chunks);
assert_eq!(
aggregated.reasoning_content, test_data.expected_reasoning_content,
"Should have extracted reasoning content.",
);
assert_eq!(
aggregated.normal_content, test_data.expected_normal_content,
"Normal content should match expected value.",
);
let expected_has_tool_calls = !test_data.expected_tool_calls.is_empty();
assert_eq!(
aggregated.has_tool_calls, expected_has_tool_calls,
"Tool calls presence should match expected value"
);
assert_tool_calls(&aggregated.tool_calls, &test_data.expected_tool_calls);
assert!(
validate_finish_reason(&output_chunks, FinishReason::ToolCalls),
"finish_reason validation failed for tool call case"
);
}
/// Single tool call, thinking mode (direct V4 analog of the V3 tool fixture).
/// `CASE.1` + `CASE.8` + `CASE.9` — single tool call, streaming assembly, paired with reasoning. Also validates `CASE.12` finish_reason=tool_calls.
#[tokio::test]
async fn test_deepseek_v4_e2e_with_tools_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_tool.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// No tool call — thinking + plain body; finish_reason=stop.
/// `CASE.3` + `CASE.10` — no tool call + reasoning only. Also validates `CASE.12` finish_reason=stop.
#[tokio::test]
async fn test_deepseek_v4_e2e_with_no_tools_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_no_tool.json",
DATA_ROOT_PATH
);
let test_data = load_test_data(&file_path);
let input_stream = stream::iter(test_data.stream_chunks);
let output_chunks = parse_response_stream(
input_stream,
true,
true,
Some("deepseek_v4".to_string()),
Some("deepseek_v4".to_string()),
)
.await;
assert!(!output_chunks.is_empty(), "Should have output chunks");
let aggregated = aggregate_content_from_chunks(&output_chunks);
assert_eq!(
aggregated.reasoning_content, test_data.expected_reasoning_content,
"Should have extracted reasoning content.",
);
assert_eq!(
aggregated.normal_content, test_data.expected_normal_content,
"Normal content should match expected value.",
);
assert!(!aggregated.has_tool_calls, "Should not have any tool calls");
assert!(
validate_finish_reason(&output_chunks, FinishReason::Stop),
"finish_reason validation failed for non-tool call case"
);
}
/// Two parallel tool calls inside one DSML block.
/// `CASE.2` — parallel tool calls in one DSML block.
#[tokio::test]
async fn test_deepseek_v4_e2e_multi_tool_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_multi_tool.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// string="true" vs string="false" — numbers, booleans, arrays, objects must
/// round-trip as their proper JSON types inside arguments.
/// `CASE.7` — complex args (mixed string="true|false" → strings / numbers / bools / arrays / objects round-trip).
#[tokio::test]
async fn test_deepseek_v4_e2e_mixed_param_types_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_mixed_param_types.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// Body text emitted before the DSML block — parser must populate both
/// normal_content and tool_calls.
/// `CASE.13` — normal text interleaved before the DSML block.
#[tokio::test]
async fn test_deepseek_v4_e2e_content_before_tool_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_content_before_tool.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// Parameter value containing unicode, emoji, embedded quotes/newlines/tabs,
/// and fragments that look like sentinels but aren't — must not confuse the
/// parser, which anchors only on the exact </|DSML|parameter> token.
/// `CASE.7` — Unicode / special characters inside argument values. (`CASE.xml.entities` is N/A for DSML — no entity decoding.)
#[tokio::test]
async fn test_deepseek_v4_e2e_special_chars_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_special_chars.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// Adversarial streaming: every DSML character is its own delta (~200 chunks).
/// Exercises buffer accumulation across chunk boundaries.
/// `CASE.8` — streaming chunk-boundary splits (tokens straddle chunks).
#[tokio::test]
async fn test_deepseek_v4_e2e_fragmented_tokens_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_fragmented_tokens.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
// ---- Kimi K2 streaming jail reproduction tests ----
//
// These reproduce the customer-reported issue (DIS-1765): Kimi K2 agentic
......
# dynamo-parsers
Rust crate for parsing **tool calls** and **reasoning content** out of raw LLM
output. Wire-format-aware, streaming-first, model-family-aware.
This is the post-model side of Dynamo's chat-completions pipeline: given a
token stream from vLLM or SGLang, extract structured `Vec<ToolCall>` +
`reasoning_content` for the client. The pre-model side (prompt formatting)
lives in `lib/llm/src/preprocessor/prompt/`.
## What's in the crate
Two top-level modules, each with its own parser registry:
```
lib/parsers/
├── src/
│ ├── tool_calling/ ← tool-call extraction (17 registered parsers)
│ │ ├── parsers.rs — registry + dispatch (detect_and_parse_tool_call)
│ │ ├── config.rs — per-parser ToolCallConfig
│ │ ├── response.rs — ToolCallResponse shape (wire type)
│ │ ├── dsml/ — DeepSeek V3.2 / V4 DSML grammar
│ │ ├── xml/ — hermes, glm47, kimi_k2, minimax_m2, qwen3_coder
│ │ ├── json/ — deepseek_v3, deepseek_v3_1, nemotron_deci/nano, jamba, mistral, phi4, llama3_json
│ │ ├── harmony/ — OpenAI gpt-oss (Harmony token stream, uses openai_harmony crate)
│ │ └── pythonic/ — Python function-call syntax (some Llama variants)
│ └── reasoning/ ← reasoning-content extraction (14 registered parsers)
│ ├── mod.rs — registry + dispatch
│ ├── base_parser.rs — BasicReasoningParser (<think> ... </think>)
│ ├── gpt_oss_parser.rs — Harmony channel parsing
│ ├── granite_parser.rs — Granite-style
│ └── minimax_append_think_parser.rs — MiniMax inline-reasoning
```
## How a request flows through the crate
```
token stream from engine
┌─────────────────────────────────┐
│ reasoning parser │ — registered by name via
│ (basic / gpt_oss / ...) │ reasoning::mod.rs get_reasoning_parser_map()
│ │ returns: (reasoning_content, non_reasoning_tail)
└─────────────────────────────────┘
▼ (non-reasoning tail)
┌─────────────────────────────────┐
│ tool-call parser │ — registered by name via
│ dispatched on parser name │ tool_calling::parsers::get_tool_parser_map()
│ which picks a ParserConfig: │
│ - Dsml(DsmlParserConfig) │ → try_tool_call_parse_dsml
│ - Json(JsonParserConfig) │ → try_tool_call_parse_json
│ - Xml(XmlParserConfig) │ → try_tool_call_parse_xml
│ - KimiK2(KimiK2ParserConfig)│ → try_tool_call_parse_kimi_k2
│ - Pythonic / Harmony │
└─────────────────────────────────┘
Vec<ToolCallResponse> + normal_text
```
Main public entry points in `tool_calling/parsers.rs`:
- `detect_and_parse_tool_call(input, parser_name, schema) -> (calls, normal_text)`
- `try_tool_call_parse(input, config) -> (calls, normal_text)` (lower-level, bypasses the registry)
- `detect_tool_call_start(chunk, parser_name)` — streaming: "is this chunk starting a tool-call block?"
- `find_tool_call_end_position(chunk, parser_name)` — streaming: "where does the block end in this chunk?"
## Parser-family cheat sheet
When adding a new model, the right parser family is usually one of:
| Family | Grammar | Shared engine | Examples |
| -- | -- | -- | -- |
| **DSML** | `<|DSML|tool_calls>...` with typed `string="true|false"` parameters | `dsml/parser.rs` | DeepSeek V3.2, V4 |
| **XML** | `<tool_call>...</tool_call>` with nested `<parameter>` or `<function>` | `xml/parser.rs` (generic) or own file for variants | hermes, qwen3_coder, minimax_m2, glm47 (own), kimi_k2 (own, special-token XML) |
| **JSON** | Start sentinel + bare JSON array of `{name, arguments}` | `json/base_json_parser.rs` | deepseek_v3, deepseek_v3_1, nemotron_deci/nano |
| **Harmony** | OpenAI Harmony token stream with `<\|channel\|>`, `<\|message\|>`, `<\|call\|>` | `harmony/harmony_parser.rs` (wraps external `openai_harmony` crate) | gpt-oss-20B / 120B |
| **Pythonic** | `[func_name(arg=value, ...)]` Python function-call syntax | `pythonic/pythonic_parser.rs` | some Llama variants |
Reasoning parsers:
| Family | Grammar | Shared engine | Examples |
| -- | -- | -- | -- |
| **Basic (think-tag)** | `<think>...</think>` | `reasoning/base_parser.rs` (BasicReasoningParser) | Qwen3, Nemotron, Kimi K2.5, DeepSeek R1 / V4, GLM-4.5+ |
| **Append-think** | `<think>...</think>` left inline as text, with `<think>` prefix on first chunk | `reasoning/minimax_append_think_parser.rs` | MiniMax M2 |
| **Harmony channel** | Hidden `analysis` channel | `reasoning/gpt_oss_parser.rs` (wraps external `openai_harmony`) | gpt-oss-20B / 120B |
| **Granite** | Custom start/end tokens | `reasoning/granite_parser.rs` | IBM Granite |
## Adding a new parser
1. **Pick the family** from the cheat sheet above. If an existing config-driven
family fits, add a `ToolCallConfig::<your_model>()` constructor in
`tool_calling/config.rs`, register it in `tool_calling/parsers.rs`. Done —
you inherit all the shared parser and tests.
2. **If the grammar is genuinely new**, add a module under `tool_calling/` and
add a `ParserConfig` variant in `config.rs`. Follow the existing parser
modules for layout.
3. **For reasoning**, prefer aliasing to `BasicReasoningParser` unless the
grammar truly diverges (append-think, Harmony channels). Most new models
use plain `<think>...</think>` and can share.
4. **Write tests.** Minimum viable set is in [`TESTING.md`](./TESTING.md) (T1–T20
taxonomy). At minimum: T1/T2/T3 for correctness, T5 for truncation
behavior, T8/T9 for streaming, T14 for interleaved text. `N/A` categories
should be explicitly called out in a comment rather than silently skipped.
## Related docs
- [`TESTING.md`](./TESTING.md) — corner-case taxonomy (T1–T20). What every
parser should be tested against, what's N/A per family, what's a universal
gap today.
- `lib/llm/tests/data/` — captured streaming fixtures per (engine × model)
that feed `test_streaming_tool_parsers.rs`. The replay side of the testing
story.
## Integration with the rest of Dynamo
- `lib/llm/src/preprocessor/prompt/` — pre-model side. Writes the prompts
that (eventually) come back and get parsed here.
- `lib/llm/src/preprocessor.rs` — top-level request/response pipeline.
Decides whether to run the reasoning parser based on
`is_reasoning_disabled_by_request`, then hands the reasoning-stripped
tail to the tool-call parser.
- `components/src/dynamo/frontend/` — Python frontend that surfaces parsed
output as OpenAI-compatible SSE chunks to the client.
# Tool-Call / Reasoning Parser Corner Cases
Reference taxonomy for unit testing tool-call and reasoning parsers. Each parser
added under `src/tool_calling/` or `src/reasoning/` should cover the generic
`CASE.<n>` categories; family-specific parsers also cover their respective
`CASE.xml<n>` / `CASE.harmony<n>` categories. `N/A` should be called out
explicitly in the test file rather than silently omitted.
Category layout:
- **`CASE.1`–`CASE.16`****Generic**. Apply to every parser regardless of grammar.
- **`CASE.xml1`–`CASE.xml2`** — XML-family only (hermes, glm47, qwen3_coder, minimax_m2, kimi_k2).
- **`CASE.harmony1`** — Harmony only (gpt-oss).
Per-model gap tracking lives elsewhere (not in this repo).
## Quick reference
### Generic (all parsers)
- **`CASE.1`** Single tool call — happy path (one complete, well-formed call). Ex: `xml::test_parse_simple_tool_call`.
- **`CASE.2`** Multiple tool calls — sequential or parallel (2+ in one response). Ex: `tool_choice::test_streaming_required_tool_parallel`.
- **`CASE.3`** No tool call (response is text only). Ex: `xml::test_parse_no_tool_calls`.
- **`CASE.4`** Malformed / partial JSON args (truncated, missing close brace, invalid syntax). Ex: `deepseek_v3::test_parse_..._with_invalid_json`, `xml::test_parse_missing_..._closing_tag`.
- **`CASE.5`** Missing end-token recovery (recover calls when `section_end` is absent due to max_tokens / EOS). Ex: `kimi_k2::test_parse_malformed_no_section_end`.
- **`CASE.6`** Empty args (`arguments={}` / no-arg call). Ex: `kimi_k2::test_parse_no_arg_call`.
- **`CASE.7`** Complex arg types (nested objects, arrays, bool, number, Unicode / newlines in values). Ex: `kimi_k2::test_parse_complex_json_arguments`.
- **`CASE.8`** Streaming — token-by-token assembly + chunk-boundary splits. Ex: `basic::test_buffer_state_persistence_across_calls`, `basic::test_partial_token_matching_closing_tag`, `basic::test_kimi_k2_one_shot_split`.
- **`CASE.9`** Paired reasoning + tool in same response. Ex: `test_reasoning_parser::test_nemotron_with_reasoning_and_tool_calls`.
- **`CASE.10`** Reasoning only (think tags, no tool call). Ex: `basic::test_detect_and_parse_reasoning_reasoning`.
- **`CASE.11`** `tool_choice` = auto / required / named / none. Ex: `tool_choice::test_named_tool_choice_parses_json` (**hermes only today**).
- **`CASE.12`** `finish_reason` semantics (`stop` / `tool_calls` / `length` mapping). Ex: `tool_choice_finish_reasons::*` (hermes only); `test_streaming_tool_parsers::test_qwen_finish_reason_length_vllm`.
- **`CASE.13`** Normal text interleaved with tool calls. Ex: `xml::test_parse_with_normal_text`.
- **`CASE.14`** Empty content / empty `tool_calls` array / null response. Ex: `parallel_tool_call_integration::test_empty_tool_calls`.
- **`CASE.15`** Duplicate tool calls (same name twice). No test anywhere in the repo; universal gap.
- **`CASE.16`** Regression for a specific customer bug (ticket ID referenced in test name or body). Ex: `kimi_k2::test_parse_malformed_no_section_end`.
### XML-family (`CASE.xml*`)
- **`CASE.xml1`** XML entity / HTML unescape handling (`&lt;`, `&amp;`, `&quot;` in parameter values). Ex: `xml::test_html_unescape`, `glm47::test_xml_entity_decoding`.
- **`CASE.xml2`** Schema-aware type coercion (string → number/bool/array based on declared parameter schema). Ex: `xml::test_schema_aware_type_conversion`, `glm47::test_type_coercion_*`.
### Harmony (`CASE.harmony*`)
- **`CASE.harmony1`** Channel / recipient parsing (analysis / commentary / final channels). Ex: `harmony_parser::test_parse_tool_calls_harmony_*`.
### Universal gaps (no test anywhere, not promoted to numbered categories)
- Unicode in function names (non-ASCII tool names, emoji).
- Numeric overflow in args (very large int / float outside JSON spec range).
- Empty function name (`"name": ""`).
- Concurrent parallel requests (process-level contention during parse).
- Guided-decoding ↔ tool-call interaction (constrained generation emits malformed args).
- Extremely long output (≥10 KB tool-call JSON in a single call).
- Mid-stream error injection / interruption (worker kill, network drop mid-parse).
- Schema arg-count mismatch (model emits extra or missing args vs declared schema).
---
## `CASE.1` — Single tool call, happy path
One complete, well-formed call in the response.
- Applies to every tool-call parser.
- Baseline correctness check. If `CASE.1` fails, nothing else below matters.
- Example: `dsml/parser.rs::test_parse_single_tool_call_string_param`.
## `CASE.2` — Multiple tool calls (sequential or parallel)
Two or more calls in one response, in the same block or back-to-back.
- Applies to every tool-call parser.
- Some grammars emit parallel calls in one block (DSML, XML); others emit
sequential top-level sentinels (JSON dialects). Either way, extract all.
- Example: `dsml/parser.rs::test_parse_multiple_tool_calls`,
`tool_choice::test_streaming_required_tool_parallel`.
## `CASE.3` — No tool call
Response is plain text, no tool-call grammar present.
- Applies to every tool-call parser.
- Must return empty `Vec<ToolCall>` and the input as `normal_text`. Zero false
positives.
- Example: `dsml/parser.rs::test_parse_no_tool_calls`.
## `CASE.4` — Malformed / partial JSON args
Truncated JSON, missing close brace, invalid syntax inside the arguments
payload.
- Applies to every tool-call parser. For parsers whose grammar never embeds
JSON (none today — all top-N families embed JSON somewhere), mark explicit
`N/A`.
- Behavior must be documented: either graceful fallback to string (DSML's
current behavior via `serde_json::from_str(...).unwrap_or_else(|_| String(...))`)
or explicit error. Silent drop is the failure mode.
- Example: `dsml/parser.rs::test_parse_deepseek_v4_malformed_json_value_falls_back_to_string`,
`deepseek_v3_parser.rs::test_parse_tool_calls_deepseek_v3_with_invalid_json`.
## `CASE.5` — Missing end-token recovery
The model's response is truncated before the closing fence arrives
(`<|tool_calls_section_end|>` for Kimi, `</|DSML|tool_calls>` for DeepSeek
DSML, etc.) — typically because the engine hit `max_tokens` or the model
emitted EOS mid-generation.
- Applies to every tool-call parser with paired start/end fences.
- Customer-facing bug class: silent drop of the in-flight call looks like a
successful HTTP 200 with no tool_calls and no error.
- Two acceptable resolutions: (a) recover completed invokes even without the
outer close fence (Kimi K2 does this post-fix), or (b) return an explicit
error. Either way, pin the behavior with a test so a future change is
intentional.
- Example (post-recovery): `kimi_k2_parser.rs::test_parse_malformed_no_section_end`.
- Example (behavior-pinning, pre-recovery): `dsml/parser.rs::test_parse_deepseek_v4_missing_end_token`.
## `CASE.6` — Empty args
Tool call with `arguments={}`, or a no-parameter invoke.
- Applies to every tool-call parser.
- Must still return the call — empty args is a valid call, not a missing one.
- Example: `kimi_k2_parser.rs::test_parse_no_arg_call`,
`dsml/parser.rs::test_parse_deepseek_v4_no_parameters`.
## `CASE.7` — Complex argument types
Nested objects, arrays, booleans, numbers, mixed types, Unicode values, and
newlines inside argument values.
- Applies to every tool-call parser.
- For grammars that carry type hints (DSML's `string="true|false"`), verify
JSON round-tripping. For XML grammars without hints, the type-coercion
half of the test is covered under `CASE.xml2` instead — here just verify
that complex values make it through without truncation or escape bugs.
- Example: `dsml/parser.rs::test_parse_mixed_types_realistic`,
`kimi_k2_parser.rs::test_parse_complex_json_arguments`.
## `CASE.8` — Streaming
Chunked input arriving over SSE. Covers two concerns that tend to fail
together:
1. **Token-by-token assembly** — the parser incrementally reconstructs the
tool-call structure across many small chunks.
2. **Chunk-boundary splits** — start fence, end fence, or parameter name /
value straddles a chunk boundary. Partial-token matching must return
`true` (keep buffering, don't flush as plain text) and complete the
match on the next chunk.
- Applies to every tool-call parser. Dominant production path.
- Example: `basic::test_buffer_state_persistence_across_calls`,
`basic::test_partial_token_matching_closing_tag`,
`basic::test_kimi_k2_one_shot_split`,
`test_streaming_tool_parsers::test_deepseek_v4_e2e_fragmented_tokens_vllm`.
## `CASE.9` — Paired reasoning + tool in same response
Model emits `<think>...</think>` (or analog) followed by a tool call. Both
must be extracted: `reasoning_content` populated AND `tool_calls` populated.
- Applies to every (tool, reasoning) parser pair.
- Watch for the "unclosed think-tag swallows tool call" bug — if the reasoning
parser is greedy it may eat the tool-call content that follows.
- Example: `test_reasoning_parser::test_nemotron_with_reasoning_and_tool_calls`,
`test_reasoning_parser::test_kimi_k25_with_reasoning_and_tool_calls`.
## `CASE.10` — Reasoning only
`<think>...</think>` or analog present, no tool call. Parser must populate
`reasoning_content` and leave `tool_calls` empty.
- Applies to every reasoning parser.
- Example: `reasoning/base_parser.rs::test_detect_and_parse_reasoning_reasoning`,
`reasoning/mod.rs::test_deepseek_v4_detect_and_parse`.
## `CASE.11` — `tool_choice` = auto / required / named / none
Each of the four OpenAI `tool_choice` modes exercised per parser.
- Applies to every tool-call parser.
- Cross-parser suites at `lib/llm/tests/tool_choice.rs` /
`parallel_tool_call_integration.rs` / `tool_choice_finish_reasons.rs`
run `hermes` only today. Adding a new parser requires parametrizing those
suites or adding a per-parser equivalent.
- Universal gap across most parsers in the repo as of 2026-04.
- Example: `tool_choice::test_named_tool_choice_parses_json`,
`tool_choice::test_required_tool_choice_parses_json_array`.
## `CASE.12` — `finish_reason` semantics
`stop` vs `tool_calls` vs `length` mapping, in both streaming and
non-streaming paths.
- Applies to every tool-call parser.
- When a tool call lands, `finish_reason` must become `tool_calls`. When
`max_tokens` truncates mid-stream, `length` must propagate — this is
often the signal that should trigger `CASE.5` recovery on the parser side.
- Example: `tool_choice_finish_reasons::test_named_tool_choice_normal_stop_becomes_tool_calls`,
`test_streaming_tool_parsers::test_qwen_finish_reason_length_vllm`.
## `CASE.13` — Normal text interleaved with tool calls
Model emits narration text before / after / between tool-call blocks. Parser
must split content correctly: text → `normal_content`, calls → `tool_calls`.
- Applies to every tool-call parser.
- Example: `dsml/parser.rs::test_parse_with_normal_text`,
`test_streaming_tool_parsers.rs::test_deepseek_v4_e2e_content_before_tool_vllm`.
## `CASE.14` — Empty content / empty `tool_calls` array / null response
Engine emits a chunk with `delta.content = ""`, or a final response with
`tool_calls: []`, or `null` values inside arguments.
- Applies to every tool-call parser.
- Null-value handling inside parameters is parser-level (`parse_parameters`
in DSML handles it via `serde_json::Value::Null`). Empty-choices /
empty-stream handling is typically at the e2e integration layer.
- Example: `dsml/parser.rs::test_parse_null_parameter`,
`parallel_tool_call_integration::test_empty_tool_calls`.
## `CASE.15` — Duplicate tool calls (same name twice)
Two calls to the same function name in one response, possibly with the same
arguments.
- Applies to every tool-call parser.
- **Zero coverage across the entire repo as of 2026-04.** Universal gap.
- Expected behavior: both calls must appear in `tool_calls` with distinct
IDs. (The runtime / client is responsible for deciding whether duplicate
invocation is intended.)
## `CASE.16` — Regression for a specific customer bug
Test named after (or containing) a ticket reference, pinning the fix for
a customer-reported failure.
- Applies per-incident. Not a category every parser needs to cover in
advance; populated as bugs are reported and fixed.
- Existing example: `kimi_k2_parser.rs::test_parse_malformed_no_section_end`.
---
## `CASE.xml1` — XML entity / HTML unescape handling
Parameter values contain XML-encoded entities (`&lt;`, `&amp;`, `&quot;`,
`&apos;`, numeric entities like `&#38;`) that must be decoded before the
value is surfaced to the client.
- Applies only to XML-family tool-call parsers: `hermes`, `glm47`,
`qwen3_coder`, `minimax_m2`, `kimi_k2` (despite its special-token outer
fence, the inner parameter payload is XML-ish).
- **N/A for DSML** — the `string="true|false"` attribute tells the parser
whether to JSON-decode or pass through verbatim; no entity decoding pass.
- **N/A for JSON-family and Harmony** — JSON has its own escape semantics
handled by `serde_json`.
- Example: `xml/parser.rs::test_html_unescape`,
`glm47_parser.rs::test_xml_entity_decoding`.
## `CASE.xml2` — Schema-aware type coercion
Parser uses the declared tool schema to coerce string args to
number / bool / array based on the declared parameter type.
- Applies only to XML-family parsers without explicit type annotations in
the wire format. `xml/parser.rs`, `glm47_parser.rs` do this.
- **N/A for DSML** — the `string="true|false"` attribute carries the type
intent per parameter, so no schema lookup is needed.
- **N/A for JSON-family** — JSON has native types.
- **N/A for Harmony** — payload is JSON inside the channel envelope.
- Example: `xml/parser.rs::test_schema_aware_type_conversion`,
`glm47_parser.rs::test_type_coercion_array_comma_separated`.
---
## `CASE.harmony1` — Channel / recipient parsing
OpenAI Harmony's token stream carries channel metadata
(`<|channel|>analysis|commentary|final<|message|>`) and recipient targets
(`to=functions.foo`). Parser must route the `commentary` channel content
into tool-call extraction while surfacing `analysis` as reasoning and
`final` as the user-visible output.
- **Harmony only.** N/A for every other family.
- Example: `harmony/harmony_parser.rs::test_parse_tool_calls_harmony_with_multi_args`,
`harmony/harmony_parser.rs::test_parse_tool_calls_harmony_with_normal_text`.
---
## Applicability summary
| Category block | Parsers | Notes |
| -- | -- | -- |
| `CASE.1``CASE.16` (generic) | All | Required contract for every parser |
| `CASE.xml1``CASE.xml2` | XML-family only | Entity decoding + schema-aware coercion |
| `CASE.harmony1` | Harmony only | Channel routing |
## Adding a new parser: what you must include
Minimum viable set for a new tool-call parser:
1. `CASE.1`, `CASE.2`, `CASE.3` — baseline correctness.
2. `CASE.4` or explicit N/A justification — handle or refuse malformed input.
3. `CASE.5` — pin behavior when the outer fence is missing. Silent drop is a
regression waiting to happen.
4. `CASE.6`, `CASE.7` — empty and complex args.
5. `CASE.8` — streaming. Essentially non-negotiable for any parser that sits
behind a streaming frontend.
6. `CASE.13` — interleaved text.
7. `CASE.15` — document whether duplicate calls are supported. Flat gap
today; landing a test with the parser establishes the contract.
8. Family-specific categories where applicable: `CASE.xml1` / `CASE.xml2`
for XML grammars, `CASE.harmony1` for Harmony.
For reasoning parsers, replace `CASE.4` / `CASE.5` / `CASE.8`-assembly with
`CASE.8`-partial-close-tag and `CASE.10` (reasoning-only).
......@@ -29,6 +29,22 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT
map.insert("basic", ReasoningParserType::Basic);
map.insert("gpt_oss", ReasoningParserType::GptOss);
map.insert("qwen3", ReasoningParserType::Qwen);
// DeepSeek-V4 uses the same `<think>` / `</think>` delimiters as Qwen
// (confirmed against deepseek-ai/DeepSeek-V4-Pro's encoding_dsv4.py)
// so it delegates to the same `BasicReasoningParser` config today. We
// still route through a dedicated `DeepSeekV4` variant rather than
// hard-aliasing to `Qwen` so future divergence (different special
// tokens, max-thinking mode, etc.) has a place to land without rippling
// through Qwen's own config.
//
// The three name aliases exist because callers set this via
// `--dyn-reasoning-parser` / `--reasoning-parser` with whatever string
// the HF model / vLLM recipe / chat-template author picked. We accept
// all three separator conventions (snake / kebab / concat) rather than
// force a single canonical form on users.
map.insert("deepseek_v4", ReasoningParserType::DeepSeekV4);
map.insert("deepseek-v4", ReasoningParserType::DeepSeekV4);
map.insert("deepseekv4", ReasoningParserType::DeepSeekV4);
map.insert("nemotron_deci", ReasoningParserType::NemotronDeci);
map.insert("kimi", ReasoningParserType::Kimi);
map.insert("kimi_k25", ReasoningParserType::KimiK25);
......@@ -110,6 +126,14 @@ pub enum ReasoningParserType {
Basic,
GptOss,
Qwen,
/// DeepSeek-V4-Pro / V4-Flash. Currently uses the same `<think>` /
/// `</think>` `BasicReasoningParser` config as Qwen (V4 never appends
/// `<think>` in the completion — the chat template always pre-injects it,
/// so the parser starts via `set_in_reasoning(true)` rather than
/// `force_reasoning`). A dedicated variant keeps future V4-specific
/// divergence (different delimiters, thinking-effort modes) from leaking
/// into Qwen's behavior.
DeepSeekV4,
NemotronDeci,
Kimi,
KimiK25,
......@@ -161,6 +185,12 @@ impl ReasoningParserType {
ReasoningParserType::Qwen => ReasoningParserWrapper {
parser: Box::new(basic_parser),
},
// Same `<think>` / `</think>` config as Qwen today; kept as a
// distinct variant so V4-specific divergence has somewhere to land.
// See `ReasoningParserType::DeepSeekV4` docstring for rationale.
ReasoningParserType::DeepSeekV4 => ReasoningParserWrapper {
parser: Box::new(basic_parser),
},
ReasoningParserType::NemotronDeci => ReasoningParserWrapper {
parser: Box::new(basic_parser),
},
......@@ -246,6 +276,9 @@ mod tests {
"basic",
"gpt_oss",
"qwen3",
"deepseek_v4",
"deepseek-v4",
"deepseekv4",
"nemotron_deci",
"kimi",
"kimi_k25",
......@@ -261,6 +294,45 @@ mod tests {
assert!(parsers.contains(&parser));
}
}
/// `CASE.10` — reasoning-only (V4 `<think>`/`</think>`).
#[test]
fn test_deepseek_v4_detect_and_parse() {
for parser_name in ["deepseek_v4", "deepseek-v4", "deepseekv4"] {
let mut parser = ReasoningParserType::get_reasoning_parser_from_name(parser_name);
let result = parser.detect_and_parse_reasoning("<think>thinking</think>answer", &[]);
assert_eq!(result.reasoning_text, "thinking");
assert_eq!(result.normal_text, "answer");
}
}
/// `CASE.3` / `CASE.10` — no reasoning tags ⇒ no `reasoning_content`.
#[test]
fn test_deepseek_v4_no_forced_reasoning_without_tags() {
let mut parser = ReasoningParserType::get_reasoning_parser_from_name("deepseek_v4");
let result = parser.detect_and_parse_reasoning("answer only", &[]);
assert_eq!(result.reasoning_text, "");
assert_eq!(result.normal_text, "answer only");
}
/// `CASE.8` — streaming reasoning parse (chunked).
#[test]
fn test_deepseek_v4_streaming() {
let mut parser = ReasoningParserType::get_reasoning_parser_from_name("deepseek_v4");
let chunks = ["<think>rea", "son</think>answer"];
let mut reasoning = String::new();
let mut normal = String::new();
for chunk in chunks {
let result = parser.parse_reasoning_streaming_incremental(chunk, &[]);
reasoning.push_str(&result.reasoning_text);
normal.push_str(&result.normal_text);
}
assert_eq!(reasoning, "reason");
assert_eq!(normal, "answer");
}
#[test]
fn test_kimi_k25_detect_and_parse() {
......
......@@ -391,6 +391,22 @@ impl ToolCallConfig {
}
}
pub fn deepseek_v4() -> Self {
// DeepSeek V4 format (DSML):
// <|DSML|tool_calls>
// <|DSML|invoke name="function_name">
// <|DSML|parameter name="param_name" string="true|false">value</|DSML|parameter>
// </|DSML|invoke>
// </|DSML|tool_calls>
Self {
parser_config: ParserConfig::Dsml(DsmlParserConfig {
function_calls_start: "<|DSML|tool_calls>".to_string(),
function_calls_end: "</|DSML|tool_calls>".to_string(),
..Default::default()
}),
}
}
pub fn minimax_m2() -> Self {
// MiniMax-M2.1 format:
// <minimax:tool_call>
......
......@@ -5,11 +5,84 @@
// https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main/encoding/encoding_dsv32.py
use regex::Regex;
use std::collections::HashMap;
use std::sync::{Arc, OnceLock, RwLock};
use uuid::Uuid;
use super::super::config::DsmlParserConfig;
use super::super::response::{CalledFunction, ToolCallResponse, ToolCallType};
/// Compiled regex trio for a given `DsmlParserConfig`. Compiled once and
/// reused across every subsequent parse/stream chunk.
struct DsmlRegexes {
block: Regex,
invoke: Regex,
parameter: Regex,
}
/// Cache key = the six config strings that drive the three regex patterns.
/// V3.2 and V4 are the only variants in use, so the cache has at most two
/// entries for the lifetime of the process.
type DsmlRegexKey = (String, String, String, String, String, String);
fn regex_cache() -> &'static RwLock<HashMap<DsmlRegexKey, Arc<DsmlRegexes>>> {
static CACHE: OnceLock<RwLock<HashMap<DsmlRegexKey, Arc<DsmlRegexes>>>> = OnceLock::new();
CACHE.get_or_init(|| RwLock::new(HashMap::new()))
}
/// Return the compiled regex trio for `config`, compiling on first use.
///
/// Each parse call previously recompiled three regexes from `format!`'d
/// patterns that embed the config strings — expensive on streaming hot paths.
/// The cache is keyed on the raw config strings (not the escaped patterns)
/// so distinct configs that happen to escape identically still get distinct
/// entries.
fn get_dsml_regexes(config: &DsmlParserConfig) -> anyhow::Result<Arc<DsmlRegexes>> {
let key: DsmlRegexKey = (
config.function_calls_start.clone(),
config.function_calls_end.clone(),
config.invoke_start_prefix.clone(),
config.invoke_end.clone(),
config.parameter_prefix.clone(),
config.parameter_end.clone(),
);
// Fast path: shared read lock, common after the first parse of each config.
if let Some(regexes) = regex_cache()
.read()
.expect("DSML regex cache read lock poisoned")
.get(&key)
{
return Ok(Arc::clone(regexes));
}
// Slow path: compile and install. Use `entry` so a concurrent compiler of
// the same key only inserts once (we still compile speculatively, then
// drop the duplicate — cheap on a map of <= 2 keys).
let block = Regex::new(&format!(
r"(?s){}\s*(.*?)\s*{}",
regex::escape(&config.function_calls_start),
regex::escape(&config.function_calls_end),
))?;
let invoke = Regex::new(&format!(
r#"(?s){}\"([^"]+)\"\s*>(.*?){}"#,
regex::escape(&config.invoke_start_prefix),
regex::escape(&config.invoke_end),
))?;
let parameter = Regex::new(&format!(
r#"(?s){}\"([^"]+)\"\s+string=\"(true|false)\"\s*>(.*?){}"#,
regex::escape(&config.parameter_prefix),
regex::escape(&config.parameter_end),
))?;
let regexes = Arc::new(DsmlRegexes {
block,
invoke,
parameter,
});
let mut cache = regex_cache()
.write()
.expect("DSML regex cache write lock poisoned");
Ok(Arc::clone(cache.entry(key).or_insert(regexes)))
}
/// DeepSeek V3.2 uses DSML (DeepSeek Markup Language) format for tool calls:
///
/// <|DSML|function_calls>
......@@ -97,24 +170,16 @@ fn extract_tool_calls(
config: &DsmlParserConfig,
) -> anyhow::Result<Vec<ToolCallResponse>> {
let mut tool_calls = Vec::new();
let regexes = get_dsml_regexes(config)?;
// Find all function_calls blocks
// Matches: <|DSML|function_calls> ... </|DSML|function_calls>
// Pattern: (?s) = dot matches newlines
// \s*(.*?)\s* = capture content between start/end tags (non-greedy)
let block_pattern = format!(
r"(?s){}\s*(.*?)\s*{}",
regex::escape(&config.function_calls_start),
regex::escape(&config.function_calls_end)
);
let block_regex = Regex::new(&block_pattern)?;
for block_match in block_regex.captures_iter(text) {
// Find all function_calls blocks — the block regex captures the content
// between start/end tags (non-greedy, dot-matches-newline).
for block_match in regexes.block.captures_iter(text) {
if let Some(block_content) = block_match.get(1) {
let block = block_content.as_str();
// Extract individual invokes from this block
let invokes = extract_invokes(block, config)?;
let invokes = extract_invokes(block, &regexes)?;
tool_calls.extend(invokes);
}
}
......@@ -123,29 +188,18 @@ fn extract_tool_calls(
}
/// Extract individual invoke blocks from function_calls content
fn extract_invokes(
block: &str,
config: &DsmlParserConfig,
) -> anyhow::Result<Vec<ToolCallResponse>> {
fn extract_invokes(block: &str, regexes: &DsmlRegexes) -> anyhow::Result<Vec<ToolCallResponse>> {
let mut invokes = Vec::new();
// Regex to match: <|DSML|invoke name="function_name">..content..</|DSML|invoke>
// Note: invoke_start_prefix is "<|DSML|invoke name=" (no quotes, we add them in pattern)
let invoke_pattern = format!(
r#"(?s){}\"([^"]+)\"\s*>(.*?){}"#,
regex::escape(&config.invoke_start_prefix),
regex::escape(&config.invoke_end)
);
let invoke_regex = Regex::new(&invoke_pattern)?;
for invoke_match in invoke_regex.captures_iter(block) {
// Matches: <|DSML|invoke name="function_name">..content..</|DSML|invoke>
for invoke_match in regexes.invoke.captures_iter(block) {
if let (Some(name_match), Some(content_match)) = (invoke_match.get(1), invoke_match.get(2))
{
let function_name = name_match.as_str().trim().to_string();
let invoke_content = content_match.as_str();
// Parse parameters from invoke content
let parameters = parse_parameters(invoke_content, config)?;
let parameters = parse_parameters(invoke_content, regexes)?;
// Create tool call response
let arguments_json = serde_json::to_string(&parameters)?;
......@@ -167,24 +221,12 @@ fn extract_invokes(
/// Parse parameters from invoke content
fn parse_parameters(
content: &str,
config: &DsmlParserConfig,
regexes: &DsmlRegexes,
) -> anyhow::Result<serde_json::Map<String, serde_json::Value>> {
let mut parameters = serde_json::Map::new();
// Build pattern with proper escaping
// Match: <|DSML|parameter name="param_name" string="true|false">value</|DSML|parameter>
// Note: parameter_prefix is "<|DSML|parameter name=" (no quotes, we add them in pattern)
let prefix_escaped = regex::escape(&config.parameter_prefix);
let end_escaped = regex::escape(&config.parameter_end);
let param_pattern = format!(
r#"(?s){}\"([^"]+)\"\s+string=\"(true|false)\"\s*>(.*?){}"#,
prefix_escaped, end_escaped
);
let param_regex = Regex::new(&param_pattern)?;
for param_match in param_regex.captures_iter(content) {
// Matches: <|DSML|parameter name="param_name" string="true|false">value</|DSML|parameter>
for param_match in regexes.parameter.captures_iter(content) {
if let (Some(name_match), Some(string_match), Some(value_match)) =
(param_match.get(1), param_match.get(2), param_match.get(3))
{
......@@ -224,6 +266,14 @@ mod tests {
DsmlParserConfig::default()
}
fn get_v4_test_config() -> DsmlParserConfig {
DsmlParserConfig {
function_calls_start: "<|DSML|tool_calls>".to_string(),
function_calls_end: "</|DSML|tool_calls>".to_string(),
..Default::default()
}
}
#[test]
fn test_detect_tool_call_start() {
let config = get_test_config();
......@@ -239,6 +289,74 @@ mod tests {
assert!(!detect_tool_call_start_dsml("no tool call here", &config));
}
// -------------------------------------------------------------------
// DeepSeek V4 coverage (see lib/parsers/TESTING.md for CASE.* taxonomy).
//
// Covered by the V4 tests below (or by a shared DSML generic test):
// - CASE.1 single-call (parsers.rs :: test_deepseek_v4_single_tool_call)
// - CASE.2 multi-calls (test_parse_deepseek_v4_multiple_tool_calls)
// - CASE.3 no-call (shared: test_parse_no_tool_calls)
// - CASE.4 malformed-args (test_parse_deepseek_v4_malformed_json_value_falls_back_to_string,
// test_parse_deepseek_v4_missing_invoke_close_drops_call)
// - CASE.5 missing-end-token (test_parse_deepseek_v4_missing_end_token{,_multiple_calls})
// — PINNED AS BROKEN: parser drops the call. See TODO below.
// - CASE.6 empty-args (test_parse_deepseek_v4_no_parameters)
// - CASE.7 complex-args (shared: test_parse_mixed_types_realistic, test_parse_nested_object_parameter,
// lib/llm/tests/test_streaming_tool_parsers :: ..._mixed_param_types_vllm,
// ..._special_chars_vllm)
// - CASE.8 streaming (test_detect_tool_call_start_v4, test_find_tool_call_end_position_v4,
// lib/llm/tests/test_streaming_tool_parsers :: ..._fragmented_tokens_vllm)
// - CASE.9 reasoning-plus-tool (lib/llm/tests/test_streaming_tool_parsers :: ..._with_tools_vllm
// — fixtures include <think>...</think> alongside DSML)
// - CASE.10 reasoning-only (reasoning/mod.rs :: test_deepseek_v4_detect_and_parse etc.)
// - CASE.12 finish-reason (lib/llm/tests/test_streaming_tool_parsers :: ..._with_tools_vllm →
// FinishReason::ToolCalls; ..._with_no_tools_vllm → FinishReason::Stop
// — Length variant NOT covered, see TODO)
// - CASE.13 interleaved-text (test_parse_deepseek_v4_multiple_tool_calls prefix text;
// lib/llm/tests/test_streaming_tool_parsers :: ..._content_before_tool_vllm)
//
// - CASE.xml.* N/A — DSML carries per-parameter string="true|false" type hints,
// so XML entity decoding (CASE.xml.entities) and schema-aware
// coercion (CASE.xml.schema-coercion) don't apply.
// - CASE.harmony.* N/A — Harmony-only.
//
// TODO — not yet covered for V4:
// - CASE.5 Fix mid-stream truncation: parser currently drops all calls when
// </|DSML|tool_calls> is absent (max_tokens / EOS before close).
// Same class as Kimi K2 pre-DIS-1765. Recovery pattern: scan for
// complete <|DSML|invoke>...</|DSML|invoke> pairs even without
// the outer close fence (see kimi_k2_parser.rs for precedent).
// Pinning tests below capture the current silent-drop behavior;
// flip them when recovery lands.
// - CASE.4 Variants not pinned: missing </|DSML|parameter> close tag,
// middle-invoke truncation corrupting subsequent invokes (non-greedy
// regex bleed-through). Same structural class as CASE.5.
// - CASE.11 tool_choice auto/required/named/none — cross-parser suites at
// lib/llm/tests/tool_choice.rs run hermes only; V4 not exercised.
// - CASE.12 FinishReason::Length — current E2E fixtures only cover Stop and
// ToolCalls finish reasons. No truncation-forcing fixture.
// - CASE.14 empty-content / null response at the e2e layer.
// - CASE.15 duplicate-calls (same name twice) — universal gap across all parsers.
// - CASE.16 regression — V4 is hours old (2026-04-24); no customer bugs filed yet.
// -------------------------------------------------------------------
/// `CASE.8` — streaming start-token detection (V4 variant).
#[test]
fn test_detect_tool_call_start_v4() {
let config = get_v4_test_config();
assert!(detect_tool_call_start_dsml("<|DSML|tool_calls>", &config));
assert!(detect_tool_call_start_dsml(
"text <|DSML|tool_calls>",
&config
));
assert!(detect_tool_call_start_dsml("<|DSML|tool_c", &config));
assert!(!detect_tool_call_start_dsml(
"<|DSML|function_calls>",
&config
));
assert!(!detect_tool_call_start_dsml("no tool call here", &config));
}
#[test]
fn test_find_tool_call_end_position() {
let config = get_test_config();
......@@ -247,6 +365,15 @@ mod tests {
assert_eq!(&text[pos..], "more");
}
/// `CASE.8` — streaming end-position lookup (V4 variant).
#[test]
fn test_find_tool_call_end_position_v4() {
let config = get_v4_test_config();
let text = "<|DSML|tool_calls><|DSML|invoke name=\"test\"></|DSML|invoke></|DSML|tool_calls>more";
let pos = find_tool_call_end_position_dsml(text, &config);
assert_eq!(&text[pos..], "more");
}
#[test]
fn test_parse_single_tool_call_string_param() {
let input = r#"<|DSML|function_calls>
......@@ -320,6 +447,54 @@ mod tests {
assert_eq!(args2["location"], "Hangzhou");
}
/// `CASE.2` multi-calls + `CASE.13` interleaved-text (prefix text before the block).
#[test]
fn test_parse_deepseek_v4_multiple_tool_calls() {
let input = r#"Let's check this. <|DSML|tool_calls>
<|DSML|invoke name="get_favorite_tourist_spot">
<|DSML|parameter name="city" string="true">Beijing</|DSML|parameter>
</|DSML|invoke>
<|DSML|invoke name="search">
<|DSML|parameter name="query" string="true">search agent benchmark 2024</|DSML|parameter>
<|DSML|parameter name="topn" string="false">10</|DSML|parameter>
<|DSML|parameter name="source" string="true">web</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>"#;
let config = get_v4_test_config();
let (calls, normal) = try_tool_call_parse_dsml(input, &config).unwrap();
assert_eq!(calls.len(), 2);
assert_eq!(normal, Some("Let's check this.".to_string()));
let (name1, args1) = extract_name_and_args(calls[0].clone());
assert_eq!(name1, "get_favorite_tourist_spot");
assert_eq!(args1["city"], "Beijing");
let (name2, args2) = extract_name_and_args(calls[1].clone());
assert_eq!(name2, "search");
assert_eq!(args2["query"], "search agent benchmark 2024");
assert_eq!(args2["topn"], 10);
assert_eq!(args2["source"], "web");
}
/// `CASE.6` — empty args (no-parameter invoke).
#[test]
fn test_parse_deepseek_v4_no_parameters() {
let input = r#"<|DSML|tool_calls>
<|DSML|invoke name="get_current_time">
</|DSML|invoke>
</|DSML|tool_calls>"#;
let config = get_v4_test_config();
let (calls, normal) = try_tool_call_parse_dsml(input, &config).unwrap();
assert_eq!(calls.len(), 1);
assert_eq!(normal, Some("".to_string()));
let (name, args) = extract_name_and_args(calls[0].clone());
assert_eq!(name, "get_current_time");
assert_eq!(args, serde_json::json!({}));
}
#[test]
fn test_parse_with_normal_text() {
let input = r#"Here's the result: <|DSML|function_calls>
......@@ -490,4 +665,116 @@ mod tests {
let (_, args) = extract_name_and_args(calls[0].clone());
assert!(args["value"].is_null());
}
// Corner-case pinning tests. See the V4 coverage manifest above for the
// full mapping from CASE.* → test. Each test's doc-comment names the
// specific CASE it pins.
/// `CASE.5` — missing end-token recovery.
/// **Pinned as broken** — parser drops the call; see the TODO block above.
///
/// If a DeepSeek V4 stream is truncated before `</|DSML|tool_calls>`
/// arrives (max_tokens cut-off, EOS mid-generation, connection drop),
/// the block regex requires both fences and matches zero times. The
/// entire DSML-looking payload falls through as raw `normal_text`; no
/// tool calls are recovered.
///
/// This is the same structural failure mode Kimi K2 had before its
/// parser gained end-token recovery; see
/// `kimi_k2_parser.rs::test_parse_malformed_no_section_end` for the
/// post-fix recovery pattern.
#[test]
fn test_parse_deepseek_v4_missing_end_token() {
// Start fence + complete invoke, but no </|DSML|tool_calls>.
let input = "<|DSML|tool_calls>\n\
<|DSML|invoke name=\"get_weather\">\n\
<|DSML|parameter name=\"city\" string=\"true\">NYC</|DSML|parameter>\n\
</|DSML|invoke>";
let config = get_v4_test_config();
let (calls, normal_text) = try_tool_call_parse_dsml(input, &config).unwrap();
assert!(
calls.is_empty(),
"V4 DSML parser currently drops tool calls when \
</|DSML|tool_calls> is missing. \
If recovery is added, flip this assertion."
);
assert_eq!(
normal_text.as_deref(),
Some(input),
"Unrecovered payload should fall through to normal_text verbatim."
);
}
/// `CASE.5` — multiple complete invokes, missing end fence.
///
/// Even with multiple fully-formed invokes inside the start fence, the
/// absence of the closing fence prevents the block regex from matching.
/// All calls are dropped. If the parser ever gains partial-block
/// recovery, this test will fail and force an intentional update.
#[test]
fn test_parse_deepseek_v4_missing_end_token_multiple_calls() {
let input = "<|DSML|tool_calls>\n\
<|DSML|invoke name=\"a\">\n\
<|DSML|parameter name=\"x\" string=\"true\">1</|DSML|parameter>\n\
</|DSML|invoke>\n\
<|DSML|invoke name=\"b\">\n\
<|DSML|parameter name=\"y\" string=\"true\">2</|DSML|parameter>\n\
</|DSML|invoke>";
let config = get_v4_test_config();
let (calls, _) = try_tool_call_parse_dsml(input, &config).unwrap();
assert!(
calls.is_empty(),
"Even two fully-formed invokes are dropped when the outer \
</|DSML|tool_calls> is missing."
);
}
/// `CASE.4` — malformed JSON in a `string="false"` parameter value falls back
/// to a string. `parse_parameters` explicitly swallows the serde error
/// (unwrap_or_else → Value::String). Pin the fallback so removing it
/// (which would cause the whole call to 500 on ragged-edge JSON) is a
/// deliberate change.
#[test]
fn test_parse_deepseek_v4_malformed_json_value_falls_back_to_string() {
let input = "<|DSML|tool_calls>\n\
<|DSML|invoke name=\"test\">\n\
<|DSML|parameter name=\"payload\" string=\"false\">{this is not valid json</|DSML|parameter>\n\
</|DSML|invoke>\n\
</|DSML|tool_calls>";
let config = get_v4_test_config();
let (calls, _) = try_tool_call_parse_dsml(input, &config).unwrap();
assert_eq!(calls.len(), 1);
let (name, args) = extract_name_and_args(calls[0].clone());
assert_eq!(name, "test");
assert_eq!(
args["payload"], "{this is not valid json",
"Malformed JSON should fall back to the raw string, not drop \
the parameter or the call."
);
}
/// `CASE.4` — malformed invoke (missing `</|DSML|invoke>` but block fences
/// intact). The invoke regex requires its own close tag, so the call is
/// silently dropped. Pin the behavior.
#[test]
fn test_parse_deepseek_v4_missing_invoke_close_drops_call() {
let input = "<|DSML|tool_calls>\n\
<|DSML|invoke name=\"test\">\n\
<|DSML|parameter name=\"x\" string=\"true\">value</|DSML|parameter>\n\
</|DSML|tool_calls>";
let config = get_v4_test_config();
let (calls, _) = try_tool_call_parse_dsml(input, &config).unwrap();
assert!(
calls.is_empty(),
"Malformed invoke (missing </|DSML|invoke>) is dropped today. \
If recovery is added, flip this assertion."
);
}
}
......@@ -43,6 +43,9 @@ pub fn get_tool_parser_map() -> &'static HashMap<&'static str, ToolCallConfig> {
map.insert("deepseek_v3", ToolCallConfig::deepseek_v3());
map.insert("deepseek_v3_1", ToolCallConfig::deepseek_v3_1());
map.insert("deepseek_v3_2", ToolCallConfig::deepseek_v3_2());
map.insert("deepseek_v4", ToolCallConfig::deepseek_v4());
map.insert("deepseek-v4", ToolCallConfig::deepseek_v4());
map.insert("deepseekv4", ToolCallConfig::deepseek_v4());
map.insert("qwen3_coder", ToolCallConfig::qwen3_coder());
map.insert("jamba", ToolCallConfig::jamba());
map.insert("minimax_m2", ToolCallConfig::minimax_m2());
......@@ -241,6 +244,9 @@ mod tests {
"deepseek_v3",
"deepseek_v3_1",
"deepseek_v3_2",
"deepseek_v4",
"deepseek-v4",
"deepseekv4",
"qwen3_coder",
"jamba",
"nemotron_nano",
......@@ -1701,6 +1707,56 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
assert_eq!(args["topn"], 10); // Should be number, not string
assert_eq!(args["source"], "web");
}
/// `CASE.1` — single-call happy path (V4).
#[tokio::test]
async fn test_deepseek_v4_single_tool_call() {
let input = r#"<|DSML|tool_calls>
<|DSML|invoke name="get_datetime">
<|DSML|parameter name="timezone" string="true">Asia/Shanghai</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>"#;
let (tool_calls, normal_text) =
detect_and_parse_tool_call(input, Some("deepseek_v4"), None)
.await
.expect("Failed to parse");
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "get_datetime");
assert_eq!(normal_text, Some("".to_string()));
let args: serde_json::Value =
serde_json::from_str(&tool_calls[0].function.arguments).unwrap();
assert_eq!(args["timezone"], "Asia/Shanghai");
}
/// Alias registration: verifies `deepseek-v4` and `deepseekv4` route to the same parser as `deepseek_v4`. Not a CASE.*; covers registry plumbing.
#[tokio::test]
async fn test_deepseek_v4_compatibility_aliases() {
let input = r#"<|DSML|tool_calls>
<|DSML|invoke name="search">
<|DSML|parameter name="query" string="true">search agent benchmark 2024</|DSML|parameter>
<|DSML|parameter name="topn" string="false">10</|DSML|parameter>
<|DSML|parameter name="source" string="true">web</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>"#;
for parser_name in ["deepseek_v4", "deepseek-v4", "deepseekv4"] {
let (tool_calls, _) = detect_and_parse_tool_call(input, Some(parser_name), None)
.await
.expect("Failed to parse");
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "search");
let args: serde_json::Value =
serde_json::from_str(&tool_calls[0].function.arguments).unwrap();
assert_eq!(args["query"], "search agent benchmark 2024");
assert_eq!(args["topn"], 10);
assert_eq!(args["source"], "web");
}
}
#[tokio::test]
async fn test_hermes_parser_without_new_line() {
......
......@@ -794,24 +794,6 @@ class TestToolCallingProtocol:
names.add(tc["function"]["name"])
assert len(names) >= 2, f"expected at least 2 distinct tools, got {names}"
def test_tool_call_ids_unique_in_single_response(self, client: OpenAI, model: str):
result = stream_chat(
client,
model,
messages=[
{
"role": "user",
"content": "Get weather for New York, London, and Tokyo.",
}
],
tools=TOOLS_WEATHER,
tool_choice="required",
parallel_tool_calls=True,
)
assert_finish_reason(result, {"tool_calls"})
ids = [tc["id"] for tc in result.tool_calls]
assert len(ids) == len(set(ids)), f"duplicate tool ids: {ids}"
def test_array_argument_schema_valid(self, client: OpenAI, model: str):
tools = [
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment