Unverified Commit 35fa7129 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat(v4): cherry-pick #8665 onto release/deepseekv4 (#8709)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent 01002df7
{
"request_id": "deepseek-v4-mixed-param-types-test",
"expected_output": {"normal_content": "", "reasoning_content": "The user asked me to send a high-priority overdue-billing notification. I'll call send_notification with the appropriate parameters.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "send_notification", "arguments": "{\"recipient\": \"user@example.com\", \"priority\": 3, \"urgent\": true, \"tags\": [\"billing\", \"overdue\"], \"metadata\": {\"ticket\": \"T-42\", \"retries\": 2}}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<think>The user asked me to send a high-priority overdue-billing notification. I'll call send_notification with the appropriate parameters.</think>","role":"assistant","reasoning_content":"The user asked me to send a high-priority overdue-billing notification. I'll call send_notification with the appropriate parameters."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"send_notification\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"recipient\" string=\"true\">user@example.com</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"priority\" string=\"false\">3</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"urgent\" string=\"false\">true</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"tags\" string=\"false\">[\"billing\", \"overdue\"]</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"metadata\" string=\"false\">{\"ticket\": \"T-42\", \"retries\": 2}</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-mixed","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
{
"request_id": "deepseek-v4-multi-tool-test",
"expected_output": {"normal_content": "", "reasoning_content": "The user wants to check the weather in Beijing and Shanghai, I need to call the get_current_weather tool to get this information.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Beijing\", \"format\": \"celsius\"}"}}, {"id": "call_2", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Shanghai\", \"format\": \"celsius\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<think>The user wants to check the weather in Beijing and Shanghai, I need to call the get_current_weather tool to get this information.</think>","role":"assistant","reasoning_content":"The user wants to check the weather in Beijing and Shanghai, I need to call the get_current_weather tool to get this information."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_current_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Beijing</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"format\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_current_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Shanghai</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"format\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-multi-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
{
"request_id": "deepseek-v4-no-tool-test",
"expected_output": {"normal_content": "Hi! I'm here to help — what would you like to work on today?", "reasoning_content": "User greeted me politely. A short friendly reply is appropriate; no tools needed.", "tool_calls": []},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":"<think>User greeted me politely. A short friendly reply is appropriate; no tools needed.</think>","role":"assistant","reasoning_content":"User greeted me politely. A short friendly reply is appropriate; no tools needed."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":"Hi! I'm here to help — ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":"what would you like to work on today?","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-no-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"stop"}]}}
]
}
{
"request_id": "deepseek-v4-special-chars-test",
"expected_output": {"normal_content": "", "reasoning_content": "The user wants me to save a multiline note with special characters, quotes, unicode, and emoji. I'll call save_note.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "save_note", "arguments": "{\"note\": \"He said \\\"hello\\\".\\n\\t'world' `backtick` — 中文测试 — 🚀✨ <not_a_sentinel> & </not_a_sentinel>.\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<think>The user wants me to save a multiline note with special characters, quotes, unicode, and emoji. I'll call save_note.</think>","role":"assistant","reasoning_content":"The user wants me to save a multiline note with special characters, quotes, unicode, and emoji. I'll call save_note."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"save_note\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"note\" string=\"true\">","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"He said \"hello\".\n\t'world' `backtick` ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"— 中文测试 — 🚀✨ ","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"<not_a_sentinel> & </not_a_sentinel>.</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-special","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
{
"request_id": "deepseek-v4-tool-call-test",
"expected_output": {"normal_content": "", "reasoning_content": "User wants the current weather in Beijing. I'll call get_current_weather with celsius units.", "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "get_current_weather", "arguments": "{\"location\": \"Beijing\", \"format\": \"celsius\"}"}}]},
"input_stream": [
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<think>User wants the current weather in Beijing. I'll call get_current_weather with celsius units.</think>","role":"assistant","reasoning_content":"User wants the current weather in Beijing. I'll call get_current_weather with celsius units."}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|tool_calls>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|invoke name=\"get_current_weather\">\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"location\" string=\"true\">Beijing</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"<|DSML|parameter name=\"format\" string=\"true\">celsius</|DSML|parameter>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"</|DSML|invoke>\n","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":"</|DSML|tool_calls>","role":"assistant"}}]}},
{"data":{"id":"chatcmpl-deepseek-v4-tool","choices":[{"index":0,"delta":{"content":null,"role":"assistant"},"finish_reason":"tool_calls"}]}}
]
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Tests for DeepSeek V4 encoding against official test data
//!
//! These tests use the official test files from:
//! https://huggingface.co/deepseek-ai/DeepSeek-V4-Pro/tree/main/encoding
use dynamo_llm::preprocessor::prompt::deepseek_v4::{ThinkingMode, encode_messages};
use serde_json::Value as JsonValue;
use std::fs;
use std::path::PathBuf;
fn get_test_data_path() -> PathBuf {
PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("tests/data/deepseek-v4")
}
/// Load an input fixture. V4 fixtures come in two shapes:
/// 1. `{"tools": [...], "messages": [...]}` — tools injected on first (system) message
/// 2. bare `[...]` — just the messages array
fn load_messages(path: &PathBuf) -> Vec<JsonValue> {
let raw: JsonValue = serde_json::from_str(
&fs::read_to_string(path).unwrap_or_else(|_| panic!("Failed to read {:?}", path)),
)
.unwrap_or_else(|_| panic!("Failed to parse {:?}", path));
if let Some(messages) = raw.get("messages").and_then(|m| m.as_array()) {
let mut messages = messages.clone();
if let Some(tools) = raw.get("tools")
&& let Some(first) = messages.get_mut(0)
&& let Some(obj) = first.as_object_mut()
{
obj.insert("tools".to_string(), tools.clone());
}
messages
} else if let Some(arr) = raw.as_array() {
arr.clone()
} else {
panic!("Unexpected input shape in {:?}", path);
}
}
fn run_official_test(input_file: &str, output_file: &str, thinking_mode: ThinkingMode) {
let test_dir = get_test_data_path();
let messages = load_messages(&test_dir.join(input_file));
let expected = fs::read_to_string(test_dir.join(output_file))
.unwrap_or_else(|_| panic!("Failed to read {}", output_file));
let actual = encode_messages(&messages, thinking_mode, true)
.unwrap_or_else(|e| panic!("encode_messages failed for {}: {:?}", input_file, e));
let exp = expected.trim_end();
let act = actual.trim_end();
if exp != act {
println!("=== Test: {} ===", input_file);
let exp_lines: Vec<&str> = exp.lines().collect();
let act_lines: Vec<&str> = act.lines().collect();
for (i, (el, al)) in exp_lines.iter().zip(act_lines.iter()).enumerate() {
if el != al {
println!("Line {} differs:", i + 1);
println!(" Expected: {:?}", el);
println!(" Actual: {:?}", al);
break;
}
}
if exp_lines.len() != act_lines.len() {
println!(
"\nLine count mismatch: expected {} lines, got {} lines",
exp_lines.len(),
act_lines.len()
);
}
panic!("Output does not match expected for {}", input_file);
}
}
/// Case 1 — thinking mode, single tool, tool result round-trip.
#[test]
fn test_official_thinking_with_tools() {
run_official_test(
"test_input_1.json",
"test_output_1.txt",
ThinkingMode::Thinking,
);
}
/// Case 2 — thinking mode, no tools, multi-turn (drop_thinking strips earlier reasoning).
#[test]
fn test_official_thinking_no_tools_multiturn() {
run_official_test(
"test_input_2.json",
"test_output_2.txt",
ThinkingMode::Thinking,
);
}
/// Case 3 — thinking mode, developer role with tools + latest_reminder + tool result.
#[test]
fn test_official_developer_with_tools_and_reminder() {
run_official_test(
"test_input_3.json",
"test_output_3.txt",
ThinkingMode::Thinking,
);
}
/// Case 4 — chat mode, latest_reminder + task="action" + mask preservation.
#[test]
fn test_official_chat_mode_action_task() {
run_official_test("test_input_4.json", "test_output_4.txt", ThinkingMode::Chat);
}
......@@ -1106,6 +1106,167 @@ mod tests {
);
}
// ---- DeepSeek V4 (DSML format) streaming parser tests ----
//
// V4 emits tool calls inside a DSML block:
// <|DSML|tool_calls>
// <|DSML|invoke name="fn">
// <|DSML|parameter name="k" string="true|false">v</|DSML|parameter>
// </|DSML|invoke>
// </|DSML|tool_calls>
// Fixtures live under tests/data/vllm/deepseek-v4/.
/// Shared harness for DeepSeek V4 e2e fixtures that end in a tool call.
async fn run_deepseek_v4_tool_call_fixture(file_path: &str) {
let test_data = load_test_data(file_path);
let input_stream = stream::iter(test_data.stream_chunks);
let output_chunks = parse_response_stream(
input_stream,
true,
true,
Some("deepseek_v4".to_string()),
Some("deepseek_v4".to_string()),
)
.await;
assert!(!output_chunks.is_empty(), "Should have output chunks");
let aggregated = aggregate_content_from_chunks(&output_chunks);
assert_eq!(
aggregated.reasoning_content, test_data.expected_reasoning_content,
"Should have extracted reasoning content.",
);
assert_eq!(
aggregated.normal_content, test_data.expected_normal_content,
"Normal content should match expected value.",
);
let expected_has_tool_calls = !test_data.expected_tool_calls.is_empty();
assert_eq!(
aggregated.has_tool_calls, expected_has_tool_calls,
"Tool calls presence should match expected value"
);
assert_tool_calls(&aggregated.tool_calls, &test_data.expected_tool_calls);
assert!(
validate_finish_reason(&output_chunks, FinishReason::ToolCalls),
"finish_reason validation failed for tool call case"
);
}
/// Single tool call, thinking mode (direct V4 analog of the V3 tool fixture).
/// `CASE.1` + `CASE.8` + `CASE.9` — single tool call, streaming assembly, paired with reasoning. Also validates `CASE.12` finish_reason=tool_calls.
#[tokio::test]
async fn test_deepseek_v4_e2e_with_tools_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_tool.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// No tool call — thinking + plain body; finish_reason=stop.
/// `CASE.3` + `CASE.10` — no tool call + reasoning only. Also validates `CASE.12` finish_reason=stop.
#[tokio::test]
async fn test_deepseek_v4_e2e_with_no_tools_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_no_tool.json",
DATA_ROOT_PATH
);
let test_data = load_test_data(&file_path);
let input_stream = stream::iter(test_data.stream_chunks);
let output_chunks = parse_response_stream(
input_stream,
true,
true,
Some("deepseek_v4".to_string()),
Some("deepseek_v4".to_string()),
)
.await;
assert!(!output_chunks.is_empty(), "Should have output chunks");
let aggregated = aggregate_content_from_chunks(&output_chunks);
assert_eq!(
aggregated.reasoning_content, test_data.expected_reasoning_content,
"Should have extracted reasoning content.",
);
assert_eq!(
aggregated.normal_content, test_data.expected_normal_content,
"Normal content should match expected value.",
);
assert!(!aggregated.has_tool_calls, "Should not have any tool calls");
assert!(
validate_finish_reason(&output_chunks, FinishReason::Stop),
"finish_reason validation failed for non-tool call case"
);
}
/// Two parallel tool calls inside one DSML block.
/// `CASE.2` — parallel tool calls in one DSML block.
#[tokio::test]
async fn test_deepseek_v4_e2e_multi_tool_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_multi_tool.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// string="true" vs string="false" — numbers, booleans, arrays, objects must
/// round-trip as their proper JSON types inside arguments.
/// `CASE.7` — complex args (mixed string="true|false" → strings / numbers / bools / arrays / objects round-trip).
#[tokio::test]
async fn test_deepseek_v4_e2e_mixed_param_types_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_mixed_param_types.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// Body text emitted before the DSML block — parser must populate both
/// normal_content and tool_calls.
/// `CASE.13` — normal text interleaved before the DSML block.
#[tokio::test]
async fn test_deepseek_v4_e2e_content_before_tool_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_content_before_tool.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// Parameter value containing unicode, emoji, embedded quotes/newlines/tabs,
/// and fragments that look like sentinels but aren't — must not confuse the
/// parser, which anchors only on the exact </|DSML|parameter> token.
/// `CASE.7` — Unicode / special characters inside argument values. (`CASE.xml.entities` is N/A for DSML — no entity decoding.)
#[tokio::test]
async fn test_deepseek_v4_e2e_special_chars_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_special_chars.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
/// Adversarial streaming: every DSML character is its own delta (~200 chunks).
/// Exercises buffer accumulation across chunk boundaries.
/// `CASE.8` — streaming chunk-boundary splits (tokens straddle chunks).
#[tokio::test]
async fn test_deepseek_v4_e2e_fragmented_tokens_vllm() {
let file_path = format!(
"{}/vllm/deepseek-v4/chat_completion_stream_fragmented_tokens.json",
DATA_ROOT_PATH
);
run_deepseek_v4_tool_call_fixture(&file_path).await;
}
// ---- Kimi K2 streaming jail reproduction tests ----
//
// These reproduce the customer-reported issue (DIS-1765): Kimi K2 agentic
......
# dynamo-parsers
Rust crate for parsing **tool calls** and **reasoning content** out of raw LLM
output. Wire-format-aware, streaming-first, model-family-aware.
This is the post-model side of Dynamo's chat-completions pipeline: given a
token stream from vLLM or SGLang, extract structured `Vec<ToolCall>` +
`reasoning_content` for the client. The pre-model side (prompt formatting)
lives in `lib/llm/src/preprocessor/prompt/`.
## What's in the crate
Two top-level modules, each with its own parser registry:
```
lib/parsers/
├── src/
│ ├── tool_calling/ ← tool-call extraction (17 registered parsers)
│ │ ├── parsers.rs — registry + dispatch (detect_and_parse_tool_call)
│ │ ├── config.rs — per-parser ToolCallConfig
│ │ ├── response.rs — ToolCallResponse shape (wire type)
│ │ ├── dsml/ — DeepSeek V3.2 / V4 DSML grammar
│ │ ├── xml/ — hermes, glm47, kimi_k2, minimax_m2, qwen3_coder
│ │ ├── json/ — deepseek_v3, deepseek_v3_1, nemotron_deci/nano, jamba, mistral, phi4, llama3_json
│ │ ├── harmony/ — OpenAI gpt-oss (Harmony token stream, uses openai_harmony crate)
│ │ └── pythonic/ — Python function-call syntax (some Llama variants)
│ └── reasoning/ ← reasoning-content extraction (14 registered parsers)
│ ├── mod.rs — registry + dispatch
│ ├── base_parser.rs — BasicReasoningParser (<think> ... </think>)
│ ├── gpt_oss_parser.rs — Harmony channel parsing
│ ├── granite_parser.rs — Granite-style
│ └── minimax_append_think_parser.rs — MiniMax inline-reasoning
```
## How a request flows through the crate
```
token stream from engine
┌─────────────────────────────────┐
│ reasoning parser │ — registered by name via
│ (basic / gpt_oss / ...) │ reasoning::mod.rs get_reasoning_parser_map()
│ │ returns: (reasoning_content, non_reasoning_tail)
└─────────────────────────────────┘
▼ (non-reasoning tail)
┌─────────────────────────────────┐
│ tool-call parser │ — registered by name via
│ dispatched on parser name │ tool_calling::parsers::get_tool_parser_map()
│ which picks a ParserConfig: │
│ - Dsml(DsmlParserConfig) │ → try_tool_call_parse_dsml
│ - Json(JsonParserConfig) │ → try_tool_call_parse_json
│ - Xml(XmlParserConfig) │ → try_tool_call_parse_xml
│ - KimiK2(KimiK2ParserConfig)│ → try_tool_call_parse_kimi_k2
│ - Pythonic / Harmony │
└─────────────────────────────────┘
Vec<ToolCallResponse> + normal_text
```
Main public entry points in `tool_calling/parsers.rs`:
- `detect_and_parse_tool_call(input, parser_name, schema) -> (calls, normal_text)`
- `try_tool_call_parse(input, config) -> (calls, normal_text)` (lower-level, bypasses the registry)
- `detect_tool_call_start(chunk, parser_name)` — streaming: "is this chunk starting a tool-call block?"
- `find_tool_call_end_position(chunk, parser_name)` — streaming: "where does the block end in this chunk?"
## Parser-family cheat sheet
When adding a new model, the right parser family is usually one of:
| Family | Grammar | Shared engine | Examples |
| -- | -- | -- | -- |
| **DSML** | `<|DSML|tool_calls>...` with typed `string="true|false"` parameters | `dsml/parser.rs` | DeepSeek V3.2, V4 |
| **XML** | `<tool_call>...</tool_call>` with nested `<parameter>` or `<function>` | `xml/parser.rs` (generic) or own file for variants | hermes, qwen3_coder, minimax_m2, glm47 (own), kimi_k2 (own, special-token XML) |
| **JSON** | Start sentinel + bare JSON array of `{name, arguments}` | `json/base_json_parser.rs` | deepseek_v3, deepseek_v3_1, nemotron_deci/nano |
| **Harmony** | OpenAI Harmony token stream with `<\|channel\|>`, `<\|message\|>`, `<\|call\|>` | `harmony/harmony_parser.rs` (wraps external `openai_harmony` crate) | gpt-oss-20B / 120B |
| **Pythonic** | `[func_name(arg=value, ...)]` Python function-call syntax | `pythonic/pythonic_parser.rs` | some Llama variants |
Reasoning parsers:
| Family | Grammar | Shared engine | Examples |
| -- | -- | -- | -- |
| **Basic (think-tag)** | `<think>...</think>` | `reasoning/base_parser.rs` (BasicReasoningParser) | Qwen3, Nemotron, Kimi K2.5, DeepSeek R1 / V4, GLM-4.5+ |
| **Append-think** | `<think>...</think>` left inline as text, with `<think>` prefix on first chunk | `reasoning/minimax_append_think_parser.rs` | MiniMax M2 |
| **Harmony channel** | Hidden `analysis` channel | `reasoning/gpt_oss_parser.rs` (wraps external `openai_harmony`) | gpt-oss-20B / 120B |
| **Granite** | Custom start/end tokens | `reasoning/granite_parser.rs` | IBM Granite |
## Adding a new parser
1. **Pick the family** from the cheat sheet above. If an existing config-driven
family fits, add a `ToolCallConfig::<your_model>()` constructor in
`tool_calling/config.rs`, register it in `tool_calling/parsers.rs`. Done —
you inherit all the shared parser and tests.
2. **If the grammar is genuinely new**, add a module under `tool_calling/` and
add a `ParserConfig` variant in `config.rs`. Follow the existing parser
modules for layout.
3. **For reasoning**, prefer aliasing to `BasicReasoningParser` unless the
grammar truly diverges (append-think, Harmony channels). Most new models
use plain `<think>...</think>` and can share.
4. **Write tests.** Minimum viable set is in [`TESTING.md`](./TESTING.md) (T1–T20
taxonomy). At minimum: T1/T2/T3 for correctness, T5 for truncation
behavior, T8/T9 for streaming, T14 for interleaved text. `N/A` categories
should be explicitly called out in a comment rather than silently skipped.
## Related docs
- [`TESTING.md`](./TESTING.md) — corner-case taxonomy (T1–T20). What every
parser should be tested against, what's N/A per family, what's a universal
gap today.
- `lib/llm/tests/data/` — captured streaming fixtures per (engine × model)
that feed `test_streaming_tool_parsers.rs`. The replay side of the testing
story.
## Integration with the rest of Dynamo
- `lib/llm/src/preprocessor/prompt/` — pre-model side. Writes the prompts
that (eventually) come back and get parsed here.
- `lib/llm/src/preprocessor.rs` — top-level request/response pipeline.
Decides whether to run the reasoning parser based on
`is_reasoning_disabled_by_request`, then hands the reasoning-stripped
tail to the tool-call parser.
- `components/src/dynamo/frontend/` — Python frontend that surfaces parsed
output as OpenAI-compatible SSE chunks to the client.
# Tool-Call / Reasoning Parser Corner Cases
Reference taxonomy for unit testing tool-call and reasoning parsers. Each parser
added under `src/tool_calling/` or `src/reasoning/` should cover the generic
`CASE.<n>` categories; family-specific parsers also cover their respective
`CASE.xml<n>` / `CASE.harmony<n>` categories. `N/A` should be called out
explicitly in the test file rather than silently omitted.
Category layout:
- **`CASE.1`–`CASE.16`****Generic**. Apply to every parser regardless of grammar.
- **`CASE.xml1`–`CASE.xml2`** — XML-family only (hermes, glm47, qwen3_coder, minimax_m2, kimi_k2).
- **`CASE.harmony1`** — Harmony only (gpt-oss).
Per-model gap tracking lives elsewhere (not in this repo).
## Quick reference
### Generic (all parsers)
- **`CASE.1`** Single tool call — happy path (one complete, well-formed call). Ex: `xml::test_parse_simple_tool_call`.
- **`CASE.2`** Multiple tool calls — sequential or parallel (2+ in one response). Ex: `tool_choice::test_streaming_required_tool_parallel`.
- **`CASE.3`** No tool call (response is text only). Ex: `xml::test_parse_no_tool_calls`.
- **`CASE.4`** Malformed / partial JSON args (truncated, missing close brace, invalid syntax). Ex: `deepseek_v3::test_parse_..._with_invalid_json`, `xml::test_parse_missing_..._closing_tag`.
- **`CASE.5`** Missing end-token recovery (recover calls when `section_end` is absent due to max_tokens / EOS). Ex: `kimi_k2::test_parse_malformed_no_section_end`.
- **`CASE.6`** Empty args (`arguments={}` / no-arg call). Ex: `kimi_k2::test_parse_no_arg_call`.
- **`CASE.7`** Complex arg types (nested objects, arrays, bool, number, Unicode / newlines in values). Ex: `kimi_k2::test_parse_complex_json_arguments`.
- **`CASE.8`** Streaming — token-by-token assembly + chunk-boundary splits. Ex: `basic::test_buffer_state_persistence_across_calls`, `basic::test_partial_token_matching_closing_tag`, `basic::test_kimi_k2_one_shot_split`.
- **`CASE.9`** Paired reasoning + tool in same response. Ex: `test_reasoning_parser::test_nemotron_with_reasoning_and_tool_calls`.
- **`CASE.10`** Reasoning only (think tags, no tool call). Ex: `basic::test_detect_and_parse_reasoning_reasoning`.
- **`CASE.11`** `tool_choice` = auto / required / named / none. Ex: `tool_choice::test_named_tool_choice_parses_json` (**hermes only today**).
- **`CASE.12`** `finish_reason` semantics (`stop` / `tool_calls` / `length` mapping). Ex: `tool_choice_finish_reasons::*` (hermes only); `test_streaming_tool_parsers::test_qwen_finish_reason_length_vllm`.
- **`CASE.13`** Normal text interleaved with tool calls. Ex: `xml::test_parse_with_normal_text`.
- **`CASE.14`** Empty content / empty `tool_calls` array / null response. Ex: `parallel_tool_call_integration::test_empty_tool_calls`.
- **`CASE.15`** Duplicate tool calls (same name twice). No test anywhere in the repo; universal gap.
- **`CASE.16`** Regression for a specific customer bug (ticket ID referenced in test name or body). Ex: `kimi_k2::test_parse_malformed_no_section_end`.
### XML-family (`CASE.xml*`)
- **`CASE.xml1`** XML entity / HTML unescape handling (`&lt;`, `&amp;`, `&quot;` in parameter values). Ex: `xml::test_html_unescape`, `glm47::test_xml_entity_decoding`.
- **`CASE.xml2`** Schema-aware type coercion (string → number/bool/array based on declared parameter schema). Ex: `xml::test_schema_aware_type_conversion`, `glm47::test_type_coercion_*`.
### Harmony (`CASE.harmony*`)
- **`CASE.harmony1`** Channel / recipient parsing (analysis / commentary / final channels). Ex: `harmony_parser::test_parse_tool_calls_harmony_*`.
### Universal gaps (no test anywhere, not promoted to numbered categories)
- Unicode in function names (non-ASCII tool names, emoji).
- Numeric overflow in args (very large int / float outside JSON spec range).
- Empty function name (`"name": ""`).
- Concurrent parallel requests (process-level contention during parse).
- Guided-decoding ↔ tool-call interaction (constrained generation emits malformed args).
- Extremely long output (≥10 KB tool-call JSON in a single call).
- Mid-stream error injection / interruption (worker kill, network drop mid-parse).
- Schema arg-count mismatch (model emits extra or missing args vs declared schema).
---
## `CASE.1` — Single tool call, happy path
One complete, well-formed call in the response.
- Applies to every tool-call parser.
- Baseline correctness check. If `CASE.1` fails, nothing else below matters.
- Example: `dsml/parser.rs::test_parse_single_tool_call_string_param`.
## `CASE.2` — Multiple tool calls (sequential or parallel)
Two or more calls in one response, in the same block or back-to-back.
- Applies to every tool-call parser.
- Some grammars emit parallel calls in one block (DSML, XML); others emit
sequential top-level sentinels (JSON dialects). Either way, extract all.
- Example: `dsml/parser.rs::test_parse_multiple_tool_calls`,
`tool_choice::test_streaming_required_tool_parallel`.
## `CASE.3` — No tool call
Response is plain text, no tool-call grammar present.
- Applies to every tool-call parser.
- Must return empty `Vec<ToolCall>` and the input as `normal_text`. Zero false
positives.
- Example: `dsml/parser.rs::test_parse_no_tool_calls`.
## `CASE.4` — Malformed / partial JSON args
Truncated JSON, missing close brace, invalid syntax inside the arguments
payload.
- Applies to every tool-call parser. For parsers whose grammar never embeds
JSON (none today — all top-N families embed JSON somewhere), mark explicit
`N/A`.
- Behavior must be documented: either graceful fallback to string (DSML's
current behavior via `serde_json::from_str(...).unwrap_or_else(|_| String(...))`)
or explicit error. Silent drop is the failure mode.
- Example: `dsml/parser.rs::test_parse_deepseek_v4_malformed_json_value_falls_back_to_string`,
`deepseek_v3_parser.rs::test_parse_tool_calls_deepseek_v3_with_invalid_json`.
## `CASE.5` — Missing end-token recovery
The model's response is truncated before the closing fence arrives
(`<|tool_calls_section_end|>` for Kimi, `</|DSML|tool_calls>` for DeepSeek
DSML, etc.) — typically because the engine hit `max_tokens` or the model
emitted EOS mid-generation.
- Applies to every tool-call parser with paired start/end fences.
- Customer-facing bug class: silent drop of the in-flight call looks like a
successful HTTP 200 with no tool_calls and no error.
- Two acceptable resolutions: (a) recover completed invokes even without the
outer close fence (Kimi K2 does this post-fix), or (b) return an explicit
error. Either way, pin the behavior with a test so a future change is
intentional.
- Example (post-recovery): `kimi_k2_parser.rs::test_parse_malformed_no_section_end`.
- Example (behavior-pinning, pre-recovery): `dsml/parser.rs::test_parse_deepseek_v4_missing_end_token`.
## `CASE.6` — Empty args
Tool call with `arguments={}`, or a no-parameter invoke.
- Applies to every tool-call parser.
- Must still return the call — empty args is a valid call, not a missing one.
- Example: `kimi_k2_parser.rs::test_parse_no_arg_call`,
`dsml/parser.rs::test_parse_deepseek_v4_no_parameters`.
## `CASE.7` — Complex argument types
Nested objects, arrays, booleans, numbers, mixed types, Unicode values, and
newlines inside argument values.
- Applies to every tool-call parser.
- For grammars that carry type hints (DSML's `string="true|false"`), verify
JSON round-tripping. For XML grammars without hints, the type-coercion
half of the test is covered under `CASE.xml2` instead — here just verify
that complex values make it through without truncation or escape bugs.
- Example: `dsml/parser.rs::test_parse_mixed_types_realistic`,
`kimi_k2_parser.rs::test_parse_complex_json_arguments`.
## `CASE.8` — Streaming
Chunked input arriving over SSE. Covers two concerns that tend to fail
together:
1. **Token-by-token assembly** — the parser incrementally reconstructs the
tool-call structure across many small chunks.
2. **Chunk-boundary splits** — start fence, end fence, or parameter name /
value straddles a chunk boundary. Partial-token matching must return
`true` (keep buffering, don't flush as plain text) and complete the
match on the next chunk.
- Applies to every tool-call parser. Dominant production path.
- Example: `basic::test_buffer_state_persistence_across_calls`,
`basic::test_partial_token_matching_closing_tag`,
`basic::test_kimi_k2_one_shot_split`,
`test_streaming_tool_parsers::test_deepseek_v4_e2e_fragmented_tokens_vllm`.
## `CASE.9` — Paired reasoning + tool in same response
Model emits `<think>...</think>` (or analog) followed by a tool call. Both
must be extracted: `reasoning_content` populated AND `tool_calls` populated.
- Applies to every (tool, reasoning) parser pair.
- Watch for the "unclosed think-tag swallows tool call" bug — if the reasoning
parser is greedy it may eat the tool-call content that follows.
- Example: `test_reasoning_parser::test_nemotron_with_reasoning_and_tool_calls`,
`test_reasoning_parser::test_kimi_k25_with_reasoning_and_tool_calls`.
## `CASE.10` — Reasoning only
`<think>...</think>` or analog present, no tool call. Parser must populate
`reasoning_content` and leave `tool_calls` empty.
- Applies to every reasoning parser.
- Example: `reasoning/base_parser.rs::test_detect_and_parse_reasoning_reasoning`,
`reasoning/mod.rs::test_deepseek_v4_detect_and_parse`.
## `CASE.11` — `tool_choice` = auto / required / named / none
Each of the four OpenAI `tool_choice` modes exercised per parser.
- Applies to every tool-call parser.
- Cross-parser suites at `lib/llm/tests/tool_choice.rs` /
`parallel_tool_call_integration.rs` / `tool_choice_finish_reasons.rs`
run `hermes` only today. Adding a new parser requires parametrizing those
suites or adding a per-parser equivalent.
- Universal gap across most parsers in the repo as of 2026-04.
- Example: `tool_choice::test_named_tool_choice_parses_json`,
`tool_choice::test_required_tool_choice_parses_json_array`.
## `CASE.12` — `finish_reason` semantics
`stop` vs `tool_calls` vs `length` mapping, in both streaming and
non-streaming paths.
- Applies to every tool-call parser.
- When a tool call lands, `finish_reason` must become `tool_calls`. When
`max_tokens` truncates mid-stream, `length` must propagate — this is
often the signal that should trigger `CASE.5` recovery on the parser side.
- Example: `tool_choice_finish_reasons::test_named_tool_choice_normal_stop_becomes_tool_calls`,
`test_streaming_tool_parsers::test_qwen_finish_reason_length_vllm`.
## `CASE.13` — Normal text interleaved with tool calls
Model emits narration text before / after / between tool-call blocks. Parser
must split content correctly: text → `normal_content`, calls → `tool_calls`.
- Applies to every tool-call parser.
- Example: `dsml/parser.rs::test_parse_with_normal_text`,
`test_streaming_tool_parsers.rs::test_deepseek_v4_e2e_content_before_tool_vllm`.
## `CASE.14` — Empty content / empty `tool_calls` array / null response
Engine emits a chunk with `delta.content = ""`, or a final response with
`tool_calls: []`, or `null` values inside arguments.
- Applies to every tool-call parser.
- Null-value handling inside parameters is parser-level (`parse_parameters`
in DSML handles it via `serde_json::Value::Null`). Empty-choices /
empty-stream handling is typically at the e2e integration layer.
- Example: `dsml/parser.rs::test_parse_null_parameter`,
`parallel_tool_call_integration::test_empty_tool_calls`.
## `CASE.15` — Duplicate tool calls (same name twice)
Two calls to the same function name in one response, possibly with the same
arguments.
- Applies to every tool-call parser.
- **Zero coverage across the entire repo as of 2026-04.** Universal gap.
- Expected behavior: both calls must appear in `tool_calls` with distinct
IDs. (The runtime / client is responsible for deciding whether duplicate
invocation is intended.)
## `CASE.16` — Regression for a specific customer bug
Test named after (or containing) a ticket reference, pinning the fix for
a customer-reported failure.
- Applies per-incident. Not a category every parser needs to cover in
advance; populated as bugs are reported and fixed.
- Existing example: `kimi_k2_parser.rs::test_parse_malformed_no_section_end`.
---
## `CASE.xml1` — XML entity / HTML unescape handling
Parameter values contain XML-encoded entities (`&lt;`, `&amp;`, `&quot;`,
`&apos;`, numeric entities like `&#38;`) that must be decoded before the
value is surfaced to the client.
- Applies only to XML-family tool-call parsers: `hermes`, `glm47`,
`qwen3_coder`, `minimax_m2`, `kimi_k2` (despite its special-token outer
fence, the inner parameter payload is XML-ish).
- **N/A for DSML** — the `string="true|false"` attribute tells the parser
whether to JSON-decode or pass through verbatim; no entity decoding pass.
- **N/A for JSON-family and Harmony** — JSON has its own escape semantics
handled by `serde_json`.
- Example: `xml/parser.rs::test_html_unescape`,
`glm47_parser.rs::test_xml_entity_decoding`.
## `CASE.xml2` — Schema-aware type coercion
Parser uses the declared tool schema to coerce string args to
number / bool / array based on the declared parameter type.
- Applies only to XML-family parsers without explicit type annotations in
the wire format. `xml/parser.rs`, `glm47_parser.rs` do this.
- **N/A for DSML** — the `string="true|false"` attribute carries the type
intent per parameter, so no schema lookup is needed.
- **N/A for JSON-family** — JSON has native types.
- **N/A for Harmony** — payload is JSON inside the channel envelope.
- Example: `xml/parser.rs::test_schema_aware_type_conversion`,
`glm47_parser.rs::test_type_coercion_array_comma_separated`.
---
## `CASE.harmony1` — Channel / recipient parsing
OpenAI Harmony's token stream carries channel metadata
(`<|channel|>analysis|commentary|final<|message|>`) and recipient targets
(`to=functions.foo`). Parser must route the `commentary` channel content
into tool-call extraction while surfacing `analysis` as reasoning and
`final` as the user-visible output.
- **Harmony only.** N/A for every other family.
- Example: `harmony/harmony_parser.rs::test_parse_tool_calls_harmony_with_multi_args`,
`harmony/harmony_parser.rs::test_parse_tool_calls_harmony_with_normal_text`.
---
## Applicability summary
| Category block | Parsers | Notes |
| -- | -- | -- |
| `CASE.1``CASE.16` (generic) | All | Required contract for every parser |
| `CASE.xml1``CASE.xml2` | XML-family only | Entity decoding + schema-aware coercion |
| `CASE.harmony1` | Harmony only | Channel routing |
## Adding a new parser: what you must include
Minimum viable set for a new tool-call parser:
1. `CASE.1`, `CASE.2`, `CASE.3` — baseline correctness.
2. `CASE.4` or explicit N/A justification — handle or refuse malformed input.
3. `CASE.5` — pin behavior when the outer fence is missing. Silent drop is a
regression waiting to happen.
4. `CASE.6`, `CASE.7` — empty and complex args.
5. `CASE.8` — streaming. Essentially non-negotiable for any parser that sits
behind a streaming frontend.
6. `CASE.13` — interleaved text.
7. `CASE.15` — document whether duplicate calls are supported. Flat gap
today; landing a test with the parser establishes the contract.
8. Family-specific categories where applicable: `CASE.xml1` / `CASE.xml2`
for XML grammars, `CASE.harmony1` for Harmony.
For reasoning parsers, replace `CASE.4` / `CASE.5` / `CASE.8`-assembly with
`CASE.8`-partial-close-tag and `CASE.10` (reasoning-only).
......@@ -29,6 +29,22 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT
map.insert("basic", ReasoningParserType::Basic);
map.insert("gpt_oss", ReasoningParserType::GptOss);
map.insert("qwen3", ReasoningParserType::Qwen);
// DeepSeek-V4 uses the same `<think>` / `</think>` delimiters as Qwen
// (confirmed against deepseek-ai/DeepSeek-V4-Pro's encoding_dsv4.py)
// so it delegates to the same `BasicReasoningParser` config today. We
// still route through a dedicated `DeepSeekV4` variant rather than
// hard-aliasing to `Qwen` so future divergence (different special
// tokens, max-thinking mode, etc.) has a place to land without rippling
// through Qwen's own config.
//
// The three name aliases exist because callers set this via
// `--dyn-reasoning-parser` / `--reasoning-parser` with whatever string
// the HF model / vLLM recipe / chat-template author picked. We accept
// all three separator conventions (snake / kebab / concat) rather than
// force a single canonical form on users.
map.insert("deepseek_v4", ReasoningParserType::DeepSeekV4);
map.insert("deepseek-v4", ReasoningParserType::DeepSeekV4);
map.insert("deepseekv4", ReasoningParserType::DeepSeekV4);
map.insert("nemotron_deci", ReasoningParserType::NemotronDeci);
map.insert("kimi", ReasoningParserType::Kimi);
map.insert("kimi_k25", ReasoningParserType::KimiK25);
......@@ -110,6 +126,14 @@ pub enum ReasoningParserType {
Basic,
GptOss,
Qwen,
/// DeepSeek-V4-Pro / V4-Flash. Currently uses the same `<think>` /
/// `</think>` `BasicReasoningParser` config as Qwen (V4 never appends
/// `<think>` in the completion — the chat template always pre-injects it,
/// so the parser starts via `set_in_reasoning(true)` rather than
/// `force_reasoning`). A dedicated variant keeps future V4-specific
/// divergence (different delimiters, thinking-effort modes) from leaking
/// into Qwen's behavior.
DeepSeekV4,
NemotronDeci,
Kimi,
KimiK25,
......@@ -161,6 +185,12 @@ impl ReasoningParserType {
ReasoningParserType::Qwen => ReasoningParserWrapper {
parser: Box::new(basic_parser),
},
// Same `<think>` / `</think>` config as Qwen today; kept as a
// distinct variant so V4-specific divergence has somewhere to land.
// See `ReasoningParserType::DeepSeekV4` docstring for rationale.
ReasoningParserType::DeepSeekV4 => ReasoningParserWrapper {
parser: Box::new(basic_parser),
},
ReasoningParserType::NemotronDeci => ReasoningParserWrapper {
parser: Box::new(basic_parser),
},
......@@ -246,6 +276,9 @@ mod tests {
"basic",
"gpt_oss",
"qwen3",
"deepseek_v4",
"deepseek-v4",
"deepseekv4",
"nemotron_deci",
"kimi",
"kimi_k25",
......@@ -261,6 +294,45 @@ mod tests {
assert!(parsers.contains(&parser));
}
}
/// `CASE.10` — reasoning-only (V4 `<think>`/`</think>`).
#[test]
fn test_deepseek_v4_detect_and_parse() {
for parser_name in ["deepseek_v4", "deepseek-v4", "deepseekv4"] {
let mut parser = ReasoningParserType::get_reasoning_parser_from_name(parser_name);
let result = parser.detect_and_parse_reasoning("<think>thinking</think>answer", &[]);
assert_eq!(result.reasoning_text, "thinking");
assert_eq!(result.normal_text, "answer");
}
}
/// `CASE.3` / `CASE.10` — no reasoning tags ⇒ no `reasoning_content`.
#[test]
fn test_deepseek_v4_no_forced_reasoning_without_tags() {
let mut parser = ReasoningParserType::get_reasoning_parser_from_name("deepseek_v4");
let result = parser.detect_and_parse_reasoning("answer only", &[]);
assert_eq!(result.reasoning_text, "");
assert_eq!(result.normal_text, "answer only");
}
/// `CASE.8` — streaming reasoning parse (chunked).
#[test]
fn test_deepseek_v4_streaming() {
let mut parser = ReasoningParserType::get_reasoning_parser_from_name("deepseek_v4");
let chunks = ["<think>rea", "son</think>answer"];
let mut reasoning = String::new();
let mut normal = String::new();
for chunk in chunks {
let result = parser.parse_reasoning_streaming_incremental(chunk, &[]);
reasoning.push_str(&result.reasoning_text);
normal.push_str(&result.normal_text);
}
assert_eq!(reasoning, "reason");
assert_eq!(normal, "answer");
}
#[test]
fn test_kimi_k25_detect_and_parse() {
......
......@@ -391,6 +391,22 @@ impl ToolCallConfig {
}
}
pub fn deepseek_v4() -> Self {
// DeepSeek V4 format (DSML):
// <|DSML|tool_calls>
// <|DSML|invoke name="function_name">
// <|DSML|parameter name="param_name" string="true|false">value</|DSML|parameter>
// </|DSML|invoke>
// </|DSML|tool_calls>
Self {
parser_config: ParserConfig::Dsml(DsmlParserConfig {
function_calls_start: "<|DSML|tool_calls>".to_string(),
function_calls_end: "</|DSML|tool_calls>".to_string(),
..Default::default()
}),
}
}
pub fn minimax_m2() -> Self {
// MiniMax-M2.1 format:
// <minimax:tool_call>
......
This diff is collapsed.
......@@ -43,6 +43,9 @@ pub fn get_tool_parser_map() -> &'static HashMap<&'static str, ToolCallConfig> {
map.insert("deepseek_v3", ToolCallConfig::deepseek_v3());
map.insert("deepseek_v3_1", ToolCallConfig::deepseek_v3_1());
map.insert("deepseek_v3_2", ToolCallConfig::deepseek_v3_2());
map.insert("deepseek_v4", ToolCallConfig::deepseek_v4());
map.insert("deepseek-v4", ToolCallConfig::deepseek_v4());
map.insert("deepseekv4", ToolCallConfig::deepseek_v4());
map.insert("qwen3_coder", ToolCallConfig::qwen3_coder());
map.insert("jamba", ToolCallConfig::jamba());
map.insert("minimax_m2", ToolCallConfig::minimax_m2());
......@@ -241,6 +244,9 @@ mod tests {
"deepseek_v3",
"deepseek_v3_1",
"deepseek_v3_2",
"deepseek_v4",
"deepseek-v4",
"deepseekv4",
"qwen3_coder",
"jamba",
"nemotron_nano",
......@@ -1701,6 +1707,56 @@ Remember, San Francisco weather can be quite unpredictable, particularly with it
assert_eq!(args["topn"], 10); // Should be number, not string
assert_eq!(args["source"], "web");
}
/// `CASE.1` — single-call happy path (V4).
#[tokio::test]
async fn test_deepseek_v4_single_tool_call() {
let input = r#"<|DSML|tool_calls>
<|DSML|invoke name="get_datetime">
<|DSML|parameter name="timezone" string="true">Asia/Shanghai</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>"#;
let (tool_calls, normal_text) =
detect_and_parse_tool_call(input, Some("deepseek_v4"), None)
.await
.expect("Failed to parse");
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "get_datetime");
assert_eq!(normal_text, Some("".to_string()));
let args: serde_json::Value =
serde_json::from_str(&tool_calls[0].function.arguments).unwrap();
assert_eq!(args["timezone"], "Asia/Shanghai");
}
/// Alias registration: verifies `deepseek-v4` and `deepseekv4` route to the same parser as `deepseek_v4`. Not a CASE.*; covers registry plumbing.
#[tokio::test]
async fn test_deepseek_v4_compatibility_aliases() {
let input = r#"<|DSML|tool_calls>
<|DSML|invoke name="search">
<|DSML|parameter name="query" string="true">search agent benchmark 2024</|DSML|parameter>
<|DSML|parameter name="topn" string="false">10</|DSML|parameter>
<|DSML|parameter name="source" string="true">web</|DSML|parameter>
</|DSML|invoke>
</|DSML|tool_calls>"#;
for parser_name in ["deepseek_v4", "deepseek-v4", "deepseekv4"] {
let (tool_calls, _) = detect_and_parse_tool_call(input, Some(parser_name), None)
.await
.expect("Failed to parse");
assert_eq!(tool_calls.len(), 1);
assert_eq!(tool_calls[0].function.name, "search");
let args: serde_json::Value =
serde_json::from_str(&tool_calls[0].function.arguments).unwrap();
assert_eq!(args["query"], "search agent benchmark 2024");
assert_eq!(args["topn"], 10);
assert_eq!(args["source"], "web");
}
}
#[tokio::test]
async fn test_hermes_parser_without_new_line() {
......
......@@ -794,24 +794,6 @@ class TestToolCallingProtocol:
names.add(tc["function"]["name"])
assert len(names) >= 2, f"expected at least 2 distinct tools, got {names}"
def test_tool_call_ids_unique_in_single_response(self, client: OpenAI, model: str):
result = stream_chat(
client,
model,
messages=[
{
"role": "user",
"content": "Get weather for New York, London, and Tokyo.",
}
],
tools=TOOLS_WEATHER,
tool_choice="required",
parallel_tool_calls=True,
)
assert_finish_reason(result, {"tool_calls"})
ids = [tc["id"] for tc in result.tool_calls]
assert len(ids) == len(set(ids)), f"duplicate tool ids: {ids}"
def test_array_argument_schema_valid(self, client: OpenAI, model: str):
tools = [
{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment