// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! # Reasoning and Tool Call Interplay
//!
//! Models like GLM-4.5/4.7 and Qwen3 interleave reasoning blocks with tool calls:
//!
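//! ```text
//! <think>reasoning about what tool to call</think>
//! <tool_call>get_weather<arg_key>city</arg_key><arg_value>Beijing</arg_value></tool_call>
//! <think>reasoning about the result</think>
//! <tool_call>summarize<arg_key>text</arg_key><arg_value>...</arg_value></tool_call>
//! ```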
//!
//! The reasoning parser and the tool call parser are **independent, sequential** stages:
//!
//! 1. **Reasoning parser** (`BasicReasoningParser`) splits the stream into:
//!    - `reasoning_content`: everything inside `<think>...</think>` blocks
//!    - `normal_text`: everything outside (including tool call tags)
//! 2. **Tool call parser** (`glm47` / others) then processes `normal_text` to extract
//!    `<tool_call>...</tool_call>` blocks.
//!
//! This means tool calls **must** appear outside `<think>` blocks to be detected.
//! If a model erroneously emits a tool call inside a `<think>` block (observed in
//! GLM-4.7 under very long contexts), the tool call parser will not see it.
//!
//! ## `force_reasoning` and tokenizer behavior
//!
//! Some models (e.g. GLM-5-FP8 served via ZAI) consume `<think>` as a special
//! tokenizer token and never emit it as literal text. In that case use
//! `force_reasoning=true` (`deepseek_r1` parser), which treats all output as
//! reasoning until `</think>` is seen. Models that do emit `<think>` as text
//! (standard serving, Qwen3, GLM-4.5) should use `force_reasoning=false`
//! (`glm45`, `nemotron_deci`, `qwen3` parsers).
use crate::{ParserResult, ReasoningParser};
/// Computes how many trailing bytes of `s` form a (non-empty) prefix of `delim`.
///
/// The result is the byte length of the longest suffix of `s` that is also a prefix of
/// `delim`, or `0` when there is none. Ported from ollama's `thinking/parser.go::overlap()`.
/// It is used to spot a delimiter that has been split across streaming chunk boundaries
/// (e.g. `"Hello world <thi"` ends with the first four bytes of `"<think>"`).
fn overlap(s: &str, delim: &str) -> usize {
    let longest = s.len().min(delim.len());
    (1..=longest)
        .rev()
        // Only cut `delim` on UTF-8 boundaries; slicing mid-codepoint would panic
        // (relevant for multi-byte delimiters such as `◁` in Kimi tags).
        .filter(|&len| delim.is_char_boundary(len))
        .find(|&len| s.ends_with(&delim[..len]))
        .unwrap_or(0)
}
/// Delimiter-based reasoning parser that separates "reasoning" text (between a start and
/// an end token) from normal text, either on a complete string or incrementally on a
/// stream of chunks.
///
/// NOTE(review): the derived `Default` leaves both delimiter tokens empty; construct
/// instances through `BasicReasoningParser::new` with real delimiters.
#[derive(Default, Debug, Clone)]
pub struct BasicReasoningParser {
/// Token that opens a reasoning block.
think_start_token: String,
/// Token that closes a reasoning block.
think_end_token: String,
/// Streaming state: `true` while the parser is inside a reasoning block.
/// Seeded from `force_reasoning` in `new`.
_in_reasoning: bool,
/// When `true`, the streaming parser emits reasoning text as chunks arrive; when
/// `false`, reasoning text stays buffered until the end token is seen.
stream_reasoning: bool,
/// Streamed text that has been received but not yet emitted (e.g. a possible
/// partial delimiter at the end of the last chunk).
_buffer: String,
/// Streaming state: `true` once the start token of the current reasoning block has
/// been consumed; reset to `false` when that block's end token is consumed.
stripped_think_start: bool,
}
impl BasicReasoningParser {
    /// Builds a parser for the given pair of reasoning delimiters.
    ///
    /// * `think_start_token` / `think_end_token` – the literal tokens that open and close
    ///   a reasoning block.
    /// * `force_reasoning` – initial value of the in-reasoning state; when `true` the
    ///   parser starts out treating input as reasoning even without a start token.
    /// * `stream_reasoning` – stored as-is; controls whether the streaming parser emits
    ///   reasoning text incrementally or only once the end token arrives.
    ///
    /// The internal buffer starts empty and no start token is considered consumed yet.
    pub fn new(
        think_start_token: String,
        think_end_token: String,
        force_reasoning: bool,
        stream_reasoning: bool,
    ) -> Self {
        Self {
            _in_reasoning: force_reasoning,
            stream_reasoning,
            think_start_token,
            think_end_token,
            // Remaining fields (`_buffer`, `stripped_think_start`) start at their
            // derived defaults: an empty `String` and `false`.
            ..Self::default()
        }
    }
}
/// Minimum number of trailing bytes that must match a delimiter prefix before the
/// streaming parser holds them back. A lone `<` (overlap of 1) is deliberately passed
/// through so that tool-call XML such as `<tool_call>` or `<invoke ...>` is not delayed.
const MIN_PARTIAL_TAG_LEN: usize = 2;

/// Locates the first occurrence of `token` in `haystack`, treating an empty token as
/// "never present".
///
/// `str::find("")` always succeeds at offset 0 without consuming any input, so an empty
/// delimiter (which is what the derived `Default` produces) would make the parsers below
/// flip state forever. Returning `None` makes an unset delimiter simply never match.
fn find_token(haystack: &str, token: &str) -> Option<usize> {
    if token.is_empty() {
        return None;
    }
    haystack.find(token)
}

/// Moves the part of `buffer` that cannot be the beginning of `delim` into `out`.
///
/// If `buffer` ends with at least [`MIN_PARTIAL_TAG_LEN`] bytes that are a prefix of
/// `delim`, those bytes stay in `buffer` (they may be completed by the next chunk);
/// otherwise the whole buffer is flushed.
fn flush_safe_prefix(buffer: &mut String, delim: &str, out: &mut String) {
    let ol = overlap(buffer.as_str(), delim);
    let held = if ol >= MIN_PARTIAL_TAG_LEN { ol } else { 0 };
    // `overlap` only reports suffixes that equal a valid `&str` prefix of `delim`,
    // so `safe_end` is always on a char boundary of `buffer`.
    let safe_end = buffer.len() - held;
    out.push_str(&buffer[..safe_end]);
    buffer.drain(..safe_end);
}

impl ReasoningParser for BasicReasoningParser {
    /// Splits a complete (non-streaming) `text` into reasoning and normal text.
    ///
    /// * Without a start token in `text` and without forced reasoning, `text` is returned
    ///   unchanged as `normal_text`.
    /// * With forced reasoning and neither delimiter present, `text` is returned unchanged
    ///   as `reasoning_text`.
    /// * Otherwise every `<start>...<end>` pair is extracted: the pieces inside are
    ///   concatenated into `reasoning_text`, the pieces outside into `normal_text`, and
    ///   both results are trimmed. An unterminated block runs to the end of `text`.
    ///
    /// A start token inside an open block and an end token outside any block are kept as
    /// literal text. Empty delimiters never match.
    ///
    /// `self._in_reasoning` is intentionally NOT updated here: this method ignores the
    /// streaming state, so callers should not mix it with
    /// `parse_reasoning_streaming_incremental` on the same parser instance.
    fn detect_and_parse_reasoning(&mut self, text: &str, _token_ids: &[u32]) -> ParserResult {
        let start = self.think_start_token.as_str();
        let end = self.think_end_token.as_str();
        let has_think_tag = find_token(text, start).is_some();

        if !self._in_reasoning && !has_think_tag {
            return ParserResult {
                normal_text: text.to_string(),
                reasoning_text: String::new(),
            };
        }

        // Forced reasoning with no delimiters at all: the entire text is reasoning.
        if self._in_reasoning && !has_think_tag && find_token(text, end).is_none() {
            return ParserResult {
                normal_text: String::new(),
                reasoning_text: text.to_string(),
            };
        }

        let mut reasoning = String::new();
        let mut normal = String::new();
        let mut currently_reasoning = self._in_reasoning;
        let mut rest = text;

        // Forced reasoning where the model also emitted the start token as text: drop
        // the leading tag so it does not leak into the reasoning content. This mirrors
        // the leading-tag strip performed by the streaming path.
        if currently_reasoning && !start.is_empty() {
            rest = rest.strip_prefix(start).unwrap_or(rest);
        }

        while !rest.is_empty() {
            // Inside a block we look for the end token, outside for the start token.
            let (token, sink) = if currently_reasoning {
                (end, &mut reasoning)
            } else {
                (start, &mut normal)
            };
            match find_token(rest, token) {
                Some(idx) => {
                    sink.push_str(&rest[..idx]);
                    rest = &rest[idx + token.len()..];
                    currently_reasoning = !currently_reasoning;
                }
                None => {
                    // No further delimiter: the remainder belongs to the current mode
                    // (a truncated reasoning block, or trailing normal text).
                    sink.push_str(rest);
                    break;
                }
            }
        }

        ParserResult {
            normal_text: normal.trim().to_string(),
            reasoning_text: reasoning.trim().to_string(),
        }
    }

    /// Consumes one streamed chunk and returns the reasoning / normal text that can be
    /// emitted so far.
    ///
    /// The chunk is appended to the internal buffer, which is then drained through as
    /// many state transitions as it contains, so a single chunk holding several complete
    /// `<start>...<end>` blocks is fully processed in one call. Text that might be the
    /// beginning of a delimiter split across chunks (at least [`MIN_PARTIAL_TAG_LEN`]
    /// matching bytes) is held back in the buffer until the next call.
    ///
    /// When `stream_reasoning` is `false`, reasoning text is not emitted incrementally:
    /// it stays buffered until the end token arrives and is then returned in one piece.
    ///
    /// Unlike `detect_and_parse_reasoning`, results are not trimmed. Empty delimiters
    /// never match.
    fn parse_reasoning_streaming_incremental(
        &mut self,
        text: &str,
        _token_ids: &[u32],
    ) -> ParserResult {
        self._buffer.push_str(text);
        let mut accumulated_normal = String::new();
        let mut accumulated_reasoning = String::new();

        loop {
            // Strip a start token sitting at the very beginning of the buffer. Covers:
            //   1. forced reasoning where the model also emits the start token as text;
            //   2. a block whose start token arrives at buffer position 0.
            // A start token further into the buffer is handled by the search below.
            if !self.stripped_think_start
                && !self.think_start_token.is_empty()
                && self._buffer.starts_with(self.think_start_token.as_str())
            {
                self._buffer.drain(..self.think_start_token.len());
                self.stripped_think_start = true;
                self._in_reasoning = true;
                continue;
            }

            if self._in_reasoning {
                if let Some(end_idx) = find_token(&self._buffer, &self.think_end_token) {
                    // End of the reasoning block: emit its content and leave the block.
                    accumulated_reasoning.push_str(&self._buffer[..end_idx]);
                    self._buffer.drain(..end_idx + self.think_end_token.len());
                    self._in_reasoning = false;
                    self.stripped_think_start = false; // allow detecting the next block
                    continue; // the remainder may contain further blocks
                }
                // No complete end token yet. When streaming reasoning, emit everything
                // except a possible partial end token at the tail (e.g. `...</th`).
                // When not streaming, keep the whole buffer until the end token arrives.
                if self.stream_reasoning {
                    flush_safe_prefix(
                        &mut self._buffer,
                        &self.think_end_token,
                        &mut accumulated_reasoning,
                    );
                }
                break;
            }

            if let Some(think_pos) = find_token(&self._buffer, &self.think_start_token) {
                // Normal text followed by the start of a reasoning block.
                accumulated_normal.push_str(&self._buffer[..think_pos]);
                self._buffer.drain(..think_pos + self.think_start_token.len());
                self._in_reasoning = true;
                self.stripped_think_start = true;
                continue; // process the reasoning content that follows
            }

            // No complete start token: emit everything except a possible partial start
            // token at the tail (e.g. `Hello world <thi`).
            flush_safe_prefix(
                &mut self._buffer,
                &self.think_start_token,
                &mut accumulated_normal,
            );
            break;
        }

        ParserResult {
            normal_text: accumulated_normal,
            reasoning_text: accumulated_reasoning,
        }
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `BasicReasoningParser` and the `overlap` helper.
    //!
    //! All tests use `<think>` / `</think>` as delimiters. Streaming results are not
    //! trimmed, so expected strings keep their exact leading/trailing whitespace;
    //! non-streaming results are trimmed but keep interior whitespace.

    use super::*;

    #[test]
    fn test_detect_and_parse_reasoning_reasoning() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result =
            parser.detect_and_parse_reasoning("<think>with reasoning</think> and more text.", &[]);
        assert_eq!(result.normal_text, "and more text.");
        assert_eq!(result.reasoning_text, "with reasoning");
    }

    #[test]
    fn test_detect_and_parse_reasoning_reasoning_no_reasoning() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.detect_and_parse_reasoning("This is a test without reasoning.", &[]);
        assert_eq!(result.normal_text, "This is a test without reasoning.");
        assert_eq!(result.reasoning_text, "");
    }

    #[test]
    fn test_detect_and_parse_reasoning_reasoning_truncated_reasoning() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.detect_and_parse_reasoning("<think>with truncated reasoning", &[]);
        assert_eq!(result.normal_text, "");
        assert_eq!(result.reasoning_text, "with truncated reasoning");
    }

    #[test]
    fn test_parse_reasoning_streaming_incremental() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        // A partial opening tag is buffered; nothing is emitted yet.
        let result = parser.parse_reasoning_streaming_incremental("<thi", &[]);
        assert_eq!(result.normal_text, "");
        assert_eq!(result.reasoning_text, "");
    }

    #[test]
    fn test_parse_reasoning_streaming_incremental_complete() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.parse_reasoning_streaming_incremental(
            "<think>with reasoning</think> and more text.",
            &[],
        );
        assert_eq!(result.normal_text, " and more text.");
        assert_eq!(result.reasoning_text, "with reasoning");
    }

    #[test]
    fn test_parse_reasoning_streaming_incremental_no_end_token() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), true, true);
        let result = parser.parse_reasoning_streaming_incremental("<think>with reasoning", &[]);
        assert_eq!(result.normal_text, "");
        assert_eq!(result.reasoning_text, "with reasoning");
    }

    #[test]
    fn test_detect_and_parse_reasoning_multiple_reasoning_blocks() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.detect_and_parse_reasoning(
            "<think>first reasoning</think> middle <think>second reasoning</think> end",
            &[],
        );
        // " middle " + " end", trimmed at the outer edges only.
        assert_eq!(result.normal_text, "middle  end");
        assert_eq!(result.reasoning_text, "first reasoningsecond reasoning");
    }

    #[test]
    fn test_streaming_multiple_reasoning_blocks() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, false);
        let result1 = parser
            .parse_reasoning_streaming_incremental("<think>first reasoning</think> middle", &[]);
        assert_eq!(result1.normal_text, " middle");
        assert_eq!(result1.reasoning_text, "first reasoning");
        // Second reasoning block: space before <think> is normal prefix, reasoning extracted
        let result2 = parser
            .parse_reasoning_streaming_incremental(" <think>second reasoning</think> end", &[]);
        assert_eq!(result2.reasoning_text, "second reasoning");
        assert_eq!(result2.normal_text, "  end"); // " " prefix + " end" suffix
    }

    #[test]
    fn test_partial_token_matching_opening_tag() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        // Feed partial opening tag
        let result1 = parser.parse_reasoning_streaming_incremental("<th", &[]);
        assert_eq!(result1.normal_text, "");
        assert_eq!(result1.reasoning_text, "");
        // Complete the opening tag and add content
        let result2 = parser.parse_reasoning_streaming_incremental(
            "ink>reasoning content</think> normal text",
            &[],
        );
        assert_eq!(result2.normal_text, " normal text");
        assert_eq!(result2.reasoning_text, "reasoning content");
    }

    #[test]
    fn test_partial_token_matching_closing_tag() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, false);
        // Start with complete opening and partial content
        let result1 =
            parser.parse_reasoning_streaming_incremental("<think>reasoning content</th", &[]);
        assert_eq!(result1.normal_text, "");
        assert_eq!(result1.reasoning_text, "");
        // Complete the closing tag
        let result2 = parser.parse_reasoning_streaming_incremental("ink> normal text", &[]);
        assert_eq!(result2.normal_text, " normal text");
        assert_eq!(result2.reasoning_text, "reasoning content");
    }

    #[test]
    fn test_buffer_state_persistence_across_calls() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, false);
        // First call - partial opening tag
        let result1 = parser.parse_reasoning_streaming_incremental("<th", &[]);
        assert_eq!(result1.normal_text, "");
        assert_eq!(result1.reasoning_text, "");
        // Second call - complete opening tag, start reasoning
        let result2 = parser.parse_reasoning_streaming_incremental("ink>part1 ", &[]);
        assert_eq!(result2.normal_text, "");
        assert_eq!(result2.reasoning_text, "");
        // Third call - more reasoning content
        let result3 = parser.parse_reasoning_streaming_incremental("part2 ", &[]);
        assert_eq!(result3.normal_text, "");
        assert_eq!(result3.reasoning_text, "");
        // Fourth call - end reasoning and normal text
        let result4 = parser.parse_reasoning_streaming_incremental("part3</think> normal", &[]);
        assert_eq!(result4.normal_text, " normal");
        assert_eq!(result4.reasoning_text, "part1 part2 part3");
    }

    #[test]
    fn test_streaming_with_stream_reasoning_enabled() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        // Start reasoning block
        let result1 = parser.parse_reasoning_streaming_incremental("<think>reasoning ", &[]);
        assert_eq!(result1.normal_text, "");
        assert_eq!(result1.reasoning_text, "reasoning ");
        // Continue streaming reasoning
        let result2 = parser.parse_reasoning_streaming_incremental("content ", &[]);
        assert_eq!(result2.normal_text, "");
        assert_eq!(result2.reasoning_text, "content ");
        // End reasoning block
        let result3 = parser.parse_reasoning_streaming_incremental("more</think> normal", &[]);
        assert_eq!(result3.normal_text, " normal");
        assert_eq!(result3.reasoning_text, "more");
    }

    #[test]
    fn test_nested_reasoning_blocks() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.detect_and_parse_reasoning(
            "<think>outer <think>inner</think> reasoning</think> normal",
            &[],
        );
        // Cursor-based parsing: first <think> starts reasoning, first </think> ends it.
        // "outer <think>inner" is reasoning (inner <think> is just text within reasoning).
        // " reasoning</think> normal" is normal text (stray </think> passes through).
        assert_eq!(result.reasoning_text, "outer <think>inner");
        assert_eq!(result.normal_text, "reasoning</think> normal");
    }

    #[test]
    fn test_malformed_missing_closing_tag() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result =
            parser.detect_and_parse_reasoning("<think>reasoning without closing tag", &[]);
        assert_eq!(result.normal_text, "");
        assert_eq!(result.reasoning_text, "reasoning without closing tag");
    }

    #[test]
    fn test_malformed_stray_closing_tag() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.detect_and_parse_reasoning("normal text</think> more normal", &[]);
        assert_eq!(result.normal_text, "normal text</think> more normal");
        assert_eq!(result.reasoning_text, "");
    }

    #[test]
    fn test_malformed_multiple_opening_tags() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser
            .detect_and_parse_reasoning("<think>first <think>second reasoning</think> normal", &[]);
        // Cursor-based: first <think> opens reasoning, finds first </think>.
        // Inner <think> is just text within the reasoning block.
        assert_eq!(result.reasoning_text, "first <think>second reasoning");
        assert_eq!(result.normal_text, "normal");
    }

    #[test]
    fn test_empty_reasoning_block() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.detect_and_parse_reasoning("<think></think> normal text", &[]);
        assert_eq!(result.normal_text, "normal text");
        assert_eq!(result.reasoning_text, "");
    }

    #[test]
    fn test_whitespace_only_reasoning_block() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result =
            parser.detect_and_parse_reasoning("<think>  \n\t  </think> normal text", &[]);
        assert_eq!(result.normal_text, "normal text");
        assert_eq!(result.reasoning_text, ""); // Should be empty after trim
    }

    #[test]
    fn test_force_reasoning_mode() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), true, true);
        let result = parser.detect_and_parse_reasoning("no think tags here", &[]);
        assert_eq!(result.normal_text, "");
        assert_eq!(result.reasoning_text, "no think tags here");
    }

    #[test]
    fn test_streaming_reset_state_after_complete_block() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        // Process complete reasoning block
        let result1 =
            parser.parse_reasoning_streaming_incremental("<think>reasoning</think> normal", &[]);
        assert_eq!(result1.normal_text, " normal");
        assert_eq!(result1.reasoning_text, "reasoning");
        // Process normal text - should not be affected by previous state
        let result2 = parser.parse_reasoning_streaming_incremental(" more normal text", &[]);
        assert_eq!(result2.normal_text, " more normal text");
        assert_eq!(result2.reasoning_text, "");
        // Subsequent reasoning blocks should now be parsed (interleaved thinking)
        // The leading " " before <think> is normal-text prefix; " final" is suffix.
        let result3 = parser
            .parse_reasoning_streaming_incremental(" <think>new reasoning</think> final", &[]);
        assert_eq!(result3.reasoning_text, "new reasoning");
        assert_eq!(result3.normal_text, "  final"); // " " prefix + " final" suffix
        // Same test with separate chunks for clarity
        let mut parser2 =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser2.parse_reasoning_streaming_incremental("<think>first</think> normal", &[]);
        assert_eq!(r1.reasoning_text, "first");
        assert_eq!(r1.normal_text, " normal");
        let r2 = parser2.parse_reasoning_streaming_incremental(" between", &[]);
        assert_eq!(r2.normal_text, " between");
        assert_eq!(r2.reasoning_text, "");
        let r3 = parser2.parse_reasoning_streaming_incremental("<think>second</think> final", &[]);
        assert_eq!(r3.reasoning_text, "second");
        assert_eq!(r3.normal_text, " final");
    }

    #[test]
    fn test_post_reasoning_angle_bracket_not_buffered() {
        // After reasoning ends, a standalone `<` should pass through immediately
        // as normal text. It must NOT be buffered as a potential prefix of <think>
        // or </think>, because that would cause the downstream tool call jail to
        // miss the `<` (e.g., of `<invoke name=...>`).
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        // Process a complete reasoning block
        let r1 =
            parser.parse_reasoning_streaming_incremental("<think>reasoning content</think>", &[]);
        assert_eq!(r1.reasoning_text, "reasoning content");
        assert_eq!(r1.normal_text, "");
        // After reasoning ends, a lone `<` must pass through as normal text
        let r2 = parser.parse_reasoning_streaming_incremental("<", &[]);
        assert_eq!(r2.normal_text, "<");
        assert_eq!(r2.reasoning_text, "");
        // The next token should arrive independently (not merged with buffered `<`)
        let r3 = parser.parse_reasoning_streaming_incremental("invoke name=\"get_weather\">", &[]);
        assert_eq!(r3.normal_text, "invoke name=\"get_weather\">");
        assert_eq!(r3.reasoning_text, "");
    }

    #[test]
    fn test_post_reasoning_tool_call_xml_preserved() {
        // Simulates the MiniMax tool call scenario: reasoning followed by XML tool call.
        // The `<` in `<invoke ...>` must not be swallowed by the reasoning parser.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>let me check", &[]);
        assert_eq!(r1.reasoning_text, "let me check");
        let r2 = parser.parse_reasoning_streaming_incremental("</think>", &[]);
        assert_eq!(r2.normal_text, "");
        assert_eq!(r2.reasoning_text, "");
        // Tool call markers should pass through completely
        let r3 = parser.parse_reasoning_streaming_incremental("<minimax:tool_call>", &[]);
        assert_eq!(r3.normal_text, "<minimax:tool_call>");
        let r4 = parser.parse_reasoning_streaming_incremental("\n", &[]);
        assert_eq!(r4.normal_text, "\n");
        // `<` arriving as a separate token after reasoning must NOT be buffered
        let r5 = parser.parse_reasoning_streaming_incremental("<", &[]);
        assert_eq!(r5.normal_text, "<");
        let r6 = parser.parse_reasoning_streaming_incremental("invoke name=\"get_weather\">", &[]);
        assert_eq!(r6.normal_text, "invoke name=\"get_weather\">");
    }

    #[test]
    fn test_interleaved_streaming_across_chunks() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>thought 1</think>", &[]);
        assert_eq!(r1.reasoning_text, "thought 1");
        assert_eq!(r1.normal_text, "");
        let r2 = parser.parse_reasoning_streaming_incremental(" answer 1 ", &[]);
        assert_eq!(r2.normal_text, " answer 1 ");
        assert_eq!(r2.reasoning_text, "");
        let r3 = parser.parse_reasoning_streaming_incremental("<think>thought 2</think>", &[]);
        assert_eq!(r3.reasoning_text, "thought 2");
        assert_eq!(r3.normal_text, "");
        let r4 = parser.parse_reasoning_streaming_incremental(" answer 2", &[]);
        assert_eq!(r4.normal_text, " answer 2");
        assert_eq!(r4.reasoning_text, "");
        let r5 = parser.parse_reasoning_streaming_incremental("<think>thought 3</think>", &[]);
        assert_eq!(r5.reasoning_text, "thought 3");
        assert_eq!(r5.normal_text, "");
        let r6 = parser.parse_reasoning_streaming_incremental(" final answer", &[]);
        assert_eq!(r6.normal_text, " final answer");
        assert_eq!(r6.reasoning_text, "");
    }

    #[test]
    fn test_three_reasoning_blocks_non_streaming() {
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let result = parser.detect_and_parse_reasoning(
            "<think>A</think> one <think>B</think> two <think>C</think> three",
            &[],
        );
        assert_eq!(result.reasoning_text, "ABC");
        // " one " + " two " + " three", trimmed at the outer edges only.
        assert_eq!(result.normal_text, "one  two  three");
    }

    #[test]
    fn test_streaming_transition_chunk() {
        // </think> and <think> arrive in the same chunk.
        // With loop-based processing, the second block's opening content is emitted
        // immediately (stream_reasoning=true) rather than buffered until the next call.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>first", &[]);
        assert_eq!(r1.reasoning_text, "first");
        // Mid-chunk transition: </think> then normal text then <think> with more content.
        // The loop transitions out of reasoning, emits " middle " as normal text, enters
        // the next reasoning block, and streams "second" immediately.
        let r2 =
            parser.parse_reasoning_streaming_incremental("</think> middle <think>second", &[]);
        assert_eq!(r2.reasoning_text, "second");
        assert_eq!(r2.normal_text, " middle ");
        // Continuation of second reasoning block
        let r3 = parser.parse_reasoning_streaming_incremental(" more</think> end", &[]);
        assert_eq!(r3.reasoning_text, " more");
        assert_eq!(r3.normal_text, " end");
    }

    #[test]
    fn test_interleaved_with_force_reasoning() {
        // deepseek_r1 mode: force_reasoning=true, first tokens are reasoning without <think>
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), true, true);
        // No <think> tag — treated as reasoning because force_reasoning=true
        let r1 = parser.parse_reasoning_streaming_incremental("initial reasoning", &[]);
        assert_eq!(r1.reasoning_text, "initial reasoning");
        assert_eq!(r1.normal_text, "");
        // End of forced reasoning block
        let r2 = parser.parse_reasoning_streaming_incremental("</think> answer", &[]);
        assert_eq!(r2.reasoning_text, "");
        assert_eq!(r2.normal_text, " answer");
        // Second reasoning block with explicit <think>
        let r3 = parser
            .parse_reasoning_streaming_incremental("<think>second thought</think> done", &[]);
        assert_eq!(r3.reasoning_text, "second thought");
        assert_eq!(r3.normal_text, " done");
    }

    #[test]
    fn test_interleaved_partial_think_tag_between_blocks() {
        // After first reasoning block, partial <think> tag arrives across chunks
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>first</think> normal", &[]);
        assert_eq!(r1.reasoning_text, "first");
        assert_eq!(r1.normal_text, " normal");
        // Partial prefix: "<thi" is held back until it can be resolved
        let r2 = parser.parse_reasoning_streaming_incremental("<thi", &[]);
        assert_eq!(r2.normal_text, "");
        assert_eq!(r2.reasoning_text, "");
        // Complete the tag
        let r3 = parser.parse_reasoning_streaming_incremental("nk>second</think> end", &[]);
        assert_eq!(r3.reasoning_text, "second");
        assert_eq!(r3.normal_text, " end");
    }

    #[test]
    fn test_lone_angle_bracket_between_reasoning_blocks() {
        // A lone `<` between reasoning blocks should pass through (not buffer)
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>thought</think>", &[]);
        assert_eq!(r1.reasoning_text, "thought");
        // Lone `<` must not be buffered — could be a tool call
        let r2 = parser.parse_reasoning_streaming_incremental("<", &[]);
        assert_eq!(r2.normal_text, "<");
        assert_eq!(r2.reasoning_text, "");
        let r3 = parser.parse_reasoning_streaming_incremental("tool_call>", &[]);
        assert_eq!(r3.normal_text, "tool_call>");
        assert_eq!(r3.reasoning_text, "");
        // But a real <think> should still work after
        let r4 =
            parser.parse_reasoning_streaming_incremental("<think>more thought</think> done", &[]);
        assert_eq!(r4.reasoning_text, "more thought");
        assert_eq!(r4.normal_text, " done");
    }

    #[test]
    fn test_force_reasoning_stream_false_buffers_until_end_token() {
        // force_reasoning=true, stream_reasoning=false: content is buffered until
        // </think> arrives, then returned as a single chunk. This is the expected behavior.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), true, false);
        // No <think> — forced into reasoning, stream_reasoning=false means buffer silently
        let r1 = parser.parse_reasoning_streaming_incremental("chunk one", &[]);
        assert_eq!(r1.reasoning_text, "");
        assert_eq!(r1.normal_text, "");
        let r2 = parser.parse_reasoning_streaming_incremental(" chunk two", &[]);
        assert_eq!(r2.reasoning_text, "");
        assert_eq!(r2.normal_text, "");
        // </think> arrives — entire buffered reasoning is flushed
        let r3 = parser.parse_reasoning_streaming_incremental("</think> answer", &[]);
        assert_eq!(r3.reasoning_text, "chunk one chunk two");
        assert_eq!(r3.normal_text, " answer");
    }

    #[test]
    fn test_multiple_full_blocks_in_single_streaming_chunk() {
        // Two complete <think>...</think> blocks arrive in one chunk.
        // The loop exhausts all transitions in a single call — both blocks are fully
        // processed and no follow-up call is needed to flush buffered content.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental(
            "<think>A</think> mid <think>B</think> end",
            &[],
        );
        assert_eq!(r1.reasoning_text, "AB");
        assert_eq!(r1.normal_text, " mid  end"); // " mid " + " end"
        // Buffer is fully drained; empty follow-up returns nothing
        let r2 = parser.parse_reasoning_streaming_incremental("", &[]);
        assert_eq!(r2.reasoning_text, "");
        assert_eq!(r2.normal_text, "");
    }

    #[test]
    fn test_partial_end_token_stream_reasoning_true() {
        // Partial </think> split across chunks with stream_reasoning=true: the partial
        // suffix is held back until the next chunk resolves it.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>reasoning", &[]);
        assert_eq!(r1.reasoning_text, "reasoning");
        assert_eq!(r1.normal_text, "");
        // Partial end token while already in reasoning — buffered, nothing emitted
        let r2 = parser.parse_reasoning_streaming_incremental("</th", &[]);
        assert_eq!(r2.reasoning_text, "");
        assert_eq!(r2.normal_text, "");
        // Complete the end token
        let r3 = parser.parse_reasoning_streaming_incremental("ink> normal", &[]);
        assert_eq!(r3.reasoning_text, "");
        assert_eq!(r3.normal_text, " normal");
    }

    #[test]
    fn test_empty_string_input_various_states() {
        // Empty string input should always return empty results without changing state
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        // State: idle
        let r1 = parser.parse_reasoning_streaming_incremental("", &[]);
        assert_eq!(r1.reasoning_text, "");
        assert_eq!(r1.normal_text, "");
        // Enter reasoning
        parser.parse_reasoning_streaming_incremental("<think>content", &[]);
        // State: in reasoning
        let r2 = parser.parse_reasoning_streaming_incremental("", &[]);
        assert_eq!(r2.reasoning_text, "");
        assert_eq!(r2.normal_text, "");
        // Complete and exit reasoning
        parser.parse_reasoning_streaming_incremental("</think>", &[]);
        // State: post-reasoning (normal text)
        let r3 = parser.parse_reasoning_streaming_incremental("", &[]);
        assert_eq!(r3.reasoning_text, "");
        assert_eq!(r3.normal_text, "");
    }

    #[test]
    fn test_force_reasoning_stream_false_multiple_blocks() {
        // force_reasoning=true (deepseek_r1 mode), stream_reasoning=false.
        // First block uses forced-reasoning (no explicit <think>); subsequent blocks
        // use explicit <think> tags.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), true, false);
        // Forced reasoning without open tag, flushed on </think>
        let r1 = parser
            .parse_reasoning_streaming_incremental("initial reasoning</think> normal1 ", &[]);
        assert_eq!(r1.reasoning_text, "initial reasoning");
        assert_eq!(r1.normal_text, " normal1 ");
        // Subsequent explicit <think> block works correctly
        let r2 = parser
            .parse_reasoning_streaming_incremental("<think>second block</think> normal2", &[]);
        assert_eq!(r2.reasoning_text, "second block");
        assert_eq!(r2.normal_text, " normal2");
    }

    #[test]
    fn test_glm5_pattern_a_burst_single_chunk() {
        // GLM-5 Pattern A: the entire completion arrives in one SSE event.
        // Format: <think>T1</think><tool_call>A</tool_call><think>T2</think><tool_call>B</tool_call>
        //
        // Both reasoning blocks must be extracted into reasoning_text; both tool calls
        // must land in normal_text for the downstream tool call parser. No follow-up
        // call should be needed — the loop fully drains the buffer in a single call.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental(
            "<think>T1</think><tool_call>A</tool_call><think>T2</think><tool_call>B</tool_call>",
            &[],
        );
        assert_eq!(r1.reasoning_text, "T1T2");
        assert_eq!(
            r1.normal_text,
            "<tool_call>A</tool_call><tool_call>B</tool_call>"
        );
        // Buffer is fully drained; stream can end here with no content loss
        let r2 = parser.parse_reasoning_streaming_incremental("", &[]);
        assert_eq!(r2.reasoning_text, "");
        assert_eq!(r2.normal_text, "");
    }

    #[test]
    fn test_tool_call_xml_between_reasoning_blocks_streaming() {
        // GLM-5 Pattern A chunk-by-chunk: verifies that tool call XML between reasoning
        // blocks lands in normal_text, not reasoning_text, across separate SSE events.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>T1</think>", &[]);
        assert_eq!(r1.reasoning_text, "T1");
        assert_eq!(r1.normal_text, "");
        let r2 = parser.parse_reasoning_streaming_incremental("<tool_call>A</tool_call>", &[]);
        assert_eq!(r2.normal_text, "<tool_call>A</tool_call>");
        assert_eq!(r2.reasoning_text, "");
        let r3 = parser.parse_reasoning_streaming_incremental("<think>T2</think>", &[]);
        assert_eq!(r3.reasoning_text, "T2");
        assert_eq!(r3.normal_text, "");
        let r4 = parser.parse_reasoning_streaming_incremental("<tool_call>B</tool_call>", &[]);
        assert_eq!(r4.normal_text, "<tool_call>B</tool_call>");
        assert_eq!(r4.reasoning_text, "");
    }

    // =========================================================================
    // Mid-string partial tag tests (overlap-based buffering)
    //
    // These test scenarios where a <think> or </think> tag is split mid-string
    // (not at the start of the buffer). Backends that batch multiple forward-pass
    // tokens into a single chunked response can produce these patterns.
    //
    // Ported from PR #6448 (ryanolson) with additional fakeout tests.
    // =========================================================================

    #[test]
    fn test_mid_string_partial_opening_tag_batched() {
        // Backend batches tokens: "Hello world <thi" arrives as one chunk.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("Hello world <thi", &[]);
        assert_eq!(r1.normal_text, "Hello world ");
        assert_eq!(r1.reasoning_text, "");
        let r2 = parser
            .parse_reasoning_streaming_incremental("nk>reasoning content</think> answer", &[]);
        assert_eq!(r2.reasoning_text, "reasoning content");
        assert_eq!(r2.normal_text, " answer");
    }

    #[test]
    fn test_batched_tag_boundary_split() {
        // Aggressive batching: tag split with normal text prefix
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("The answer is <th", &[]);
        assert_eq!(r1.normal_text, "The answer is ");
        assert_eq!(r1.reasoning_text, "");
        let r2 = parser.parse_reasoning_streaming_incremental("ink>let me think</think>42", &[]);
        assert_eq!(r2.reasoning_text, "let me think");
        assert_eq!(r2.normal_text, "42");
    }

    #[test]
    fn test_mid_string_partial_closing_tag_stream_reasoning_false() {
        // With stream_reasoning=false, content stays buffered until </think>.
        // Partial </think> split mid-string while in reasoning mode.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, false);
        let r1 = parser
            .parse_reasoning_streaming_incremental("<think>reasoning content and </thi", &[]);
        assert_eq!(r1.reasoning_text, "");
        assert_eq!(r1.normal_text, "");
        let r2 = parser.parse_reasoning_streaming_incremental("nk> normal text", &[]);
        assert_eq!(r2.reasoning_text, "reasoning content and ");
        assert_eq!(r2.normal_text, " normal text");
    }

    #[test]
    fn test_mid_string_partial_closing_tag_stream_reasoning_true() {
        // With stream_reasoning=true, reasoning content is emitted incrementally.
        // The partial "</thi" suffix must be held back, not emitted as reasoning.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser
            .parse_reasoning_streaming_incremental("<think>reasoning content and </thi", &[]);
        assert_eq!(r1.reasoning_text, "reasoning content and ");
        assert_eq!(r1.normal_text, "");
        let r2 = parser.parse_reasoning_streaming_incremental("nk> normal text", &[]);
        assert_eq!(r2.reasoning_text, "");
        assert_eq!(r2.normal_text, " normal text");
    }

    #[test]
    fn test_batched_interleaved_with_mid_string_partial() {
        // First block complete in chunk 1, second block's <think> split at boundary
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser
            .parse_reasoning_streaming_incremental("<think>thought1</think>answer1<thi", &[]);
        assert_eq!(r1.reasoning_text, "thought1");
        assert_eq!(r1.normal_text, "answer1");
        let r2 = parser.parse_reasoning_streaming_incremental("nk>thought2</think>answer2", &[]);
        assert_eq!(r2.reasoning_text, "thought2");
        assert_eq!(r2.normal_text, "answer2");
    }

    #[test]
    fn test_partial_tag_false_positive() {
        // "<th" is a prefix of "<think>", but what follows does not complete the tag.
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("value <th", &[]);
        // "<th" is held back as a potential partial tag
        assert_eq!(r1.normal_text, "value ");
        // Next chunk resolves the ambiguity: "<th AI>" is not "<think>" — all emitted
        let r2 = parser.parse_reasoning_streaming_incremental(" AI> is great", &[]);
        let combined_normal = format!("{}{}", r1.normal_text, r2.normal_text);
        assert_eq!(combined_normal, "value <th AI> is great");
        assert_eq!(r1.reasoning_text, "");
        assert_eq!(r2.reasoning_text, "");
    }

    #[test]
    fn test_partial_closing_tag_fakeout() {
        // Ollama-style fakeout: "</th" + "ing>" completes "</thing>" not "</think>"
        let mut parser =
            BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
        let r1 = parser.parse_reasoning_streaming_incremental("<think>abc</th", &[]);
        assert_eq!(r1.reasoning_text, "abc");
        assert_eq!(r1.normal_text, "");
        // "ing>def" completes the partial as "</thing>def" — not a closing tag
        let r2 = parser.parse_reasoning_streaming_incremental("ing>def", &[]);
        assert_eq!(r2.reasoning_text, "</thing>def");
        assert_eq!(r2.normal_text, "");
        // Real closing tag arrives
        let r3 = parser.parse_reasoning_streaming_incremental("</think>done", &[]);
        assert_eq!(r3.reasoning_text, "");
        assert_eq!(r3.normal_text, "done");
    }

    #[test]
    fn test_overlap_helper_function() {
        // Direct tests for the overlap utility
        assert_eq!(overlap("abc<thi", "<think>"), 4);
        assert_eq!(overlap("abcdef", "<think>"), 0);
        assert_eq!(overlap("<", "<think>"), 1);
        assert_eq!(overlap("<th", "<think>"), 3);
        assert_eq!(overlap("<think>", "<think>"), 7); // full match
        assert_eq!(overlap("no match", "<think>"), 0);
        assert_eq!(overlap("", "<think>"), 0);
        assert_eq!(overlap("Hello world <thi", "<think>"), 4);
        // Multi-byte delimiters (Kimi parser uses ◁think▷ / ◁/think▷)
        assert_eq!(overlap("text◁", "◁think▷"), 3); // ◁ is 3 bytes
        assert_eq!(overlap("text◁th", "◁think▷"), 5);
        assert_eq!(overlap("text◁/thi", "◁/think▷"), 7);
        assert_eq!(overlap("no match", "◁think▷"), 0);
    }
}
|