Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d82b0050
Unverified
Commit
d82b0050
authored
Feb 20, 2026
by
MatejKosec
Committed by
GitHub
Feb 20, 2026
Browse files
feat: interleaved thinking support in reasoning parser (#6422)
Signed-off-by:
Matej Kosec
<
mkosec@nvidia.com
>
parent
7409bd3a
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
662 additions
and
105 deletions
+662
-105
lib/parsers/src/reasoning/base_parser.rs
lib/parsers/src/reasoning/base_parser.rs
+660
-105
lib/parsers/src/reasoning/mod.rs
lib/parsers/src/reasoning/mod.rs
+2
-0
No files found.
lib/parsers/src/reasoning/base_parser.rs
View file @
d82b0050
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! # Reasoning and Tool Call Interplay
//!
//! Models like GLM-4.5/4.7 and Qwen3 interleave reasoning blocks with tool calls:
//!
//! ```text
//! <think>reasoning about what tool to call</think>
//! <tool_call>get_weather<arg_key>city</arg_key><arg_value>Beijing</arg_value></tool_call>
//! <think>reasoning about the result</think>
//! <tool_call>summarize<arg_key>text</arg_key><arg_value>...</arg_value></tool_call>
//! ```
//!
//! The reasoning parser and the tool call parser are **independent, sequential** stages:
//!
//! 1. **Reasoning parser** (`BasicReasoningParser`) splits the stream into:
//! - `reasoning_content`: everything inside `<think>...</think>` blocks
//! - `normal_text`: everything outside (including tool call tags)
//! 2. **Tool call parser** (`glm47` / others) then processes `normal_text` to extract
//! `<tool_call>...</tool_call>` blocks.
//!
//! This means tool calls **must** appear outside `<think>` blocks to be detected.
//! If a model erroneously emits a tool call inside a `<think>` block (observed in
//! GLM-4.7 under very long contexts), the tool call parser will not see it.
//!
//! ## `force_reasoning` and tokenizer behavior
//!
//! Some models (e.g. GLM-5-FP8 served via ZAI) consume `<think>` as a special
//! tokenizer token and never emit it as literal text. In that case use
//! `force_reasoning=true` (`deepseek_r1` parser), which treats all output as
//! reasoning until `</think>` is seen. Models that do emit `<think>` as text
//! (standard serving, Qwen3, GLM-4.5) should use `force_reasoning=false`
//! (`glm45`, `nemotron_deci`, `qwen3` parsers).
use
crate
::{
ParserResult
,
ReasoningParser
};
/// Computes how many trailing bytes of `s` coincide with the leading bytes of `delim`.
///
/// The result is the byte length of the longest suffix of `s` that is also a prefix
/// of `delim`, or `0` when there is no such overlap. Candidate lengths that would cut
/// `delim` in the middle of a UTF-8 code point (e.g. the multi-byte `◁` in Kimi tags)
/// are never considered.
///
/// Ported from ollama's `thinking/parser.go::overlap()`. Used to detect partial
/// tags split across streaming chunk boundaries (e.g., `"Hello world <th"` where
/// `<th` is a prefix of `<think>`).
fn overlap(s: &str, delim: &str) -> usize {
    let longest_candidate = s.len().min(delim.len());
    // Walk candidate lengths from longest to shortest so the first hit is the maximum.
    (1..=longest_candidate)
        .rev()
        .filter(|&len| delim.is_char_boundary(len))
        .find(|&len| s.ends_with(&delim[..len]))
        .unwrap_or(0)
}
#[derive(Default,
Debug,
Clone)]
pub
struct
BasicReasoningParser
{
think_start_token
:
String
,
...
...
@@ -33,7 +83,8 @@ impl BasicReasoningParser {
impl
ReasoningParser
for
BasicReasoningParser
{
fn
detect_and_parse_reasoning
(
&
mut
self
,
text
:
&
str
,
_
token_ids
:
&
[
u32
])
->
ParserResult
{
let
in_reasoning
=
self
._in_reasoning
||
text
.contains
(
&
self
.think_start_token
);
let
has_think_tag
=
text
.contains
(
&
self
.think_start_token
);
let
in_reasoning
=
self
._in_reasoning
||
has_think_tag
;
if
!
in_reasoning
{
return
ParserResult
{
normal_text
:
text
.to_string
(),
...
...
@@ -41,24 +92,53 @@ impl ReasoningParser for BasicReasoningParser {
};
}
// The text is considered to be in a reasoning block.
let
processed_text
=
text
.replace
(
&
self
.think_start_token
,
""
)
.trim
()
.to_string
();
if
!
processed_text
.contains
(
&
self
.think_end_token
)
{
// Assume reasoning was truncated before `think_end_token`
// If force_reasoning and no start tag, treat entire text as reasoning
if
self
._in_reasoning
&&
!
has_think_tag
&&
!
text
.contains
(
&
self
.think_end_token
)
{
return
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
processed_text
,
reasoning_text
:
text
.to_string
()
,
};
}
// Extract reasoning content
let
splits
:
Vec
<&
str
>
=
processed_text
.splitn
(
2
,
&
self
.think_end_token
)
.collect
();
let
reasoning_text
=
splits
.first
()
.unwrap_or
(
&
""
)
.to_string
();
let
normal_text
=
splits
.get
(
1
)
.map
(|
s
|
s
.trim
()
.to_string
())
.unwrap_or_default
();
// Extract all <think>...</think> pairs using cursor-based iteration
let
mut
reasoning_parts
=
Vec
::
new
();
let
mut
normal_parts
=
Vec
::
new
();
let
mut
cursor
=
0
;
let
mut
currently_reasoning
=
self
._in_reasoning
;
while
cursor
<
text
.len
()
{
if
currently_reasoning
{
// We're inside a reasoning block — look for end token
if
let
Some
(
end_offset
)
=
text
[
cursor
..
]
.find
(
&
self
.think_end_token
)
{
reasoning_parts
.push
(
&
text
[
cursor
..
cursor
+
end_offset
]);
cursor
+=
end_offset
+
self
.think_end_token
.len
();
currently_reasoning
=
false
;
}
else
{
// No end token — rest is reasoning (truncated)
reasoning_parts
.push
(
&
text
[
cursor
..
]);
cursor
=
text
.len
();
}
}
else
{
// We're in normal text — look for start token
if
let
Some
(
start_offset
)
=
text
[
cursor
..
]
.find
(
&
self
.think_start_token
)
{
normal_parts
.push
(
&
text
[
cursor
..
cursor
+
start_offset
]);
cursor
+=
start_offset
+
self
.think_start_token
.len
();
currently_reasoning
=
true
;
}
else
{
// No more think blocks — rest is normal text
normal_parts
.push
(
&
text
[
cursor
..
]);
cursor
=
text
.len
();
}
}
}
let
reasoning_text
=
reasoning_parts
.join
(
""
)
.trim
()
.to_string
();
let
normal_text
=
normal_parts
.join
(
""
)
.trim
()
.to_string
();
// Note: self._in_reasoning is intentionally NOT updated here. This method is
// documented to "reset or ignore internal streaming state" (see trait doc). Callers
// should not mix detect_and_parse_reasoning with parse_reasoning_streaming_incremental
// on the same parser instance.
ParserResult
{
normal_text
,
...
...
@@ -71,85 +151,92 @@ impl ReasoningParser for BasicReasoningParser {
text
:
&
str
,
_
token_ids
:
&
[
u32
],
)
->
ParserResult
{
// Incrementally parse the streaming text
self
._buffer
.push_str
(
text
);
let
mut
current_text
=
self
._buffer
.to_string
();
// If the current text is a prefix of the think token, keep buffering.
// Only buffer for start token if we haven't found it yet.
// Only buffer for end token if we're currently inside a reasoning block.
// After reasoning ends, all content passes through as normal text.
let
mut
accumulated_normal
=
String
::
new
();
let
mut
accumulated_reasoning
=
String
::
new
();
// Loop to exhaust all state transitions within a single chunk. Without this,
// a chunk containing two complete <think>...</think> blocks would process only
// the first transition and buffer the rest, risking content loss at end-of-stream.
loop
{
let
current_text
=
self
._buffer
.clone
();
// Strip leading <think> tag if not yet stripped. Handles two cases:
// 1. force_reasoning=true where the model also emits <think> as text
// 2. First call where <think> arrives at buffer position 0
// Mid-text <think> (position > 0) falls through to the find() branch below.
if
!
self
.stripped_think_start
&&
self
.think_start_token
.starts_with
(
&
current_text
)
&&
self
.think_start_token
.as_str
()
!=
current_text
.as_str
()
&&
current_text
.starts_with
(
self
.think_start_token
.as_str
())
{
return
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
String
::
new
(),
};
}
if
self
._in_reasoning
&&
self
.think_end_token
.starts_with
(
&
current_text
)
&&
self
.think_end_token
.as_str
()
!=
current_text
.as_str
()
{
return
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
String
::
new
(),
};
}
// Strip `<think>` token if present
if
!
self
.stripped_think_start
&&
current_text
.contains
(
&
self
.think_start_token
)
{
current_text
=
current_text
.replace
(
&
self
.think_start_token
,
""
);
self
._buffer
=
current_text
.to_string
();
self
._buffer
=
current_text
[
self
.think_start_token
.len
()
..
]
.to_string
();
self
.stripped_think_start
=
true
;
self
._in_reasoning
=
true
;
continue
;
}
// Handle end of reasoning block
let
mut
think_end_idx
=
current_text
.len
();
if
self
._in_reasoning
{
think_end_idx
=
current_text
.find
(
&
self
.think_end_token
)
.unwrap_or
(
current_text
.len
());
}
if
self
._in_reasoning
&&
think_end_idx
<
current_text
.len
()
{
let
reasoning_text
=
&
current_text
[
..
think_end_idx
];
self
._buffer
.clear
();
if
let
Some
(
end_idx
)
=
current_text
.find
(
self
.think_end_token
.as_str
())
{
// End of reasoning block: accumulate content and transition out.
accumulated_reasoning
.push_str
(
&
current_text
[
..
end_idx
]);
let
after_end
=
end_idx
+
self
.think_end_token
.len
();
self
._buffer
=
current_text
[
after_end
..
]
.to_string
();
self
._in_reasoning
=
false
;
let
start_idx
=
think_end_idx
+
self
.think_end_token
.len
();
let
normal_text
=
if
start_idx
<
current_text
.len
()
{
&
current_text
[
start_idx
..
]
self
.stripped_think_start
=
false
;
// Allow detecting next <think> block
continue
;
// Process remainder — may contain further blocks
}
else
{
""
};
return
ParserResult
{
normal_text
:
normal_text
.to_string
(),
reasoning_text
:
reasoning_text
.to_string
(),
};
// No complete end token — check for partial at end of buffer
// (e.g., "reasoning content</th" where "</th" is a prefix of "</think>").
if
self
.stream_reasoning
{
let
ol
=
overlap
(
&
current_text
,
&
self
.think_end_token
);
if
ol
>=
2
{
let
safe_end
=
current_text
.len
()
-
ol
;
if
safe_end
>
0
{
accumulated_reasoning
.push_str
(
&
current_text
[
..
safe_end
]);
}
// Continue with reasoning content
if
self
._in_reasoning
&&
self
.stream_reasoning
{
// Stream the content immediately
let
reasoning_text
=
current_text
;
self
._buffer
=
current_text
[
safe_end
..
]
.to_string
();
}
else
{
accumulated_reasoning
.push_str
(
&
current_text
);
self
._buffer
.clear
();
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
,
}
}
else
if
!
self
._in_reasoning
{
// If we're not in a reasoning block return as normal text
let
normal_text
=
current_text
;
self
._buffer
.clear
();
ParserResult
{
normal_text
,
reasoning_text
:
String
::
new
(),
}
// When stream_reasoning=false, buffer retains all content until
// </think> arrives — no overlap check needed.
break
;
}
}
else
{
// If we are in a reasoning block but no end token is found, return the current buffer
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
String
::
new
(),
// Not in reasoning — look for the next <think> block.
if
let
Some
(
think_pos
)
=
current_text
.find
(
self
.think_start_token
.as_str
())
{
accumulated_normal
.push_str
(
&
current_text
[
..
think_pos
]);
let
after_start
=
think_pos
+
self
.think_start_token
.len
();
self
._buffer
=
current_text
[
after_start
..
]
.to_string
();
self
._in_reasoning
=
true
;
self
.stripped_think_start
=
true
;
continue
;
// Process reasoning content
}
else
{
// No complete start token — check for partial at end of buffer
// (e.g., "Hello world <th" where "<th" is a prefix of "<think>").
// Require overlap >= 2 so a lone `<` passes through for tool call
// XML tags like `<invoke>` or `<minimax:tool_call>`.
let
ol
=
overlap
(
&
current_text
,
&
self
.think_start_token
);
if
ol
>=
2
{
let
safe_end
=
current_text
.len
()
-
ol
;
if
safe_end
>
0
{
accumulated_normal
.push_str
(
&
current_text
[
..
safe_end
]);
}
self
._buffer
=
current_text
[
safe_end
..
]
.to_string
();
}
else
{
accumulated_normal
.push_str
(
&
current_text
);
self
._buffer
.clear
();
}
break
;
}
}
}
ParserResult
{
normal_text
:
accumulated_normal
,
reasoning_text
:
accumulated_reasoning
,
}
}
}
...
...
@@ -222,9 +309,8 @@ mod tests {
"<think>first reasoning</think> middle <think>second reasoning</think> end"
,
&
[],
);
// The current implementation only handles the first occurrence properly
assert_eq!
(
result
.normal_text
,
"middle second reasoning</think> end"
);
assert_eq!
(
result
.reasoning_text
,
"first reasoning"
);
assert_eq!
(
result
.normal_text
,
"middle end"
);
assert_eq!
(
result
.reasoning_text
,
"first reasoningsecond reasoning"
);
}
#[test]
...
...
@@ -236,11 +322,11 @@ mod tests {
assert_eq!
(
result1
.normal_text
,
" middle"
);
assert_eq!
(
result1
.reasoning_text
,
"first reasoning"
);
//
Basic parser assumes only one reasoning block at a time
//
Second reasoning block: space before <think> is normal prefix, reasoning extracted
let
result2
=
parser
.parse_reasoning_streaming_incremental
(
" <think>second reasoning</think> end"
,
&
[]);
assert_eq!
(
result2
.
normal_text
,
" <think>
second reasoning
</think> end
"
);
assert_eq!
(
result2
.
reasoning_text
,
""
);
assert_eq!
(
result2
.
reasoning_text
,
"
second reasoning"
);
assert_eq!
(
result2
.
normal_text
,
" end"
);
// " " prefix + " end" suffix
}
#[test]
...
...
@@ -334,10 +420,11 @@ mod tests {
"<think>outer <think>inner</think> reasoning</think> normal"
,
&
[],
);
// Current implementation should handle this by finding the first closing tag
// Cursor-based parsing: first <think> starts reasoning, first </think> ends it.
// "outer <think>inner" is reasoning (inner <think> is just text within reasoning).
// " reasoning</think> normal" is normal text (stray </think> passes through).
assert_eq!
(
result
.reasoning_text
,
"outer <think>inner"
);
assert_eq!
(
result
.normal_text
,
"reasoning</think> normal"
);
// All <think> tags are stripped, so <think>inner is not included
assert_eq!
(
result
.reasoning_text
,
"outer inner"
);
}
#[test]
...
...
@@ -364,9 +451,10 @@ mod tests {
BasicReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>first <think>second reasoning</think> normal"
,
&
[]);
// Should handle by replacing all opening tags and using first closing tag
// Cursor-based: first <think> opens reasoning, finds first </think>.
// Inner <think> is just text within the reasoning block.
assert_eq!
(
result
.reasoning_text
,
"first <think>second reasoning"
);
assert_eq!
(
result
.normal_text
,
"normal"
);
assert_eq!
(
result
.reasoning_text
,
"first second reasoning"
);
}
#[test]
...
...
@@ -399,7 +487,7 @@ mod tests {
#[test]
fn
test_streaming_reset_state_after_complete_block
()
{
let
mut
parser
=
BasicReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
fals
e
);
BasicReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
tru
e
);
// Process complete reasoning block
let
result1
=
...
...
@@ -412,12 +500,28 @@ mod tests {
assert_eq!
(
result2
.normal_text
,
" more normal text"
);
assert_eq!
(
result2
.reasoning_text
,
""
);
//
Basic parser does not expect more than one reasoning block at a time
//
So this should not affect the state
//
Subsequent reasoning blocks should now be parsed (interleaved thinking)
//
The leading " " before <think> is normal-text prefix; " final" is suffix.
let
result3
=
parser
.parse_reasoning_streaming_incremental
(
" <think>new reasoning</think> final"
,
&
[]);
assert_eq!
(
result3
.normal_text
,
" <think>new reasoning</think> final"
);
assert_eq!
(
result3
.reasoning_text
,
""
);
assert_eq!
(
result3
.reasoning_text
,
"new reasoning"
);
assert_eq!
(
result3
.normal_text
,
" final"
);
// " " prefix + " final" suffix
// Same test with separate chunks for clarity
let
mut
parser2
=
BasicReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
r1
=
parser2
.parse_reasoning_streaming_incremental
(
"<think>first</think> normal"
,
&
[]);
assert_eq!
(
r1
.reasoning_text
,
"first"
);
assert_eq!
(
r1
.normal_text
,
" normal"
);
let
r2
=
parser2
.parse_reasoning_streaming_incremental
(
" between"
,
&
[]);
assert_eq!
(
r2
.normal_text
,
" between"
);
assert_eq!
(
r2
.reasoning_text
,
""
);
let
r3
=
parser2
.parse_reasoning_streaming_incremental
(
"<think>second</think> final"
,
&
[]);
assert_eq!
(
r3
.reasoning_text
,
"second"
);
assert_eq!
(
r3
.normal_text
,
" final"
);
}
#[test]
...
...
@@ -474,4 +578,455 @@ mod tests {
let
r6
=
parser
.parse_reasoning_streaming_incremental
(
"invoke name=
\"
get_weather
\"
>"
,
&
[]);
assert_eq!
(
r6
.normal_text
,
"invoke name=
\"
get_weather
\"
>"
);
}
#[test]
fn test_interleaved_streaming_across_chunks() {
    // Three reasoning blocks alternate with answer text, one piece per streamed chunk.
    // Each row: (chunk fed to the parser, expected reasoning_text, expected normal_text).
    let steps = [
        ("<think>thought 1</think>", "thought 1", ""),
        (" answer 1 ", "", " answer 1 "),
        ("<think>thought 2</think>", "thought 2", ""),
        (" answer 2", "", " answer 2"),
        ("<think>thought 3</think>", "thought 3", ""),
        (" final answer", "", " final answer"),
    ];

    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);
    for (chunk, want_reasoning, want_normal) in steps {
        let out = parser.parse_reasoning_streaming_incremental(chunk, &[]);
        assert_eq!(out.reasoning_text, want_reasoning, "reasoning for chunk {:?}", chunk);
        assert_eq!(out.normal_text, want_normal, "normal text for chunk {:?}", chunk);
    }
}
#[test]
fn test_three_reasoning_blocks_non_streaming() {
    // One-shot parse of three complete <think>...</think> blocks separated by normal text.
    let mut parser =
        BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);
    let result = parser.detect_and_parse_reasoning(
        "<think>A</think> one <think>B</think> two <think>C</think> three",
        &[],
    );
    // Reasoning segments are concatenated with no separator.
    assert_eq!(result.reasoning_text, "ABC");
    // The normal segments " one ", " two " and " three" are concatenated verbatim and only
    // the outer ends are trimmed, so the interior keeps two spaces between words.
    assert_eq!(result.normal_text, "one  two  three");
}
#[test]
fn test_streaming_transition_chunk() {
    // A single chunk carries both the close of one reasoning block and the opening of
    // the next. With stream_reasoning=true the new block's first content is expected in
    // the same result instead of being held until the following call.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);

    let opening = parser.parse_reasoning_streaming_incremental("<think>first", &[]);
    assert_eq!(opening.reasoning_text, "first");

    // "</think>" closes block one, " middle " is normal text, "<think>" opens block two
    // and "second" is streamed as reasoning right away.
    let transition = parser.parse_reasoning_streaming_incremental("</think> middle <think>second", &[]);
    assert_eq!(transition.reasoning_text, "second");
    assert_eq!(transition.normal_text, " middle ");

    // The second block continues and then closes.
    let closing = parser.parse_reasoning_streaming_incremental(" more</think> end", &[]);
    assert_eq!(closing.reasoning_text, " more");
    assert_eq!(closing.normal_text, " end");
}
#[test]
fn test_interleaved_with_force_reasoning() {
    // deepseek_r1 mode (force_reasoning=true): output starts as reasoning even though no
    // <think> tag was emitted; a later block uses an explicit <think> tag.
    // Each row: (chunk, expected reasoning_text, expected normal_text).
    let steps = [
        // No <think> tag, still treated as reasoning.
        ("initial reasoning", "initial reasoning", ""),
        // End of the forced reasoning block.
        ("</think> answer", "", " answer"),
        // Second block with an explicit <think>.
        ("<think>second thought</think> done", "second thought", " done"),
    ];

    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), true, true);
    for (chunk, want_reasoning, want_normal) in steps {
        let out = parser.parse_reasoning_streaming_incremental(chunk, &[]);
        assert_eq!(out.reasoning_text, want_reasoning, "reasoning for chunk {:?}", chunk);
        assert_eq!(out.normal_text, want_normal, "normal text for chunk {:?}", chunk);
    }
}
#[test]
fn test_interleaved_partial_think_tag_between_blocks() {
    // After a first complete reasoning block, the opening tag of the second block is
    // split across two chunks.
    let mut parser =
        BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);

    let r1 = parser.parse_reasoning_streaming_incremental("<think>first</think> normal", &[]);
    assert_eq!(r1.reasoning_text, "first");
    assert_eq!(r1.normal_text, " normal");

    // Partial <think> prefix "<th" is 3 characters long, which satisfies the
    // overlap >= 2 threshold, so it is held back and nothing is emitted yet.
    let r2 = parser.parse_reasoning_streaming_incremental("<th", &[]);
    assert_eq!(r2.normal_text, "");
    assert_eq!(r2.reasoning_text, "");

    // The rest of the tag arrives; the second block is parsed normally.
    let r3 = parser.parse_reasoning_streaming_incremental("ink>second</think> end", &[]);
    assert_eq!(r3.reasoning_text, "second");
    assert_eq!(r3.normal_text, " end");
}
#[test]
fn test_lone_angle_bracket_between_reasoning_blocks() {
    // Between reasoning blocks a single `<` must be forwarded as normal text rather
    // than being buffered as a possible start of `<think>`.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);

    let first_block = parser.parse_reasoning_streaming_incremental("<think>thought</think>", &[]);
    assert_eq!(first_block.reasoning_text, "thought");

    // The lone `<` may be the beginning of a tool call tag, so it passes through.
    let bracket = parser.parse_reasoning_streaming_incremental("<", &[]);
    assert_eq!(bracket.normal_text, "<");
    assert_eq!(bracket.reasoning_text, "");

    let tag_rest = parser.parse_reasoning_streaming_incremental("tool_call>", &[]);
    assert_eq!(tag_rest.normal_text, "tool_call>");
    assert_eq!(tag_rest.reasoning_text, "");

    // A genuine <think> block afterwards is still recognised.
    let second_block =
        parser.parse_reasoning_streaming_incremental("<think>more thought</think> done", &[]);
    assert_eq!(second_block.reasoning_text, "more thought");
    assert_eq!(second_block.normal_text, " done");
}
#[test]
fn test_force_reasoning_stream_false_buffers_until_end_token() {
    // force_reasoning=true with stream_reasoning=false: reasoning chunks produce no
    // output until </think> shows up, at which point everything buffered so far is
    // returned in one piece.
    // Each row: (chunk, expected reasoning_text, expected normal_text).
    let steps = [
        // No <think>: forced into reasoning, silently buffered.
        ("chunk one", "", ""),
        (" chunk two", "", ""),
        // </think> arrives: the whole buffered reasoning is flushed.
        ("</think> answer", "chunk one chunk two", " answer"),
    ];

    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), true, false);
    for (chunk, want_reasoning, want_normal) in steps {
        let out = parser.parse_reasoning_streaming_incremental(chunk, &[]);
        assert_eq!(out.reasoning_text, want_reasoning, "reasoning for chunk {:?}", chunk);
        assert_eq!(out.normal_text, want_normal, "normal text for chunk {:?}", chunk);
    }
}
#[test]
fn test_multiple_full_blocks_in_single_streaming_chunk() {
    // Two complete <think>...</think> blocks arrive in one chunk.
    // The loop exhausts all transitions in a single call — both blocks are fully
    // processed and no follow-up call is needed to flush buffered content.
    let mut parser =
        BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);

    let r1 = parser.parse_reasoning_streaming_incremental(
        "<think>A</think> mid <think>B</think> end",
        &[],
    );
    assert_eq!(r1.reasoning_text, "AB");
    // Streaming output is not trimmed: " mid " and " end" are appended as-is, so the
    // space after "mid" and the space before "end" are both kept (two spaces in a row).
    assert_eq!(r1.normal_text, " mid  end");

    // Buffer is fully drained; empty follow-up returns nothing
    let r2 = parser.parse_reasoning_streaming_incremental("", &[]);
    assert_eq!(r2.reasoning_text, "");
    assert_eq!(r2.normal_text, "");
}
#[test]
fn test_partial_end_token_stream_reasoning_true() {
    // Partial </think> split across chunks with stream_reasoning=true, where the
    // partial tag "</th" arrives as a chunk of its own after reasoning content has
    // already been streamed. The partial tag must be held back (not emitted as
    // reasoning) until the following chunk shows whether it completes "</think>".
    let mut parser =
        BasicReasoningParser::new("<think>".to_string(), "</think>".to_string(), false, true);

    // Opening tag plus first reasoning content: streamed immediately.
    let r1 = parser.parse_reasoning_streaming_incremental("<think>reasoning", &[]);
    assert_eq!(r1.reasoning_text, "reasoning");
    assert_eq!(r1.normal_text, "");

    // Partial end token while already in reasoning — buffered, nothing emitted
    let r2 = parser.parse_reasoning_streaming_incremental("</th", &[]);
    assert_eq!(r2.reasoning_text, "");
    assert_eq!(r2.normal_text, "");

    // Complete the end token: no extra reasoning, the remainder is normal text.
    let r3 = parser.parse_reasoning_streaming_incremental("ink> normal", &[]);
    assert_eq!(r3.reasoning_text, "");
    assert_eq!(r3.normal_text, " normal");
}
#[test]
fn test_empty_string_input_various_states() {
    // Feeding an empty chunk must yield empty reasoning and normal text in each parser
    // state exercised here: idle, inside a reasoning block, and after the block closed.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);

    // Idle: nothing has been fed yet.
    let idle = parser.parse_reasoning_streaming_incremental("", &[]);
    assert_eq!(idle.reasoning_text, "");
    assert_eq!(idle.normal_text, "");

    // Move into a reasoning block; the output of this call is not inspected.
    let _ = parser.parse_reasoning_streaming_incremental("<think>content", &[]);

    // Inside reasoning.
    let inside = parser.parse_reasoning_streaming_incremental("", &[]);
    assert_eq!(inside.reasoning_text, "");
    assert_eq!(inside.normal_text, "");

    // Close the reasoning block; again the output is not inspected.
    let _ = parser.parse_reasoning_streaming_incremental("</think>", &[]);

    // After reasoning (normal text state).
    let after = parser.parse_reasoning_streaming_incremental("", &[]);
    assert_eq!(after.reasoning_text, "");
    assert_eq!(after.normal_text, "");
}
#[test]
fn test_force_reasoning_stream_false_multiple_blocks() {
    // deepseek_r1 mode (force_reasoning=true) with stream_reasoning=false.
    // The first block has no opening tag and relies on forced reasoning; the second
    // block is delimited by explicit tags.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), true, false);
    let mut feed = |chunk: &str| parser.parse_reasoning_streaming_incremental(chunk, &[]);

    // Forced reasoning without an opening tag, flushed when </think> is seen.
    let forced = feed("initial reasoning</think> normal1 ");
    assert_eq!(forced.reasoning_text, "initial reasoning");
    assert_eq!(forced.normal_text, " normal1 ");

    // A following explicit <think> block is parsed as well.
    let explicit = feed("<think>second block</think> normal2");
    assert_eq!(explicit.reasoning_text, "second block");
    assert_eq!(explicit.normal_text, " normal2");
}
#[test]
fn test_glm5_pattern_a_burst_single_chunk() {
    // GLM-5 Pattern A delivered as one SSE event:
    //   <think>T1</think><tool_call>A</tool_call><think>T2</think><tool_call>B</tool_call>
    //
    // Expectation: both reasoning blocks end up in reasoning_text and both tool calls
    // end up in normal_text (for the downstream tool call parser), all from one call.
    let burst =
        "<think>T1</think><tool_call>A</tool_call><think>T2</think><tool_call>B</tool_call>";
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);

    let parsed = parser.parse_reasoning_streaming_incremental(burst, &[]);
    assert_eq!(parsed.reasoning_text, "T1T2");
    assert_eq!(parsed.normal_text, "<tool_call>A</tool_call><tool_call>B</tool_call>");

    // Nothing may be left over: an empty follow-up chunk yields empty output.
    let leftover = parser.parse_reasoning_streaming_incremental("", &[]);
    assert_eq!(leftover.reasoning_text, "");
    assert_eq!(leftover.normal_text, "");
}
#[test]
fn test_tool_call_xml_between_reasoning_blocks_streaming() {
    // GLM-5 Pattern A delivered chunk by chunk: tool call XML between reasoning blocks
    // must be reported as normal_text, never as reasoning_text.
    // Each row: (chunk, expected reasoning_text, expected normal_text).
    let steps = [
        ("<think>T1</think>", "T1", ""),
        ("<tool_call>A</tool_call>", "", "<tool_call>A</tool_call>"),
        ("<think>T2</think>", "T2", ""),
        ("<tool_call>B</tool_call>", "", "<tool_call>B</tool_call>"),
    ];

    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);
    for (chunk, want_reasoning, want_normal) in steps {
        let out = parser.parse_reasoning_streaming_incremental(chunk, &[]);
        assert_eq!(out.reasoning_text, want_reasoning, "reasoning for chunk {:?}", chunk);
        assert_eq!(out.normal_text, want_normal, "normal text for chunk {:?}", chunk);
    }
}
// =========================================================================
// Mid-string partial tag tests (overlap-based buffering)
//
// These test scenarios where a <think> or </think> tag is split mid-string
// (not at the start of the buffer). Backends that batch multiple forward-pass
// tokens into a single chunked response can produce these patterns.
//
// Ported from PR #6448 (ryanolson) with additional fakeout tests.
// =========================================================================
#[test]
fn test_mid_string_partial_opening_tag_batched() {
    // A batching backend delivers "Hello world <th" as a single chunk: the text before
    // the partial tag is normal output, the partial tag itself is held back.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);
    let mut feed = |chunk: &str| parser.parse_reasoning_streaming_incremental(chunk, &[]);

    // "Hello world " is emitted; "<th" stays in the buffer.
    let head = feed("Hello world <th");
    assert_eq!(head.normal_text, "Hello world ");
    assert_eq!(head.reasoning_text, "");

    // The tag is completed and the block is parsed.
    let tail = feed("ink>reasoning content</think> answer");
    assert_eq!(tail.reasoning_text, "reasoning content");
    assert_eq!(tail.normal_text, " answer");
}
#[test]
fn test_batched_tag_boundary_split() {
    // Aggressive batching: normal text is followed by a <think> tag that is cut after
    // "<thi"; the remainder of the tag opens the next chunk.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);
    let mut feed = |chunk: &str| parser.parse_reasoning_streaming_incremental(chunk, &[]);

    let head = feed("The answer is <thi");
    assert_eq!(head.normal_text, "The answer is ");
    assert_eq!(head.reasoning_text, "");

    let tail = feed("nk>let me think</think>42");
    assert_eq!(tail.reasoning_text, "let me think");
    assert_eq!(tail.normal_text, "42");
}
#[test]
fn test_mid_string_partial_closing_tag_stream_reasoning_false() {
    // stream_reasoning=false: reasoning is withheld until </think> is complete, here
    // with the closing tag cut in the middle of a chunk while in reasoning mode.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, false);
    let mut feed = |chunk: &str| parser.parse_reasoning_streaming_incremental(chunk, &[]);

    // Nothing is emitted while the end tag is incomplete.
    let head = feed("<think>reasoning content and </th");
    assert_eq!(head.normal_text, "");
    assert_eq!(head.reasoning_text, "");

    // The end tag completes: buffered reasoning and trailing normal text are returned.
    let tail = feed("ink> normal text");
    assert_eq!(tail.reasoning_text, "reasoning content and ");
    assert_eq!(tail.normal_text, " normal text");
}
#[test]
fn test_mid_string_partial_closing_tag_stream_reasoning_true() {
    // stream_reasoning=true: reasoning is emitted incrementally, but a trailing partial
    // "</th" must be kept back rather than leaked into reasoning_text.
    let mut parser =
        BasicReasoningParser::new(String::from("<think>"), String::from("</think>"), false, true);
    let mut feed = |chunk: &str| parser.parse_reasoning_streaming_incremental(chunk, &[]);

    // "reasoning content and " is streamed; "</th" is held.
    let head = feed("<think>reasoning content and </th");
    assert_eq!(head.reasoning_text, "reasoning content and ");
    assert_eq!(head.normal_text, "");

    // The end tag completes: no further reasoning, only the normal text after it.
    let tail = feed("ink> normal text");
    assert_eq!(tail.reasoning_text, "");
    assert_eq!(tail.normal_text, " normal text");
}
#[test]
fn test_batched_interleaved_with_mid_string_partial() {
    // Two reasoning blocks interleaved with answers. The first block is fully
    // contained in chunk 1; the second block's opening tag straddles the chunk
    // boundary ("<thi" + "nk>").
    let (open_tag, close_tag) = ("<think>", "</think>");
    let mut parser =
        BasicReasoningParser::new(String::from(open_tag), String::from(close_tag), false, true);

    // Chunk 1: complete first block plus its answer; the dangling "<thi" is not reported.
    let first =
        parser.parse_reasoning_streaming_incremental("<think>thought1</think>answer1<thi", &[]);
    assert_eq!(first.reasoning_text, "thought1");
    assert_eq!(first.normal_text, "answer1");

    // Chunk 2: the opening tag completes and the second block is parsed normally.
    let second = parser.parse_reasoning_streaming_incremental("nk>thought2</think>answer2", &[]);
    assert_eq!(second.reasoning_text, "thought2");
    assert_eq!(second.normal_text, "answer2");
}
#[test]
fn test_partial_tag_false_positive() {
    // "<th" resembles the start of "<think>", but "<thesis ...>" is not the tag,
    // so the text must pass through untouched and no reasoning may be reported.
    let (open_tag, close_tag) = ("<think>", "</think>");
    let mut parser =
        BasicReasoningParser::new(String::from(open_tag), String::from(close_tag), false, true);

    // No suffix of "value <thesis on" is a prefix of "<think>", so nothing
    // needs to be held back after the first chunk.
    let first = parser.parse_reasoning_streaming_incremental("value <thesis on", &[]);
    let second = parser.parse_reasoning_streaming_incremental(" AI> is great", &[]);

    // The check is on the concatenation of both chunks' normal text, so it does
    // not depend on how the text was split between the two results.
    let joined_normal = format!("{}{}", first.normal_text, second.normal_text);
    assert_eq!(joined_normal, "value <thesis on AI> is great");
    assert_eq!(first.reasoning_text, "");
    assert_eq!(second.reasoning_text, "");
}
#[test]
fn test_partial_closing_tag_fakeout() {
    // Ollama-style fakeout: "</th" is withheld as a possible closing tag, but
    // the next chunk turns it into "</thing>", which is not "</think>".
    let (open_tag, close_tag) = ("<think>", "</think>");
    let mut parser =
        BasicReasoningParser::new(String::from(open_tag), String::from(close_tag), false, true);

    // Chunk 1: "abc" is reported as reasoning; the ambiguous "</th" is held.
    let first = parser.parse_reasoning_streaming_incremental("<think>abc</th", &[]);
    assert_eq!(first.reasoning_text, "abc");
    assert_eq!(first.normal_text, "");

    // Chunk 2: the held text plus "ing>def" reads "</thing>def" — not a closing
    // tag, so all of it is reported as reasoning.
    let second = parser.parse_reasoning_streaming_incremental("ing>def", &[]);
    assert_eq!(second.reasoning_text, "</thing>def");
    assert_eq!(second.normal_text, "");

    // Chunk 3: the genuine closing tag arrives, followed by normal text.
    let third = parser.parse_reasoning_streaming_incremental("</think>done", &[]);
    assert_eq!(third.reasoning_text, "");
    assert_eq!(third.normal_text, "done");
}
#[test]
fn test_overlap_helper_function() {
    // Direct, table-driven checks of the `overlap` utility.
    // Each row is (text, delimiter, expected). As exercised by these cases, the
    // expected value is the byte length of the longest suffix of `text` that is
    // also a prefix of `delimiter`.
    let cases = [
        ("abc</th", "</think>", 4),
        ("abc</thing>def", "</think>", 0),
        ("<", "<think>", 1),
        ("<th", "<think>", 3),
        ("<think>", "<think>", 7), // full match
        ("no match", "<think>", 0),
        ("", "<think>", 0),
        ("Hello world <thi", "<think>", 4),
        // Multi-byte delimiters (Kimi parser uses ◁think▷ / ◁/think▷)
        ("text◁", "◁think▷", 3), // ◁ is 3 bytes
        ("text◁th", "◁think▷", 5),
        ("text◁/thi", "◁/think▷", 7),
        ("no match", "◁think▷", 0),
    ];

    for &(text, delimiter, expected) in cases.iter() {
        // The message identifies the failing row, since all rows share one assert.
        assert_eq!(
            overlap(text, delimiter),
            expected,
            "overlap({:?}, {:?})",
            text,
            delimiter
        );
    }
}
}
lib/parsers/src/reasoning/mod.rs
View file @
d82b0050
...
...
@@ -30,6 +30,7 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT
map
.insert
(
"mistral"
,
ReasoningParserType
::
Mistral
);
map
.insert
(
"granite"
,
ReasoningParserType
::
Granite
);
map
.insert
(
"nemotron_nano"
,
ReasoningParserType
::
NemotronDeci
);
// nemotron nano is <think>...</think>
map
.insert
(
"glm45"
,
ReasoningParserType
::
NemotronDeci
);
// GLM-4.5/5 is <think>...</think>, no force_reasoning
map
.insert
(
"minimax_append_think"
,
ReasoningParserType
::
MiniMaxAppendThink
,
...
...
@@ -225,6 +226,7 @@ mod tests {
"mistral"
,
"granite"
,
"nemotron_nano"
,
"glm45"
,
"minimax_append_think"
,
];
for
parser
in
available_parsers
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment