Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
b5cf1ad3
Unverified
Commit
b5cf1ad3
authored
Aug 15, 2025
by
nachiketb-nvidia
Committed by
GitHub
Aug 15, 2025
Browse files
feat: add basic implementation of reasoning parsers (#2410)
parent
537759f1
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
495 additions
and
0 deletions
+495
-0
lib/llm/src/postprocessor/mod.rs
lib/llm/src/postprocessor/mod.rs
+1
-0
lib/llm/src/postprocessor/reasoning_parser.rs
lib/llm/src/postprocessor/reasoning_parser.rs
+19
-0
lib/llm/src/postprocessor/reasoning_parser/parsers/base_reasoning_parser.rs
...ocessor/reasoning_parser/parsers/base_reasoning_parser.rs
+436
-0
lib/llm/src/postprocessor/reasoning_parser/parsers/deepseek_r1_reasoning_parser.rs
.../reasoning_parser/parsers/deepseek_r1_reasoning_parser.rs
+34
-0
lib/llm/src/postprocessor/reasoning_parser/parsers/mod.rs
lib/llm/src/postprocessor/reasoning_parser/parsers/mod.rs
+5
-0
No files found.
lib/llm/src/postprocessor/mod.rs
View file @
b5cf1ad3
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub
mod
reasoning_parser
;
pub
mod
tool_calling
;
lib/llm/src/postprocessor/reasoning_parser.rs
0 → 100644
View file @
b5cf1ad3
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub
mod
parsers
;
pub
struct
ParserResult
{
/// The normal text outside of reasoning blocks.
pub
normal_text
:
String
,
/// The extracted reasoning text from within reasoning blocks.
pub
reasoning_text
:
String
,
}
pub
trait
ReasoningParser
{
/// Detects and parses reasoning from the input text.
fn
detect_and_parse_reasoning
(
&
mut
self
,
text
:
&
str
)
->
ParserResult
;
/// Parses reasoning incrementally from streaming input.
fn
parse_reasoning_streaming_incremental
(
&
mut
self
,
text
:
&
str
)
->
ParserResult
;
}
lib/llm/src/postprocessor/reasoning_parser/parsers/base_reasoning_parser.rs
0 → 100644
View file @
b5cf1ad3
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
crate
::
postprocessor
::
reasoning_parser
::{
ParserResult
,
ReasoningParser
};
use
tracing
as
log
;
#[derive(Default)]
pub
struct
BaseReasoningParser
{
think_start_token
:
String
,
think_end_token
:
String
,
_
in_reasoning
:
bool
,
stream_reasoning
:
bool
,
_
buffer
:
String
,
stripped_think_start
:
bool
,
}
impl
BaseReasoningParser
{
pub
fn
new
(
think_start_token
:
String
,
think_end_token
:
String
,
force_reasoning
:
bool
,
stream_reasoning
:
bool
,
)
->
Self
{
Self
{
think_start_token
,
think_end_token
,
_
in_reasoning
:
force_reasoning
,
stream_reasoning
,
_
buffer
:
String
::
new
(),
stripped_think_start
:
false
,
}
}
}
impl
ReasoningParser
for
BaseReasoningParser
{
fn
detect_and_parse_reasoning
(
&
mut
self
,
text
:
&
str
)
->
ParserResult
{
log
::
debug!
(
"detect_and_parse_reasoning called with text: {:?}"
,
text
);
let
in_reasoning
=
self
._in_reasoning
||
text
.contains
(
&
self
.think_start_token
);
log
::
debug!
(
"in_reasoning: {}"
,
in_reasoning
);
if
!
in_reasoning
{
log
::
debug!
(
"No reasoning detected, returning normal text."
);
return
ParserResult
{
normal_text
:
text
.to_string
(),
reasoning_text
:
String
::
new
(),
};
}
// The text is considered to be in a reasoning block.
let
processed_text
=
text
.replace
(
&
self
.think_start_token
,
""
)
.trim
()
.to_string
();
log
::
debug!
(
"Processed text after removing think_start_token: {:?}"
,
processed_text
);
if
!
processed_text
.contains
(
&
self
.think_end_token
)
{
log
::
debug!
(
"Reasoning truncated, think_end_token not found. Returning reasoning text."
);
// Assume reasoning was truncated before `think_end_token`
return
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
processed_text
,
};
}
// Extract reasoning content
let
splits
:
Vec
<&
str
>
=
processed_text
.splitn
(
2
,
&
self
.think_end_token
)
.collect
();
let
reasoning_text
=
splits
.first
()
.unwrap_or
(
&
""
)
.to_string
();
let
normal_text
=
splits
.get
(
1
)
.map
(|
s
|
s
.trim
()
.to_string
())
.unwrap_or_default
();
log
::
debug!
(
"Extracted reasoning_text: {:?}"
,
reasoning_text
);
log
::
debug!
(
"Extracted normal_text: {:?}"
,
normal_text
);
ParserResult
{
normal_text
,
reasoning_text
,
}
}
fn
parse_reasoning_streaming_incremental
(
&
mut
self
,
text
:
&
str
)
->
ParserResult
{
// Incrementally parse the streaming text
self
._buffer
.push_str
(
text
);
let
mut
current_text
=
self
._buffer
.to_string
();
// If the current text is a prefix of the think token, keep buffering
log
::
debug!
(
"parse_reasoning_streaming_incremental called with text: {:?}"
,
text
);
log
::
debug!
(
"current buffer: {:?}"
,
self
._buffer
);
log
::
debug!
(
"current_text: {:?}"
,
current_text
);
log
::
debug!
(
"in_reasoning: {}, stripped_think_start: {}, stream_reasoning: {}"
,
self
._in_reasoning
,
self
.stripped_think_start
,
self
.stream_reasoning
);
if
self
.think_start_token
.starts_with
(
&
current_text
)
&&
self
.think_start_token
.as_str
()
!=
current_text
.as_str
()
{
return
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
String
::
new
(),
};
}
if
self
.think_end_token
.starts_with
(
&
current_text
)
&&
self
.think_end_token
.as_str
()
!=
current_text
.as_str
()
{
return
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
String
::
new
(),
};
}
// Strip `<think>` token if present
if
!
self
.stripped_think_start
&&
current_text
.contains
(
&
self
.think_start_token
)
{
current_text
=
current_text
.replace
(
&
self
.think_start_token
,
""
);
self
._buffer
=
current_text
.to_string
();
self
.stripped_think_start
=
true
;
self
._in_reasoning
=
true
;
}
// Handle end of reasoning block
let
mut
think_end_idx
=
current_text
.len
();
if
self
._in_reasoning
{
think_end_idx
=
current_text
.find
(
&
self
.think_end_token
)
.unwrap_or
(
current_text
.len
());
}
if
self
._in_reasoning
&&
think_end_idx
<
current_text
.len
()
{
let
reasoning_text
=
&
current_text
[
..
think_end_idx
];
self
._buffer
.clear
();
self
._in_reasoning
=
false
;
let
start_idx
=
think_end_idx
+
self
.think_end_token
.len
();
let
normal_text
=
if
start_idx
<
current_text
.len
()
{
&
current_text
[
start_idx
..
]
}
else
{
""
};
return
ParserResult
{
normal_text
:
normal_text
.to_string
(),
reasoning_text
:
reasoning_text
.trim
()
.to_string
(),
};
}
// Continue with reasoning content
if
self
._in_reasoning
&&
self
.stream_reasoning
{
// Stream the content immediately
let
reasoning_text
=
current_text
;
self
._buffer
.clear
();
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
,
}
}
else
if
!
self
._in_reasoning
{
// If we're not in a reasoning block return as normal text
let
normal_text
=
current_text
;
self
._buffer
.clear
();
ParserResult
{
normal_text
,
reasoning_text
:
String
::
new
(),
}
}
else
{
// If we are in a reasoning block but no end token is found, return the current buffer
ParserResult
{
normal_text
:
String
::
new
(),
reasoning_text
:
String
::
new
(),
}
}
}
}
#[cfg(test)]
mod
tests
{
use
super
::
*
;
#[test]
fn
test_detect_and_parse_reasoning_reasoning
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>with reasoning</think> and more text."
);
assert_eq!
(
result
.normal_text
,
"and more text."
);
assert_eq!
(
result
.reasoning_text
,
"with reasoning"
);
}
#[test]
fn
test_detect_and_parse_reasoning_reasoning_no_reasoning
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"This is a test without reasoning."
);
assert_eq!
(
result
.normal_text
,
"This is a test without reasoning."
);
assert_eq!
(
result
.reasoning_text
,
""
);
}
#[test]
fn
test_detect_and_parse_reasoning_reasoning_truncated_reasoning
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>with truncated reasoning"
);
assert_eq!
(
result
.normal_text
,
""
);
assert_eq!
(
result
.reasoning_text
,
"with truncated reasoning"
);
}
#[test]
fn
test_parse_reasoning_streaming_incremental
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.parse_reasoning_streaming_incremental
(
"<thi"
);
assert_eq!
(
result
.normal_text
,
""
);
assert_eq!
(
result
.reasoning_text
,
""
);
}
#[test]
fn
test_parse_reasoning_streaming_incremental_complete
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.parse_reasoning_streaming_incremental
(
"<think>with reasoning</think> and more text."
);
assert_eq!
(
result
.normal_text
,
" and more text."
);
assert_eq!
(
result
.reasoning_text
,
"with reasoning"
);
}
#[test]
fn
test_parse_reasoning_streaming_incremental_no_end_token
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
true
,
true
);
let
result
=
parser
.parse_reasoning_streaming_incremental
(
"<think>with reasoning"
);
assert_eq!
(
result
.normal_text
,
""
);
assert_eq!
(
result
.reasoning_text
,
"with reasoning"
);
}
#[test]
fn
test_detect_and_parse_reasoning_multiple_reasoning_blocks
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>first reasoning</think> middle <think>second reasoning</think> end"
,
);
// The current implementation only handles the first occurrence properly
assert_eq!
(
result
.normal_text
,
"middle second reasoning</think> end"
);
assert_eq!
(
result
.reasoning_text
,
"first reasoning"
);
}
#[test]
fn
test_streaming_multiple_reasoning_blocks
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
false
);
let
result1
=
parser
.parse_reasoning_streaming_incremental
(
"<think>first reasoning</think> middle"
);
assert_eq!
(
result1
.normal_text
,
" middle"
);
assert_eq!
(
result1
.reasoning_text
,
"first reasoning"
);
// Basic parser assumes only one reasoning block at a time
let
result2
=
parser
.parse_reasoning_streaming_incremental
(
" <think>second reasoning</think> end"
);
assert_eq!
(
result2
.normal_text
,
" <think>second reasoning</think> end"
);
assert_eq!
(
result2
.reasoning_text
,
""
);
}
#[test]
fn
test_partial_token_matching_opening_tag
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
// Feed partial opening tag
let
result1
=
parser
.parse_reasoning_streaming_incremental
(
"<th"
);
assert_eq!
(
result1
.normal_text
,
""
);
assert_eq!
(
result1
.reasoning_text
,
""
);
// Complete the opening tag and add content
let
result2
=
parser
.parse_reasoning_streaming_incremental
(
"ink>reasoning content</think> normal text"
);
assert_eq!
(
result2
.normal_text
,
" normal text"
);
assert_eq!
(
result2
.reasoning_text
,
"reasoning content"
);
}
#[test]
fn
test_partial_token_matching_closing_tag
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
false
);
// Start with complete opening and partial content
let
result1
=
parser
.parse_reasoning_streaming_incremental
(
"<think>reasoning content</th"
);
assert_eq!
(
result1
.normal_text
,
""
);
assert_eq!
(
result1
.reasoning_text
,
""
);
// Complete the closing tag
let
result2
=
parser
.parse_reasoning_streaming_incremental
(
"ink> normal text"
);
assert_eq!
(
result2
.normal_text
,
" normal text"
);
assert_eq!
(
result2
.reasoning_text
,
"reasoning content"
);
}
#[test]
fn
test_buffer_state_persistence_across_calls
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
false
);
// First call - partial opening tag
let
result1
=
parser
.parse_reasoning_streaming_incremental
(
"<th"
);
assert_eq!
(
result1
.normal_text
,
""
);
assert_eq!
(
result1
.reasoning_text
,
""
);
// Second call - complete opening tag, start reasoning
let
result2
=
parser
.parse_reasoning_streaming_incremental
(
"ink>part1 "
);
assert_eq!
(
result2
.normal_text
,
""
);
assert_eq!
(
result2
.reasoning_text
,
""
);
// Third call - more reasoning content
let
result3
=
parser
.parse_reasoning_streaming_incremental
(
"part2 "
);
assert_eq!
(
result3
.normal_text
,
""
);
assert_eq!
(
result3
.reasoning_text
,
""
);
// Fourth call - end reasoning and normal text
let
result4
=
parser
.parse_reasoning_streaming_incremental
(
"part3</think> normal"
);
assert_eq!
(
result4
.normal_text
,
" normal"
);
assert_eq!
(
result4
.reasoning_text
,
"part1 part2 part3"
);
}
#[test]
fn
test_streaming_with_stream_reasoning_enabled
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
// Start reasoning block
let
result1
=
parser
.parse_reasoning_streaming_incremental
(
"<think>reasoning "
);
assert_eq!
(
result1
.normal_text
,
""
);
assert_eq!
(
result1
.reasoning_text
,
"reasoning "
);
// Continue streaming reasoning
let
result2
=
parser
.parse_reasoning_streaming_incremental
(
"content "
);
assert_eq!
(
result2
.normal_text
,
""
);
assert_eq!
(
result2
.reasoning_text
,
"content "
);
// End reasoning block
let
result3
=
parser
.parse_reasoning_streaming_incremental
(
"more</think> normal"
);
assert_eq!
(
result3
.normal_text
,
" normal"
);
assert_eq!
(
result3
.reasoning_text
,
"more"
);
}
#[test]
fn
test_nested_reasoning_blocks
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>outer <think>inner</think> reasoning</think> normal"
,
);
// Current implementation should handle this by finding the first closing tag
assert_eq!
(
result
.normal_text
,
"reasoning</think> normal"
);
// All <think> tags are stripped, so <think>inner is not included
assert_eq!
(
result
.reasoning_text
,
"outer inner"
);
}
#[test]
fn
test_malformed_missing_closing_tag
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>reasoning without closing tag"
);
assert_eq!
(
result
.normal_text
,
""
);
assert_eq!
(
result
.reasoning_text
,
"reasoning without closing tag"
);
}
#[test]
fn
test_malformed_stray_closing_tag
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"normal text</think> more normal"
);
assert_eq!
(
result
.normal_text
,
"normal text</think> more normal"
);
assert_eq!
(
result
.reasoning_text
,
""
);
}
#[test]
fn
test_malformed_multiple_opening_tags
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>first <think>second reasoning</think> normal"
);
// Should handle by replacing all opening tags and using first closing tag
assert_eq!
(
result
.normal_text
,
"normal"
);
assert_eq!
(
result
.reasoning_text
,
"first second reasoning"
);
}
#[test]
fn
test_empty_reasoning_block
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think></think> normal text"
);
assert_eq!
(
result
.normal_text
,
"normal text"
);
assert_eq!
(
result
.reasoning_text
,
""
);
}
#[test]
fn
test_whitespace_only_reasoning_block
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"<think>
\n\t
</think> normal text"
);
assert_eq!
(
result
.normal_text
,
"normal text"
);
assert_eq!
(
result
.reasoning_text
,
""
);
// Should be empty after trim
}
#[test]
fn
test_force_reasoning_mode
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
true
,
true
);
let
result
=
parser
.detect_and_parse_reasoning
(
"no think tags here"
);
assert_eq!
(
result
.normal_text
,
""
);
assert_eq!
(
result
.reasoning_text
,
"no think tags here"
);
}
#[test]
fn
test_streaming_reset_state_after_complete_block
()
{
let
mut
parser
=
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
false
,
false
);
// Process complete reasoning block
let
result1
=
parser
.parse_reasoning_streaming_incremental
(
"<think>reasoning</think> normal"
);
assert_eq!
(
result1
.normal_text
,
" normal"
);
assert_eq!
(
result1
.reasoning_text
,
"reasoning"
);
// Process normal text - should not be affected by previous state
let
result2
=
parser
.parse_reasoning_streaming_incremental
(
" more normal text"
);
assert_eq!
(
result2
.normal_text
,
" more normal text"
);
assert_eq!
(
result2
.reasoning_text
,
""
);
// Basic parser does not expect more than one reasoning block at a time
// So this should not affect the state
let
result3
=
parser
.parse_reasoning_streaming_incremental
(
" <think>new reasoning</think> final"
);
assert_eq!
(
result3
.normal_text
,
" <think>new reasoning</think> final"
);
assert_eq!
(
result3
.reasoning_text
,
""
);
}
}
lib/llm/src/postprocessor/reasoning_parser/parsers/deepseek_r1_reasoning_parser.rs
0 → 100644
View file @
b5cf1ad3
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use
crate
::
postprocessor
::
reasoning_parser
::
parsers
::
base_reasoning_parser
::
BaseReasoningParser
;
use
crate
::
postprocessor
::
reasoning_parser
::{
ParserResult
,
ReasoningParser
};
#[derive(Default)]
pub
struct
DeepseekR1ReasoningParser
{
base
:
BaseReasoningParser
,
}
impl
DeepseekR1ReasoningParser
{
pub
fn
new
()
->
Self
{
Self
{
base
:
BaseReasoningParser
::
new
(
"<think>"
.to_string
(),
"</think>"
.to_string
(),
true
,
true
,
),
}
}
}
impl
ReasoningParser
for
DeepseekR1ReasoningParser
{
fn
parse_reasoning_streaming_incremental
(
&
mut
self
,
text
:
&
str
)
->
ParserResult
{
self
.base
.parse_reasoning_streaming_incremental
(
text
)
}
fn
detect_and_parse_reasoning
(
&
mut
self
,
text
:
&
str
)
->
ParserResult
{
self
.base
.detect_and_parse_reasoning
(
text
)
}
}
lib/llm/src/postprocessor/reasoning_parser/parsers/mod.rs
0 → 100644
View file @
b5cf1ad3
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
pub
mod
base_reasoning_parser
;
pub
mod
deepseek_r1_reasoning_parser
;
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment