// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 use tracing as log; use crate::{ParserResult, ReasoningParser}; #[derive(Default, Debug, Clone)] pub struct BasicReasoningParser { think_start_token: String, think_end_token: String, _in_reasoning: bool, stream_reasoning: bool, _buffer: String, stripped_think_start: bool, } impl BasicReasoningParser { pub fn new( think_start_token: String, think_end_token: String, force_reasoning: bool, stream_reasoning: bool, ) -> Self { Self { think_start_token, think_end_token, _in_reasoning: force_reasoning, stream_reasoning, _buffer: String::new(), stripped_think_start: false, } } } impl ReasoningParser for BasicReasoningParser { fn detect_and_parse_reasoning(&mut self, text: &str, _token_ids: &[u32]) -> ParserResult { log::debug!("detect_and_parse_reasoning called with text: {:?}", text); let in_reasoning = self._in_reasoning || text.contains(&self.think_start_token); log::debug!("in_reasoning: {}", in_reasoning); if !in_reasoning { log::debug!("No reasoning detected, returning normal text."); return ParserResult { normal_text: text.to_string(), reasoning_text: String::new(), }; } // The text is considered to be in a reasoning block. let processed_text = text.replace(&self.think_start_token, "").trim().to_string(); log::debug!( "Processed text after removing think_start_token: {:?}", processed_text ); if !processed_text.contains(&self.think_end_token) { log::debug!( "Reasoning truncated, think_end_token not found. Returning reasoning text." ); // Assume reasoning was truncated before `think_end_token` return ParserResult { normal_text: String::new(), reasoning_text: processed_text, }; } // Extract reasoning content let splits: Vec<&str> = processed_text.splitn(2, &self.think_end_token).collect(); let reasoning_text = splits.first().unwrap_or(&"").to_string(); let normal_text = splits .get(1) .map(|s| s.trim().to_string()) .unwrap_or_default(); log::debug!("Extracted reasoning_text: {:?}", reasoning_text); log::debug!("Extracted normal_text: {:?}", normal_text); ParserResult { normal_text, reasoning_text, } } fn parse_reasoning_streaming_incremental( &mut self, text: &str, _token_ids: &[u32], ) -> ParserResult { // Incrementally parse the streaming text self._buffer.push_str(text); let mut current_text = self._buffer.to_string(); // If the current text is a prefix of the think token, keep buffering log::debug!( "parse_reasoning_streaming_incremental called with text: {:?}", text ); log::debug!("current buffer: {:?}", self._buffer); log::debug!("current_text: {:?}", current_text); log::debug!( "in_reasoning: {}, stripped_think_start: {}, stream_reasoning: {}", self._in_reasoning, self.stripped_think_start, self.stream_reasoning ); if self.think_start_token.starts_with(¤t_text) && self.think_start_token.as_str() != current_text.as_str() { return ParserResult { normal_text: String::new(), reasoning_text: String::new(), }; } if self.think_end_token.starts_with(¤t_text) && self.think_end_token.as_str() != current_text.as_str() { return ParserResult { normal_text: String::new(), reasoning_text: String::new(), }; } // Strip `` token if present if !self.stripped_think_start && current_text.contains(&self.think_start_token) { current_text = current_text.replace(&self.think_start_token, ""); self._buffer = current_text.to_string(); self.stripped_think_start = true; self._in_reasoning = true; } // Handle end of reasoning block let mut think_end_idx = current_text.len(); if self._in_reasoning { think_end_idx = current_text .find(&self.think_end_token) .unwrap_or(current_text.len()); } if self._in_reasoning && think_end_idx < current_text.len() { let reasoning_text = ¤t_text[..think_end_idx]; self._buffer.clear(); self._in_reasoning = false; let start_idx = think_end_idx + self.think_end_token.len(); let normal_text = if start_idx < current_text.len() { ¤t_text[start_idx..] } else { "" }; return ParserResult { normal_text: normal_text.to_string(), reasoning_text: reasoning_text.trim().to_string(), }; } // Continue with reasoning content if self._in_reasoning && self.stream_reasoning { // Stream the content immediately let reasoning_text = current_text; self._buffer.clear(); ParserResult { normal_text: String::new(), reasoning_text, } } else if !self._in_reasoning { // If we're not in a reasoning block return as normal text let normal_text = current_text; self._buffer.clear(); ParserResult { normal_text, reasoning_text: String::new(), } } else { // If we are in a reasoning block but no end token is found, return the current buffer ParserResult { normal_text: String::new(), reasoning_text: String::new(), } } } } #[cfg(test)] mod tests { use super::*; #[test] fn test_detect_and_parse_reasoning_reasoning() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning("with reasoning and more text.", &[]); assert_eq!(result.normal_text, "and more text."); assert_eq!(result.reasoning_text, "with reasoning"); } #[test] fn test_detect_and_parse_reasoning_reasoning_no_reasoning() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning("This is a test without reasoning.", &[]); assert_eq!(result.normal_text, "This is a test without reasoning."); assert_eq!(result.reasoning_text, ""); } #[test] fn test_detect_and_parse_reasoning_reasoning_truncated_reasoning() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning("with truncated reasoning", &[]); assert_eq!(result.normal_text, ""); assert_eq!(result.reasoning_text, "with truncated reasoning"); } #[test] fn test_parse_reasoning_streaming_incremental() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.parse_reasoning_streaming_incremental("".to_string(), "".to_string(), false, true); let result = parser.parse_reasoning_streaming_incremental( "with reasoning and more text.", &[], ); assert_eq!(result.normal_text, " and more text."); assert_eq!(result.reasoning_text, "with reasoning"); } #[test] fn test_parse_reasoning_streaming_incremental_no_end_token() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), true, true); let result = parser.parse_reasoning_streaming_incremental("with reasoning", &[]); assert_eq!(result.normal_text, ""); assert_eq!(result.reasoning_text, "with reasoning"); } #[test] fn test_detect_and_parse_reasoning_multiple_reasoning_blocks() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning( "first reasoning middle second reasoning end", &[], ); // The current implementation only handles the first occurrence properly assert_eq!(result.normal_text, "middle second reasoning end"); assert_eq!(result.reasoning_text, "first reasoning"); } #[test] fn test_streaming_multiple_reasoning_blocks() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, false); let result1 = parser .parse_reasoning_streaming_incremental("first reasoning middle", &[]); assert_eq!(result1.normal_text, " middle"); assert_eq!(result1.reasoning_text, "first reasoning"); // Basic parser assumes only one reasoning block at a time let result2 = parser .parse_reasoning_streaming_incremental(" second reasoning end", &[]); assert_eq!(result2.normal_text, " second reasoning end"); assert_eq!(result2.reasoning_text, ""); } #[test] fn test_partial_token_matching_opening_tag() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); // Feed partial opening tag let result1 = parser.parse_reasoning_streaming_incremental("reasoning content normal text", &[], ); assert_eq!(result2.normal_text, " normal text"); assert_eq!(result2.reasoning_text, "reasoning content"); } #[test] fn test_partial_token_matching_closing_tag() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, false); // Start with complete opening and partial content let result1 = parser.parse_reasoning_streaming_incremental("reasoning content normal text", &[]); assert_eq!(result2.normal_text, " normal text"); assert_eq!(result2.reasoning_text, "reasoning content"); } #[test] fn test_buffer_state_persistence_across_calls() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, false); // First call - partial opening tag let result1 = parser.parse_reasoning_streaming_incremental("part1 ", &[]); assert_eq!(result2.normal_text, ""); assert_eq!(result2.reasoning_text, ""); // Third call - more reasoning content let result3 = parser.parse_reasoning_streaming_incremental("part2 ", &[]); assert_eq!(result3.normal_text, ""); assert_eq!(result3.reasoning_text, ""); // Fourth call - end reasoning and normal text let result4 = parser.parse_reasoning_streaming_incremental("part3 normal", &[]); assert_eq!(result4.normal_text, " normal"); assert_eq!(result4.reasoning_text, "part1 part2 part3"); } #[test] fn test_streaming_with_stream_reasoning_enabled() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); // Start reasoning block let result1 = parser.parse_reasoning_streaming_incremental("reasoning ", &[]); assert_eq!(result1.normal_text, ""); assert_eq!(result1.reasoning_text, "reasoning "); // Continue streaming reasoning let result2 = parser.parse_reasoning_streaming_incremental("content ", &[]); assert_eq!(result2.normal_text, ""); assert_eq!(result2.reasoning_text, "content "); // End reasoning block let result3 = parser.parse_reasoning_streaming_incremental("more normal", &[]); assert_eq!(result3.normal_text, " normal"); assert_eq!(result3.reasoning_text, "more"); } #[test] fn test_nested_reasoning_blocks() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning( "outer inner reasoning normal", &[], ); // Current implementation should handle this by finding the first closing tag assert_eq!(result.normal_text, "reasoning normal"); // All tags are stripped, so inner is not included assert_eq!(result.reasoning_text, "outer inner"); } #[test] fn test_malformed_missing_closing_tag() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning("reasoning without closing tag", &[]); assert_eq!(result.normal_text, ""); assert_eq!(result.reasoning_text, "reasoning without closing tag"); } #[test] fn test_malformed_stray_closing_tag() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning("normal text more normal", &[]); assert_eq!(result.normal_text, "normal text more normal"); assert_eq!(result.reasoning_text, ""); } #[test] fn test_malformed_multiple_opening_tags() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser .detect_and_parse_reasoning("first second reasoning normal", &[]); // Should handle by replacing all opening tags and using first closing tag assert_eq!(result.normal_text, "normal"); assert_eq!(result.reasoning_text, "first second reasoning"); } #[test] fn test_empty_reasoning_block() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning(" normal text", &[]); assert_eq!(result.normal_text, "normal text"); assert_eq!(result.reasoning_text, ""); } #[test] fn test_whitespace_only_reasoning_block() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, true); let result = parser.detect_and_parse_reasoning(" \n\t normal text", &[]); assert_eq!(result.normal_text, "normal text"); assert_eq!(result.reasoning_text, ""); // Should be empty after trim } #[test] fn test_force_reasoning_mode() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), true, true); let result = parser.detect_and_parse_reasoning("no think tags here", &[]); assert_eq!(result.normal_text, ""); assert_eq!(result.reasoning_text, "no think tags here"); } #[test] fn test_streaming_reset_state_after_complete_block() { let mut parser = BasicReasoningParser::new("".to_string(), "".to_string(), false, false); // Process complete reasoning block let result1 = parser.parse_reasoning_streaming_incremental("reasoning normal", &[]); assert_eq!(result1.normal_text, " normal"); assert_eq!(result1.reasoning_text, "reasoning"); // Process normal text - should not be affected by previous state let result2 = parser.parse_reasoning_streaming_incremental(" more normal text", &[]); assert_eq!(result2.normal_text, " more normal text"); assert_eq!(result2.reasoning_text, ""); // Basic parser does not expect more than one reasoning block at a time // So this should not affect the state let result3 = parser .parse_reasoning_streaming_incremental(" new reasoning final", &[]); assert_eq!(result3.normal_text, " new reasoning final"); assert_eq!(result3.reasoning_text, ""); } }