"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "93ca9df1d1e07b5dc6a5707f97707da2e0133896"
Unverified commit 8245633a, authored by ishandhanani, committed by GitHub
Browse files

feat: add MiniMax-M2 reasoning parser and fix tool call parsing (#6294)


Co-authored-by: Claude <noreply@anthropic.com>
parent b6603d90
...@@ -72,8 +72,6 @@ impl PromptFormatter { ...@@ -72,8 +72,6 @@ impl PromptFormatter {
std::fs::read_to_string(chat_template_file).with_context(|| { std::fs::read_to_string(chat_template_file).with_context(|| {
format!("fs:read_to_string '{}'", chat_template_file.display()) format!("fs:read_to_string '{}'", chat_template_file.display())
})?; })?;
// clean up the string to remove newlines
let chat_template = chat_template.replace('\n', "");
config.chat_template = Some(ChatTemplateValue(either::Left(chat_template))); config.chat_template = Some(ChatTemplateValue(either::Left(chat_template)));
} }
Self::from_parts( Self::from_parts(
......
...@@ -196,11 +196,6 @@ impl ChoiceJailState { ...@@ -196,11 +196,6 @@ impl ChoiceJailState {
if should_end { if should_end {
// Complete tool call found in this chunk // Complete tool call found in this chunk
tracing::debug!(
"Choice {} complete tool call detected in single chunk",
choice.index
);
let (jailed_part, trailing_part) = full_content.split_at(split_pos); let (jailed_part, trailing_part) = full_content.split_at(split_pos);
// Create the tool call choice // Create the tool call choice
...@@ -238,11 +233,6 @@ impl ChoiceJailState { ...@@ -238,11 +233,6 @@ impl ChoiceJailState {
} }
} else { } else {
// Start jailing with the marker and suffix // Start jailing with the marker and suffix
tracing::debug!(
"Choice {} start marker '{}' detected, starting jail",
choice.index,
marker
);
self.is_jailed = true; self.is_jailed = true;
self.accumulated_content = full_content; self.accumulated_content = full_content;
} }
...@@ -291,10 +281,6 @@ impl ChoiceJailState { ...@@ -291,10 +281,6 @@ impl ChoiceJailState {
if jail_stream.should_start_jail(&combined_content) { if jail_stream.should_start_jail(&combined_content) {
// Start jailing with the combined content // Start jailing with the combined content
tracing::debug!(
"Choice {} tool call start detected via parser, starting jail",
choice.index
);
self.is_jailed = true; self.is_jailed = true;
self.accumulated_content = combined_content; self.accumulated_content = combined_content;
self.partial_match_buffer.clear(); self.partial_match_buffer.clear();
...@@ -325,11 +311,6 @@ impl ChoiceJailState { ...@@ -325,11 +311,6 @@ impl ChoiceJailState {
jail_stream.should_end_jail(&self.accumulated_content).await; jail_stream.should_end_jail(&self.accumulated_content).await;
if should_end { if should_end {
tracing::debug!(
"Choice {} jail exit detected, releasing accumulated content",
choice.index
);
// Split the content // Split the content
let (jailed_part, trailing_part) = self.accumulated_content.split_at(split_pos); let (jailed_part, trailing_part) = self.accumulated_content.split_at(split_pos);
...@@ -379,11 +360,6 @@ impl ChoiceJailState { ...@@ -379,11 +360,6 @@ impl ChoiceJailState {
/// Finalize any remaining content when stream ends /// Finalize any remaining content when stream ends
async fn finalize(&mut self, jail_stream: &JailedStream) -> Option<ChoiceEmission> { async fn finalize(&mut self, jail_stream: &JailedStream) -> Option<ChoiceEmission> {
if self.is_jailed && !self.accumulated_content.is_empty() { if self.is_jailed && !self.accumulated_content.is_empty() {
tracing::debug!(
"Choice {} stream ended while jailed, releasing accumulated content",
self.index
);
// Create a dummy choice for the method call // Create a dummy choice for the method call
#[allow(deprecated)] #[allow(deprecated)]
let dummy_choice = create_choice_stream( let dummy_choice = create_choice_stream(
...@@ -736,14 +712,6 @@ impl JailedStream { ...@@ -736,14 +712,6 @@ impl JailedStream {
let tool_call_match = self.tool_call_parser.is_some() let tool_call_match = self.tool_call_parser.is_some()
&& detect_tool_call_start(content, self.tool_call_parser.as_deref()).unwrap_or(false); && detect_tool_call_start(content, self.tool_call_parser.as_deref()).unwrap_or(false);
tracing::debug!(
"should_start_jail: content={:?}, sequence_match={}, tool_call_match={}, sequences={:?}",
content,
sequence_match,
tool_call_match,
self.jail_start_sequences
);
sequence_match || tool_call_match sequence_match || tool_call_match
} }
...@@ -832,12 +800,13 @@ impl JailedStream { ...@@ -832,12 +800,13 @@ impl JailedStream {
JailMode::MarkerBased => { JailMode::MarkerBased => {
// Traditional marker-based tool call parsing // Traditional marker-based tool call parsing
let tools_slice = self.tool_definitions.as_deref(); let tools_slice = self.tool_definitions.as_deref();
if let Ok((tool_calls, normal_text)) = try_tool_call_parse_aggregate( let parse_result = try_tool_call_parse_aggregate(
accumulated_content, accumulated_content,
self.tool_call_parser.as_deref(), self.tool_call_parser.as_deref(),
tools_slice, tools_slice,
) )
.await .await;
if let Ok((tool_calls, normal_text)) = parse_result
&& !tool_calls.is_empty() && !tool_calls.is_empty()
{ {
// Convert to streaming format // Convert to streaming format
......
...@@ -74,9 +74,13 @@ impl ReasoningParser for BasicReasoningParser { ...@@ -74,9 +74,13 @@ impl ReasoningParser for BasicReasoningParser {
// Incrementally parse the streaming text // Incrementally parse the streaming text
self._buffer.push_str(text); self._buffer.push_str(text);
let mut current_text = self._buffer.to_string(); let mut current_text = self._buffer.to_string();
// If the current text is a prefix of the think token, keep buffering // If the current text is a prefix of the think token, keep buffering.
// Only buffer for start token if we haven't found it yet.
// Only buffer for end token if we're currently inside a reasoning block.
// After reasoning ends, all content passes through as normal text.
if self.think_start_token.starts_with(&current_text) if !self.stripped_think_start
&& self.think_start_token.starts_with(&current_text)
&& self.think_start_token.as_str() != current_text.as_str() && self.think_start_token.as_str() != current_text.as_str()
{ {
return ParserResult { return ParserResult {
...@@ -84,7 +88,8 @@ impl ReasoningParser for BasicReasoningParser { ...@@ -84,7 +88,8 @@ impl ReasoningParser for BasicReasoningParser {
reasoning_text: String::new(), reasoning_text: String::new(),
}; };
} }
if self.think_end_token.starts_with(&current_text) if self._in_reasoning
&& self.think_end_token.starts_with(&current_text)
&& self.think_end_token.as_str() != current_text.as_str() && self.think_end_token.as_str() != current_text.as_str()
{ {
return ParserResult { return ParserResult {
...@@ -414,4 +419,59 @@ mod tests { ...@@ -414,4 +419,59 @@ mod tests {
assert_eq!(result3.normal_text, " <think>new reasoning</think> final"); assert_eq!(result3.normal_text, " <think>new reasoning</think> final");
assert_eq!(result3.reasoning_text, ""); assert_eq!(result3.reasoning_text, "");
} }
#[test]
fn test_post_reasoning_angle_bracket_not_buffered() {
    // Regression guard: once the reasoning block has been closed, a bare `<`
    // must be emitted right away as normal text. Holding it back as a possible
    // prefix of `<think>` / `</think>` would make the downstream tool call jail
    // lose the bracket (e.g. `<invoke` would arrive as `invoke`).
    let mut parser = BasicReasoningParser::new(
        String::from("<think>"),
        String::from("</think>"),
        false,
        true,
    );

    // A full reasoning block delivered in one chunk.
    let closed_block =
        parser.parse_reasoning_streaming_incremental("<think>reasoning content</think>", &[]);
    assert_eq!(closed_block.reasoning_text, "reasoning content");
    assert_eq!(closed_block.normal_text, "");

    // The lone `<` that follows is plain output.
    let lone_bracket = parser.parse_reasoning_streaming_incremental("<", &[]);
    assert_eq!(lone_bracket.normal_text, "<");
    assert_eq!(lone_bracket.reasoning_text, "");

    // The remainder shows up on its own, not glued to a buffered `<`.
    let tag_rest =
        parser.parse_reasoning_streaming_incremental("invoke name=\"get_weather\">", &[]);
    assert_eq!(tag_rest.normal_text, "invoke name=\"get_weather\">");
    assert_eq!(tag_rest.reasoning_text, "");
}
#[test]
fn test_post_reasoning_tool_call_xml_preserved() {
    // MiniMax-style scenario: a reasoning block followed by an XML tool call.
    // The reasoning parser must not swallow the `<` of `<invoke`.
    let mut parser = BasicReasoningParser::new(
        String::from("<think>"),
        String::from("</think>"),
        false,
        true,
    );

    let opening = parser.parse_reasoning_streaming_incremental("<think>let me check", &[]);
    assert_eq!(opening.reasoning_text, "let me check");

    let closing = parser.parse_reasoning_streaming_incremental("</think>", &[]);
    assert_eq!(closing.normal_text, "");
    assert_eq!(closing.reasoning_text, "");

    // Everything after the reasoning block must come back verbatim as normal
    // text, chunk by chunk -- including the `<` that arrives as its own token.
    for chunk in [
        "<minimax:tool_call>",
        "\n",
        "<",
        "invoke name=\"get_weather\">",
    ] {
        let passed_through = parser.parse_reasoning_streaming_incremental(chunk, &[]);
        assert_eq!(passed_through.normal_text, chunk);
    }
}
} }
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use crate::{ParserResult, ReasoningParser};
use super::base_parser::BasicReasoningParser;
/// MiniMax Append-Think Reasoning Parser.
///
/// The MiniMax model starts generating reasoning content immediately WITHOUT
/// a `<think>` prefix. The model output looks like:
/// `reasoning content here...</think>actual response`
///
/// This parser prepends `<think>` to the first chunk, transforming the stream into:
/// `<think>reasoning content here...</think>actual response`
///
/// It then delegates to `BasicReasoningParser` for standard `<think>...</think>`
/// extraction, splitting output into `reasoning_text` and `normal_text`.
///
/// In streaming mode the tag is prepended only once, on the first call to
/// `parse_reasoning_streaming_incremental`. The non-streaming
/// `detect_and_parse_reasoning` prepends it on every call.
///
/// Reference: SGLang MiniMaxAppendThinkDetector
/// <https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/parser/reasoning_parser.py>
#[derive(Debug)]
pub struct MiniMaxAppendThinkParser {
/// Standard `<think>...</think>` parser that all calls are forwarded to.
inner: BasicReasoningParser,
/// `true` until the first streaming chunk has been seen; while set, the
/// next streaming call prepends the synthetic `<think>` tag and clears it.
is_first_chunk: bool,
}
impl Default for MiniMaxAppendThinkParser {
    /// Same as [`MiniMaxAppendThinkParser::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl MiniMaxAppendThinkParser {
    /// Builds a parser that has not yet processed any streaming chunk.
    ///
    /// The wrapped `BasicReasoningParser` is configured for the
    /// `<think>` / `</think>` token pair.
    pub fn new() -> Self {
        // force_reasoning=false; this wrapper synthesizes `<think>` itself.
        let force_reasoning = false;
        // stream_reasoning=true
        let stream_reasoning = true;

        let inner = BasicReasoningParser::new(
            String::from("<think>"),
            String::from("</think>"),
            force_reasoning,
            stream_reasoning,
        );

        Self {
            inner,
            is_first_chunk: true,
        }
    }
}
impl ReasoningParser for MiniMaxAppendThinkParser {
    /// Parses a complete (non-streaming) output.
    ///
    /// `<think>` is put in front of `text` on every call and the result is
    /// handed to the inner parser together with `token_ids`. The
    /// first-chunk flag used by the streaming path is left untouched.
    fn detect_and_parse_reasoning(&mut self, text: &str, token_ids: &[u32]) -> ParserResult {
        let with_open_tag = ["<think>", text].concat();
        self.inner
            .detect_and_parse_reasoning(&with_open_tag, token_ids)
    }

    /// Parses one streaming chunk.
    ///
    /// Only the first chunk seen by this parser gets `<think>` prepended;
    /// every later chunk is forwarded to the inner parser unchanged.
    fn parse_reasoning_streaming_incremental(
        &mut self,
        text: &str,
        token_ids: &[u32],
    ) -> ParserResult {
        // Read the flag and clear it in one step so the tag is injected once.
        let first_chunk = std::mem::replace(&mut self.is_first_chunk, false);
        if !first_chunk {
            return self
                .inner
                .parse_reasoning_streaming_incremental(text, token_ids);
        }

        let with_open_tag = ["<think>", text].concat();
        self.inner
            .parse_reasoning_streaming_incremental(&with_open_tag, token_ids)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Pushes a single streaming chunk (with no token ids) through `parser`.
    fn feed(parser: &mut MiniMaxAppendThinkParser, chunk: &str) -> ParserResult {
        parser.parse_reasoning_streaming_incremental(chunk, &[])
    }

    #[test]
    fn test_detect_and_parse_no_end_token() {
        let mut parser = MiniMaxAppendThinkParser::new();

        let parsed = parser.detect_and_parse_reasoning("reasoning content here", &[]);

        assert_eq!(parsed.reasoning_text, "reasoning content here");
        assert_eq!(parsed.normal_text, "");
    }

    #[test]
    fn test_detect_and_parse_with_end_token() {
        let mut parser = MiniMaxAppendThinkParser::new();

        let parsed =
            parser.detect_and_parse_reasoning("reasoning content</think>normal response", &[]);

        assert_eq!(parsed.reasoning_text, "reasoning content");
        assert_eq!(parsed.normal_text, "normal response");
    }

    #[test]
    fn test_streaming_basic_flow() {
        let mut parser = MiniMaxAppendThinkParser::new();

        // (chunk, expected reasoning_text, expected normal_text)
        let steps = [
            // The model opens with reasoning and no `<think>` tag.
            ("I need to ", "I need to ", ""),
            // Still inside the reasoning section.
            ("check the weather", "check the weather", ""),
            // The end tag closes reasoning; the rest is the answer.
            ("</think>The weather is sunny.", "", "The weather is sunny."),
        ];

        for (chunk, want_reasoning, want_normal) in steps {
            let out = feed(&mut parser, chunk);
            assert_eq!(out.reasoning_text, want_reasoning);
            assert_eq!(out.normal_text, want_normal);
        }
    }

    #[test]
    fn test_streaming_end_token_split_across_chunks() {
        let mut parser = MiniMaxAppendThinkParser::new();

        // (chunk, expected reasoning_text, expected normal_text)
        let steps = [
            // stream_reasoning=true, so reasoning is emitted straight away.
            ("reasoning", "reasoning", ""),
            // A partial `</think>` has to be buffered: nothing is emitted.
            ("</thi", "", ""),
            // The end tag completes. The reasoning was already streamed by
            // the first chunk, so only the trailing normal text is emitted.
            ("nk>normal text", "", "normal text"),
        ];

        for (chunk, want_reasoning, want_normal) in steps {
            let out = feed(&mut parser, chunk);
            assert_eq!(out.reasoning_text, want_reasoning);
            assert_eq!(out.normal_text, want_normal);
        }
    }

    #[test]
    fn test_streaming_only_reasoning_no_end() {
        let mut parser = MiniMaxAppendThinkParser::new();

        // Without an end tag, every chunk is reported as reasoning.
        for chunk in ["still thinking", " more thought"] {
            let out = feed(&mut parser, chunk);
            assert_eq!(out.reasoning_text, chunk);
            assert_eq!(out.normal_text, "");
        }
    }

    #[test]
    fn test_streaming_with_tool_call_after_reasoning() {
        let mut parser = MiniMaxAppendThinkParser::new();

        let thinking = feed(&mut parser, "let me call a tool");
        assert_eq!(thinking.reasoning_text, "let me call a tool");

        let tool_call = feed(
            &mut parser,
            "</think><minimax:tool_call><invoke name=\"get_weather\">",
        );
        assert_eq!(tool_call.reasoning_text, "");
        assert!(
            tool_call
                .normal_text
                .contains("<minimax:tool_call><invoke name=\"get_weather\">")
        );
    }

    #[test]
    fn test_streaming_tool_call_angle_bracket_split_tokens() {
        // Regression test for the bug where the `<` in front of `invoke` was
        // eaten by the reasoning parser's prefix matching once reasoning had
        // already ended.
        let mut parser = MiniMaxAppendThinkParser::new();

        // Reasoning phase.
        let thinking = feed(&mut parser, "let me check the weather");
        assert_eq!(thinking.reasoning_text, "let me check the weather");

        // Reasoning is closed; the bare end tag yields no text at all.
        let closing = feed(&mut parser, "</think>");
        assert_eq!(closing.reasoning_text, "");
        assert_eq!(closing.normal_text, "");

        // From here on each chunk must pass through unchanged as normal text:
        // the tool call start marker, a newline, `<` as its own token (it must
        // NOT be buffered), and the rest of the invoke tag.
        for chunk in [
            "<minimax:tool_call>",
            "\n",
            "<",
            "invoke name=\"get_weather\">",
        ] {
            let out = feed(&mut parser, chunk);
            assert_eq!(out.normal_text, chunk);
        }
    }
}
...@@ -6,11 +6,13 @@ use std::sync::OnceLock; ...@@ -6,11 +6,13 @@ use std::sync::OnceLock;
mod base_parser; mod base_parser;
mod gpt_oss_parser; mod gpt_oss_parser;
mod granite_parser; mod granite_parser;
mod minimax_append_think_parser;
// Re-export main types and functions for convenience // Re-export main types and functions for convenience
pub use base_parser::BasicReasoningParser; pub use base_parser::BasicReasoningParser;
pub use gpt_oss_parser::GptOssReasoningParser; pub use gpt_oss_parser::GptOssReasoningParser;
pub use granite_parser::GraniteReasoningParser; pub use granite_parser::GraniteReasoningParser;
pub use minimax_append_think_parser::MiniMaxAppendThinkParser;
static REASONING_PARSER_MAP: OnceLock<HashMap<&'static str, ReasoningParserType>> = OnceLock::new(); static REASONING_PARSER_MAP: OnceLock<HashMap<&'static str, ReasoningParserType>> = OnceLock::new();
...@@ -28,6 +30,10 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT ...@@ -28,6 +30,10 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT
map.insert("mistral", ReasoningParserType::Mistral); map.insert("mistral", ReasoningParserType::Mistral);
map.insert("granite", ReasoningParserType::Granite); map.insert("granite", ReasoningParserType::Granite);
map.insert("nemotron_nano", ReasoningParserType::NemotronDeci); // nemotron nano is <think>...</think> map.insert("nemotron_nano", ReasoningParserType::NemotronDeci); // nemotron nano is <think>...</think>
map.insert(
"minimax_append_think",
ReasoningParserType::MiniMaxAppendThink,
);
map map
}) })
} }
...@@ -92,6 +98,7 @@ pub enum ReasoningParserType { ...@@ -92,6 +98,7 @@ pub enum ReasoningParserType {
Kimi, Kimi,
Mistral, Mistral,
Granite, Granite,
MiniMaxAppendThink,
} }
#[derive(std::fmt::Debug)] #[derive(std::fmt::Debug)]
...@@ -173,6 +180,9 @@ impl ReasoningParserType { ...@@ -173,6 +180,9 @@ impl ReasoningParserType {
ReasoningParserType::Granite => ReasoningParserWrapper { ReasoningParserType::Granite => ReasoningParserWrapper {
parser: Box::new(GraniteReasoningParser::new()), parser: Box::new(GraniteReasoningParser::new()),
}, },
ReasoningParserType::MiniMaxAppendThink => ReasoningParserWrapper {
parser: Box::new(MiniMaxAppendThinkParser::new()),
},
} }
} }
...@@ -215,6 +225,7 @@ mod tests { ...@@ -215,6 +225,7 @@ mod tests {
"mistral", "mistral",
"granite", "granite",
"nemotron_nano", "nemotron_nano",
"minimax_append_think",
]; ];
for parser in available_parsers { for parser in available_parsers {
assert!(parsers.contains(&parser)); assert!(parsers.contains(&parser));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment