// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use crate::{ParserResult, ReasoningParser};

/// MiniMax Append-Think Reasoning Parser.
///
/// The MiniMax model starts generating reasoning content immediately WITHOUT
/// emitting a `<think>` opener in its output. SGLang's `MiniMaxAppendThinkDetector`
/// and vLLM's `MiniMaxM2AppendThinkReasoningParser` both handle this by simply
/// prepending `<think>` to the emitted text and classifying the whole stream
/// as `normal_text`/content — neither extracts reasoning based on a `</think>`
/// marker. The tag is left inline for downstream consumers that want to render
/// or post-process it.
///
/// This parser matches those upstream implementations verbatim: a pass-through
/// with a one-time `<think>` prefix on the first streamed chunk. Reasoning
/// content is never populated.
///
/// References:
/// - SGLang MiniMaxAppendThinkDetector:
///   <https://github.com/sgl-project/sglang/blob/main/python/sglang/srt/parser/reasoning_parser.py>
/// - vLLM MiniMaxM2AppendThinkReasoningParser:
///   <https://github.com/vllm-project/vllm/blob/main/vllm/reasoning/minimax_m2_reasoning_parser.py>
#[derive(Debug, Default)]
pub struct MiniMaxAppendThinkParser {
    /// Flips to true after the first streamed chunk has received the `<think>`
    /// prefix so subsequent chunks pass through unchanged.
    ///
    /// Starts as `false` (the derived `Default`).
    prefix_emitted: bool,
}

impl MiniMaxAppendThinkParser {
    /// Creates a parser in its initial state: the one-time `<think>` prefix
    /// has not yet been emitted on the streaming path.
    pub fn new() -> Self {
        Self {
            prefix_emitted: false,
        }
    }
}

/// Opening reasoning tag that this parser prepends to the model output.
const THINK_START_TOKEN: &str = "<think>";

40
impl ReasoningParser for MiniMaxAppendThinkParser {
41
42
43
44
45
46
47
    fn detect_and_parse_reasoning(&mut self, text: &str, _token_ids: &[u32]) -> ParserResult {
        // Non-streaming: return the full text with a single `<think>` prefix,
        // all as normal_text.  Reasoning extraction is intentionally a no-op.
        ParserResult {
            normal_text: format!("{THINK_START_TOKEN}{text}"),
            reasoning_text: String::new(),
        }
48
49
50
51
52
    }

    fn parse_reasoning_streaming_incremental(
        &mut self,
        text: &str,
53
        _token_ids: &[u32],
54
    ) -> ParserResult {
55
56
57
        let normal_text = if !self.prefix_emitted {
            self.prefix_emitted = true;
            format!("{THINK_START_TOKEN}{text}")
58
        } else {
59
60
61
62
63
            text.to_string()
        };
        ParserResult {
            normal_text,
            reasoning_text: String::new(),
64
65
66
67
68
69
70
71
72
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Non-streaming parse prefixes `<think>` and reports everything as normal text.
    #[test]
    fn test_detect_and_parse_prepends_think_all_as_normal_text() {
        let mut parser = MiniMaxAppendThinkParser::new();
        let result = parser.detect_and_parse_reasoning("reasoning content here", &[]);
        // Matches SGLang: everything is normal_text with a `<think>` prefix.
        assert_eq!(result.normal_text, "<think>reasoning content here");
        assert_eq!(result.reasoning_text, "");
    }

    /// A `</think>` marker in non-streaming input does not trigger a split.
    #[test]
    fn test_detect_and_parse_with_end_token_is_still_normal_text() {
        let mut parser = MiniMaxAppendThinkParser::new();
        let result =
            parser.detect_and_parse_reasoning("reasoning content</think>normal response", &[]);
        // SGLang does not split on `</think>` — the whole string (with the
        // prepended `<think>`) flows through as normal_text.
        assert_eq!(
            result.normal_text,
            "<think>reasoning content</think>normal response"
        );
        assert_eq!(result.reasoning_text, "");
    }

    /// Only the first streamed chunk is prefixed; later chunks are untouched.
    #[test]
    fn test_streaming_first_chunk_gets_prefix_rest_pass_through() {
        let mut parser = MiniMaxAppendThinkParser::new();

        let r1 = parser.parse_reasoning_streaming_incremental("I need to ", &[]);
        assert_eq!(r1.normal_text, "<think>I need to ");
        assert_eq!(r1.reasoning_text, "");

        let r2 = parser.parse_reasoning_streaming_incremental("check the weather", &[]);
        assert_eq!(r2.normal_text, "check the weather");
        assert_eq!(r2.reasoning_text, "");

        let r3 = parser.parse_reasoning_streaming_incremental("</think>The weather is sunny.", &[]);
        // No split — `</think>` passes through verbatim in normal_text.
        assert_eq!(r3.normal_text, "</think>The weather is sunny.");
        assert_eq!(r3.reasoning_text, "");
    }

    /// An empty first chunk still consumes the one-time prefix.
    #[test]
    fn test_streaming_empty_first_chunk_consumes_prefix() {
        let mut parser = MiniMaxAppendThinkParser::new();

        let r1 = parser.parse_reasoning_streaming_incremental("", &[]);
        assert_eq!(r1.normal_text, "<think>");
        assert_eq!(r1.reasoning_text, "");

        let r2 = parser.parse_reasoning_streaming_incremental("next", &[]);
        assert_eq!(r2.normal_text, "next");
        assert_eq!(r2.reasoning_text, "");
    }

    #[test]
    fn test_streaming_bare_json_tool_call_is_normal_text() {
        // Regression: under SGLang guided decoding the model emits a bare
        // JSON array with no `</think>`. The parser must not capture it as
        // reasoning — it must pass through so the tool-call jail can extract
        // it into structured tool_calls.
        let mut parser = MiniMaxAppendThinkParser::new();
        let r = parser.parse_reasoning_streaming_incremental(
            r#"[{"name":"get_weather","parameters":{"location":"San Francisco"}}]"#,
            &[],
        );
        assert_eq!(
            r.normal_text,
            r#"<think>[{"name":"get_weather","parameters":{"location":"San Francisco"}}]"#
        );
        assert_eq!(r.reasoning_text, "");
    }

    /// A tool call following `</think>` in a later chunk stays in normal text.
    #[test]
    fn test_streaming_tool_call_after_reasoning_is_all_normal_text() {
        let mut parser = MiniMaxAppendThinkParser::new();

        let r1 = parser.parse_reasoning_streaming_incremental("let me call a tool", &[]);
        assert_eq!(r1.normal_text, "<think>let me call a tool");

        let r2 = parser.parse_reasoning_streaming_incremental(
            "</think><minimax:tool_call><invoke name=\"get_weather\">",
            &[],
        );
        // Entire chunk is normal_text — `</think>` is not consumed.
        assert_eq!(
            r2.normal_text,
            "</think><minimax:tool_call><invoke name=\"get_weather\">"
        );
        assert_eq!(r2.reasoning_text, "");
    }
}