feat: Kimi K2/K2.5 tool and reasoning parsers (#6407)

Signed-off-by: Nikita Sukharev <kaonael@gmail.com>

feat: Kimi K2/K2.5 tool and reasoning parsers (#6407)
Signed-off-by: Nikita Sukharev <kaonael@gmail.com>
d1bd210f · Nikita · GitHub · ff06b17e · d1bd210f · d1bd210f
Unverified Commit d1bd210f authored Feb 25, 2026 by Nikita Committed by GitHub Feb 25, 2026
11 changed files
--- a/docs/pages/agents/tool-calling.md
+++ b/docs/pages/agents/tool-calling.md
@@ -45,7 +45,14 @@ Parser to Model Mapping
 | pythonic |  meta-llama/Llama-4-* |
 | jamba |  ai21labs/AI21-Jamba-*-1.5, ai21labs/AI21-Jamba-*-1.6, ai21labs/AI21-Jamba-*-1.7, |
 | glm47 | zai-org/GLM-4.7 |
+| kimi_k2 | moonshotai/Kimi-K2-Thinking*, moonshotai/Kimi-K2-Instruct*, moonshotai/Kimi-K2.5* |
+\* Currently requires converting `tiktoken.model` to `tokenizer.json`.
+> [!TIP]
+> For Kimi K2.5 thinking models, pair `--dyn-tool-call-parser kimi_k2` with
+> `--dyn-reasoning-parser kimi_k25` so that both `<think>` blocks and tool calls
+> are parsed correctly from the same response.
 ## Examples

--- a/lib/llm/Cargo.toml
+++ b/lib/llm/Cargo.toml
@@ -147,8 +147,8 @@ galil-seiferas = { version = "0.1" }
 # preprocessor
 bs62 = { version = "0.1" }
-minijinja = { version = "2.14.0", features = ["loader"] }
+minijinja = { version = "2.15.1", features = ["loader", "loop_controls"] }
-minijinja-contrib = { version = "2.14.0", features = ["pycompat"] }
+minijinja-contrib = { version = "2.15.1", features = ["pycompat"] }
 json-five = { version = "0.3" }
 # media loading in the preprocessor

--- a/lib/llm/src/preprocessor.rs
+++ b/lib/llm/src/preprocessor.rs
@@ -946,6 +946,25 @@ impl OpenAIPreprocessor {
        jail.apply_with_finish_reason(stream)
    }
+    /// Reports whether this particular request switches reasoning parsing off.
+    ///
+    /// Only the `kimi_k25` parser honours a per-request switch: parsing is disabled when
+    /// `chat_template_args` maps `"thinking"` to the JSON boolean `false`. Any other parser
+    /// name, a missing parser, missing args, or any other `"thinking"` value yields `false`.
+    fn is_reasoning_disabled_by_request(
+        reasoning_parser: Option<&str>,
+        chat_template_args: Option<&std::collections::HashMap<String, serde_json::Value>>,
+    ) -> bool {
+        // Parsers other than kimi_k25 never consult the request arguments.
+        if reasoning_parser != Some("kimi_k25") {
+            return false;
+        }
+        chat_template_args
+            .and_then(|args| args.get("thinking"))
+            .is_some_and(|thinking| thinking.as_bool() == Some(false))
+    }
    // Motivation: Each transformation on the stream should be a separate step to allow for more flexibility
    // Earlier reasoning parser logic was nested under delta generation logic in choice_from_postprocessor
    // Since we have tool calling parsing as separate step, it makes sense to have reasoning parser as separate step as well
@@ -1094,7 +1113,11 @@ impl
        );
        // Try to parse reasoning content only if parser is configured
-        let should_parse_reasoning = self.runtime_config.reasoning_parser.is_some();
+        let should_parse_reasoning = self.runtime_config.reasoning_parser.is_some()
+            && !Self::is_reasoning_disabled_by_request(
+                self.runtime_config.reasoning_parser.as_deref(),
+                request.chat_template_args.as_ref(),
+            );
        // Reasoning Content Parsing Transformation Step
        // Current Solution:
@@ -1329,3 +1352,77 @@ impl
 }
 // Note: tests for jailing and parser detection live in `lib/llm/tests/test_jail.rs`
+#[cfg(test)]
+mod tests {
+    use super::*;
+    /// Table-driven check of `is_reasoning_disabled_by_request`: only the `kimi_k25`
+    /// parser combined with `"thinking": false` is expected to report "disabled".
+    #[test]
+    fn test_is_reasoning_disabled_by_request() {
+        type Args = std::collections::HashMap<String, serde_json::Value>;
+        // Builds chat_template_args holding a single boolean "thinking" entry.
+        let with_thinking = |flag: bool| -> Args {
+            Args::from([("thinking".to_string(), serde_json::Value::Bool(flag))])
+        };
+        let thinking_on = with_thinking(true);
+        let thinking_off = with_thinking(false);
+        let no_entries = Args::new();
+        // Row layout: (parser, args, expected_disabled, description)
+        let table: [(Option<&str>, Option<&Args>, bool, &str); 7] = [
+            (
+                Some("kimi_k25"),
+                Some(&thinking_off),
+                true,
+                "kimi_k25 + thinking=false → disabled",
+            ),
+            (
+                Some("kimi_k25"),
+                Some(&thinking_on),
+                false,
+                "kimi_k25 + thinking=true → enabled",
+            ),
+            (
+                Some("kimi_k25"),
+                None,
+                false,
+                "kimi_k25 + no args → enabled",
+            ),
+            (
+                Some("kimi_k25"),
+                Some(&no_entries),
+                false,
+                "kimi_k25 + empty args → enabled",
+            ),
+            (
+                Some("deepseek_r1"),
+                Some(&thinking_off),
+                false,
+                "deepseek_r1 → never disabled",
+            ),
+            (
+                Some("basic"),
+                Some(&thinking_off),
+                false,
+                "basic → never disabled",
+            ),
+            (
+                None,
+                Some(&thinking_off),
+                false,
+                "no parser → never disabled",
+            ),
+        ];
+        for (parser, args, expected, desc) in table {
+            let actual = OpenAIPreprocessor::is_reasoning_disabled_by_request(parser, args);
+            assert_eq!(actual, expected, "FAILED: {desc}");
+        }
+    }
+}
--- a/lib/llm/tests/test_reasoning_parser.rs
+++ b/lib/llm/tests/test_reasoning_parser.rs
@@ -105,6 +105,38 @@ mod tests {
        }
    }
+    /// Builds a mock stream chunk that carries only `content`; the second argument of
+    /// `create_mock_response_chunk` is left as `None`.
+    fn chunk(content: &str) -> Annotated<NvCreateChatCompletionStreamResponse> {
+        let text = content.to_owned();
+        create_mock_response_chunk(text, None)
+    }
+    /// Feeds `chunks` through the reasoning parser registered under `parser` and returns
+    /// the concatenated `(reasoning_content, content)` text gathered from every choice delta.
+    async fn run_parser(
+        chunks: Vec<Annotated<NvCreateChatCompletionStreamResponse>>,
+        parser: &str,
+    ) -> (String, String) {
+        let parsed = OpenAIPreprocessor::parse_reasoning_content_from_stream(
+            stream::iter(chunks),
+            parser.to_string(),
+        );
+        let mut parsed = std::pin::pin!(parsed);
+        let (mut reasoning, mut content) = (String::new(), String::new());
+        while let Some(item) = parsed.next().await {
+            // Items without a data payload contribute nothing.
+            let Some(data) = item.data.as_ref() else {
+                continue;
+            };
+            for choice in &data.choices {
+                if let Some(piece) = &choice.delta.reasoning_content {
+                    reasoning.push_str(piece);
+                }
+                if let Some(piece) = &choice.delta.content {
+                    content.push_str(get_text(piece));
+                }
+            }
+        }
+        (reasoning, content)
+    }
    #[tokio::test]
    async fn test_reasoning_parser_with_basic_parser() {
        // Basic Parser test <think> </think> tags
@@ -414,57 +446,69 @@ mod tests {
    }
    #[tokio::test]
-    async fn test_reasoning_parser_with_kimi_parser() {
+    async fn test_reasoning_parser_with_kimi_k25() {
-        // Create a mock runtime config with Kimi reasoning parser
+        // (description, input_chunks, expected_reasoning, expected_content)
-        let runtime_config = dynamo_llm::local_model::runtime_config::ModelRuntimeConfig {
+        let cases = vec![
-            reasoning_parser: Some("kimi".to_string()),
+            (
-            ..Default::default()
+                "thinking mode",
-        };
+                vec![
+                    chunk("<think>Let me"),
-        // Create test input stream with Kimi-style reasoning tags
+                    chunk(" think about this carefully."),
-        let input_chunks = vec![
+                    chunk("</think>Bonjour!"),
-            create_mock_response_chunk("Let me analyze this. ◁think▷This is Kimi reasoning content◁/think▷ Here's my conclusion.".to_string(), None),
+                ],
+                "Let me think about this carefully.",
+                "Bonjour!",
+            ),
+            (
+                "instant mode (empty think)",
+                vec![
+                    chunk("<think>"),
+                    chunk("</think>"),
+                    chunk("Direct answer without thinking."),
+                ],
+                "",
+                "Direct answer without thinking.",
+            ),
+            (
+                "token-by-token",
+                vec![
+                    chunk("<think>"),
+                    chunk("The user"),
+                    chunk(" asked me"),
+                    chunk(" to say hello."),
+                    chunk("</think>"),
+                    chunk("Hello"),
+                    chunk("!"),
+                ],
+                "The user asked me to say hello.",
+                "Hello!",
+            ),
        ];
-        let input_stream = stream::iter(input_chunks);
-        // Apply the reasoning parser transformation
-        let output_stream = OpenAIPreprocessor::parse_reasoning_content_from_stream(
-            input_stream,
-            runtime_config.reasoning_parser.unwrap(),
-        );
-        // Pin the stream and collect all output chunks
+        for (desc, chunks, expected_reasoning, expected_content) in cases {
-        let mut output_stream = std::pin::pin!(output_stream);
+            let (reasoning, content) = run_parser(chunks, "kimi_k25").await;
-        let mut output_chunks = Vec::new();
+            assert_eq!(reasoning, expected_reasoning, "FAILED reasoning: {desc}");
-        while let Some(chunk) = output_stream.next().await {
+            assert_eq!(content, expected_content, "FAILED content: {desc}");
-            output_chunks.push(chunk);
        }
+    }
-        // Verify that Kimi-style reasoning is parsed correctly
+    #[tokio::test]
-        assert_eq!(output_chunks.len(), 1);
+    async fn test_reasoning_parser_with_kimi_parser() {
-        let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0];
+        let (reasoning, content) = run_parser(
+            vec![chunk(
-        assert!(
+                "Let me analyze this. ◁think▷This is Kimi reasoning content◁/think▷ Here's my conclusion.",
-            output_choice.delta.reasoning_content.is_some(),
+            )],
-            "Should extract Kimi reasoning content"
+            "kimi",
-        );
+        )
-        assert!(
+        .await;
-            output_choice.delta.content.is_some(),
-            "Should have normal content"
-        );
-        let reasoning_content = output_choice.delta.reasoning_content.as_ref().unwrap();
-        let normal_content = output_choice.delta.content.as_ref().unwrap();
-        // Verify the content was parsed with Kimi tags
        assert!(
-            reasoning_content.contains("Kimi reasoning"),
+            reasoning.contains("Kimi reasoning"),
-            "Should contain Kimi reasoning content"
+            "Should contain Kimi reasoning, got: {reasoning}"
        );
        assert!(
-            get_text(normal_content).contains("Let me analyze")
+            content.contains("Let me analyze") || content.contains("Here's my conclusion"),
-                || get_text(normal_content).contains("Here's my conclusion"),
+            "Should contain normal content, got: {content}"
-            "Should contain normal content"
        );
    }
@@ -586,6 +630,103 @@ mod tests {
        );
    }
+    /// Chains the `kimi_k25` reasoning parser with the `kimi_k2` tool-calling jail and
+    /// checks that reasoning text and the tool call are both extracted, with no normal
+    /// content left over.
+    #[tokio::test]
+    async fn test_kimi_k25_with_reasoning_and_tool_calls() {
+        // Mimics a Kimi K2.5 reply: a <think> block first, then a tool-call section.
+        let raw_pieces = [
+            "<think>I should check the weather",
+            " before answering.</think>",
+            "<|tool_calls_section_begin|>",
+            "<|tool_call_begin|>functions.get_weather:0",
+            "<|tool_call_argument_begin|>",
+            r#"{"location":"NYC"}"#,
+            "<|tool_call_end|>",
+            "<|tool_calls_section_end|>",
+        ];
+        let input_chunks: Vec<_> = raw_pieces.into_iter().map(chunk).collect();
+        // Stage 1: the kimi_k25 reasoning parser lifts <think> text into reasoning_content.
+        let after_reasoning = OpenAIPreprocessor::parse_reasoning_content_from_stream(
+            stream::iter(input_chunks),
+            "kimi_k25".to_string(),
+        );
+        // Stage 2: the kimi_k2 tool-calling jail pulls tool calls out of what remains.
+        let after_tools = OpenAIPreprocessor::apply_tool_calling_jail(
+            Some("kimi_k2".to_string()),
+            None,
+            None,
+            after_reasoning,
+        );
+        let mut after_tools = std::pin::pin!(after_tools);
+        let mut emitted = Vec::new();
+        while let Some(item) = after_tools.next().await {
+            emitted.push(item);
+        }
+        assert!(!emitted.is_empty(), "Should have output chunks");
+        let mut reasoning = String::new();
+        let mut all_normal_content = String::new();
+        let mut saw_tool_calls = false;
+        let mut function_name: Option<String> = None;
+        let mut arguments: Option<serde_json::Value> = None;
+        // Walk every choice of every emitted item that actually carries data.
+        let choices = emitted
+            .iter()
+            .filter_map(|item| item.data.as_ref())
+            .flat_map(|data| data.choices.iter());
+        for choice in choices {
+            if let Some(text) = &choice.delta.reasoning_content {
+                reasoning.push_str(text);
+            }
+            if let Some(text) = &choice.delta.content {
+                all_normal_content.push_str(get_text(text));
+            }
+            let Some(tool_calls) = choice.delta.tool_calls.as_ref() else {
+                continue;
+            };
+            if tool_calls.is_empty() {
+                continue;
+            }
+            saw_tool_calls = true;
+            // The last name / argument payload seen wins, as tool calls are visited in order.
+            for function in tool_calls.iter().filter_map(|tc| tc.function.as_ref()) {
+                if let Some(name) = &function.name {
+                    function_name = Some(name.clone());
+                }
+                if let Some(args) = &function.arguments {
+                    arguments = Some(serde_json::from_str(args).unwrap());
+                }
+            }
+        }
+        assert_eq!(
+            reasoning, "I should check the weather before answering.",
+            "Reasoning mismatch"
+        );
+        assert!(saw_tool_calls, "Should have found tool calls in the output");
+        assert_eq!(
+            function_name.as_deref(),
+            Some("get_weather"),
+            "Tool call function name should be 'get_weather'"
+        );
+        assert_eq!(
+            arguments.as_ref(),
+            Some(&serde_json::json!({"location": "NYC"})),
+            "Tool call arguments mismatch"
+        );
+        // Everything should have been routed to reasoning or tool calls.
+        assert!(
+            all_normal_content.trim().is_empty(),
+            "Expected no normal content, got: {all_normal_content:?}"
+        );
+    }
    #[tokio::test]
    #[ignore]
    // (TODO: Ayush) Fix this test

--- a/lib/parsers/src/reasoning/base_parser.rs
+++ b/lib/parsers/src/reasoning/base_parser.rs
@@ -108,6 +108,10 @@ impl ReasoningParser for BasicReasoningParser {
        while cursor < text.len() {
            if currently_reasoning {
+                // Skip leading start token if present (handles force_reasoning + explicit <think>)
+                if text[cursor..].starts_with(&self.think_start_token) {
+                    cursor += self.think_start_token.len();
+                }
                // We're inside a reasoning block — look for end token
                if let Some(end_offset) = text[cursor..].find(&self.think_end_token) {
                    reasoning_parts.push(&text[cursor..cursor + end_offset]);
@@ -175,6 +179,17 @@ impl ReasoningParser for BasicReasoningParser {
                continue;
            }
+            // Buffer is a prefix of the start token (e.g., "<thi" for "<think>") — wait
+            // for more data before deciding whether to strip it or emit as reasoning.
+            // Only applies when force_reasoning=true and we haven't stripped the tag yet.
+            if !self.stripped_think_start
+                && self._in_reasoning
+                && !current_text.is_empty()
+                && self.think_start_token.starts_with(current_text.as_str())
+            {
+                break;
+            }
            if self._in_reasoning {
                if let Some(end_idx) = current_text.find(self.think_end_token.as_str()) {
                    // End of reasoning block: accumulate content and transition out.

--- a/lib/parsers/src/reasoning/mod.rs
+++ b/lib/parsers/src/reasoning/mod.rs
@@ -26,6 +26,7 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT
        map.insert("qwen3", ReasoningParserType::Qwen);
        map.insert("nemotron_deci", ReasoningParserType::NemotronDeci);
        map.insert("kimi", ReasoningParserType::Kimi);
+        map.insert("kimi_k25", ReasoningParserType::KimiK25);
        map.insert("step3", ReasoningParserType::Step3);
        map.insert("mistral", ReasoningParserType::Mistral);
        map.insert("granite", ReasoningParserType::Granite);
@@ -97,6 +98,7 @@ pub enum ReasoningParserType {
    Qwen,
    NemotronDeci,
    Kimi,
+    KimiK25,
    Mistral,
    Granite,
    MiniMaxAppendThink,
@@ -152,6 +154,14 @@ impl ReasoningParserType {
                    true,
                )),
            },
+            ReasoningParserType::KimiK25 => ReasoningParserWrapper {
+                parser: Box::new(BasicReasoningParser::new(
+                    "<think>".into(),
+                    "</think>".into(),
+                    true,
+                    true,
+                )),
+            },
            ReasoningParserType::Mistral => ReasoningParserWrapper {
                parser: Box::new(BasicReasoningParser::new(
                    "[THINK]".into(),
@@ -222,6 +232,7 @@ mod tests {
            "qwen3",
            "nemotron_deci",
            "kimi",
+            "kimi_k25",
            "step3",
            "mistral",
            "granite",
@@ -233,4 +244,135 @@ mod tests {
            assert!(parsers.contains(&parser));
        }
    }
+    /// Non-streaming parsing with the `kimi_k25` parser; every input gets a fresh parser.
+    #[test]
+    fn test_kimi_k25_detect_and_parse() {
+        // Row layout: (description, input, expected_reasoning, expected_normal)
+        let table: [(&str, &str, &str, &str); 4] = [
+            (
+                "force reasoning: no think tags",
+                "no think tags here",
+                "no think tags here",
+                "",
+            ),
+            (
+                "standard think tags",
+                "<think>Let me reason about this.</think>Hello!",
+                "Let me reason about this.",
+                "Hello!",
+            ),
+            (
+                "empty think block (instant mode)",
+                "<think></think>Hello from instant mode!",
+                "",
+                "Hello from instant mode!",
+            ),
+            (
+                "empty think block with newline",
+                "<think>\n</think>Hello from instant mode!",
+                "",
+                "Hello from instant mode!",
+            ),
+        ];
+        for (desc, input, expected_reasoning, expected_normal) in table {
+            let mut fresh_parser = ReasoningParserType::KimiK25.get_reasoning_parser();
+            let parsed = fresh_parser.detect_and_parse_reasoning(input, &[]);
+            assert_eq!(
+                parsed.reasoning_text, expected_reasoning,
+                "FAILED reasoning: {desc}"
+            );
+            assert_eq!(parsed.normal_text, expected_normal, "FAILED normal: {desc}");
+        }
+    }
+    /// Streaming through one `kimi_k25` parser when the opening tag is split across chunks:
+    /// the partial `<thi` prefix must produce no output, the completed tag must be stripped,
+    /// and text after `</think>` must come out as normal content.
+    #[test]
+    fn test_kimi_k25_streaming_force_reasoning() {
+        let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();
+        // Steps in arrival order: (chunk, expected_reasoning, expected_normal)
+        let steps = [
+            // Partial think tag: held back because it is a prefix of "<think>".
+            ("<thi", "", ""),
+            // Rest of the tag plus the reasoning text.
+            ("nk>reasoning here", "reasoning here", ""),
+            // Closing tag followed by normal content.
+            ("</think>Hello!", "", "Hello!"),
+        ];
+        for (piece, want_reasoning, want_normal) in steps {
+            let out = parser.parse_reasoning_streaming_incremental(piece, &[]);
+            assert_eq!(out.reasoning_text, want_reasoning);
+            assert_eq!(out.normal_text, want_normal);
+        }
+    }
+    /// Streams whole token sequences through a fresh `kimi_k25` parser per case and compares
+    /// the accumulated reasoning and content text.
+    #[test]
+    fn test_kimi_k25_streaming() {
+        // Row layout: (description, tokens, expected_reasoning, expected_content)
+        let cases: Vec<(&str, &[&str], &str, &str)> = vec![
+            (
+                "complete response",
+                &[
+                    "<think>",
+                    "I need to",
+                    " think about",
+                    " this carefully.",
+                    "</think>",
+                    "Bonjour",
+                    "!",
+                ],
+                "I need to think about this carefully.",
+                "Bonjour!",
+            ),
+            (
+                "empty think (instant mode)",
+                &["<think>", "</think>", "Direct answer."],
+                "",
+                "Direct answer.",
+            ),
+        ];
+        for (desc, tokens, expected_reasoning, expected_content) in cases {
+            let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();
+            // Accumulate both output channels across the token sequence.
+            let (all_reasoning, all_content) = tokens.iter().fold(
+                (String::new(), String::new()),
+                |(mut reasoning, mut content), token| {
+                    let step = parser.parse_reasoning_streaming_incremental(token, &[]);
+                    reasoning.push_str(&step.reasoning_text);
+                    content.push_str(&step.normal_text);
+                    (reasoning, content)
+                },
+            );
+            assert_eq!(
+                all_reasoning, expected_reasoning,
+                "FAILED reasoning: {desc}"
+            );
+            assert_eq!(all_content, expected_content, "FAILED content: {desc}");
+        }
+    }
+    /// The name `"kimi_k25"` must resolve to a parser that splits `<think>` blocks.
+    #[test]
+    fn test_kimi_k25_parser_lookup_by_name() {
+        let input = "<think>thinking</think>answer";
+        let mut by_name = ReasoningParserType::get_reasoning_parser_from_name("kimi_k25");
+        let parsed = by_name.detect_and_parse_reasoning(input, &[]);
+        assert_eq!(parsed.reasoning_text, "thinking");
+        assert_eq!(parsed.normal_text, "answer");
+    }
+    /// The original Kimi parser (◁think▷/◁/think▷) and the K2.5 parser (<think>/</think>)
+    /// must treat the same `<think>`-tagged input differently.
+    #[test]
+    fn test_kimi_vs_kimi_k25_different_tags() {
+        let input = "<think>reasoning</think>answer";
+        let mut original_parser = ReasoningParserType::Kimi.get_reasoning_parser();
+        let mut k25_parser = ReasoningParserType::KimiK25.get_reasoning_parser();
+        // The original Kimi parser leaves <think> tags untouched in the normal text.
+        let from_original = original_parser.detect_and_parse_reasoning(input, &[]);
+        assert_eq!(from_original.normal_text, "<think>reasoning</think>answer");
+        assert_eq!(from_original.reasoning_text, "");
+        // The K2.5 parser splits the same input into reasoning and answer.
+        let from_k25 = k25_parser.detect_and_parse_reasoning(input, &[]);
+        assert_eq!(from_k25.reasoning_text, "reasoning");
+        assert_eq!(from_k25.normal_text, "answer");
+    }
 }
--- a/lib/parsers/src/tool_calling/config.rs
+++ b/lib/parsers/src/tool_calling/config.rs
@@ -130,6 +130,57 @@ impl Default for Glm47ParserConfig {
    }
 }
+/// Token configuration for the Kimi K2 tool call parser.
+///
+/// Expected model output format:
+/// ```text
+/// <|tool_calls_section_begin|>
+/// <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|>
+/// <|tool_calls_section_end|>
+/// ```
+///
+/// The model may emit either the plural or the singular form of the section tokens
+/// (e.g., `<|tool_calls_section_begin|>` or `<|tool_call_section_begin|>`).
+/// Both forms are listed in the `section_start_variants` and `section_end_variants` fields.
+/// See vLLM's `kimi_k2_tool_parser.py` for reference.
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub struct KimiK2ParserConfig {
+    /// Primary start token for the tool calls section (e.g., "<|tool_calls_section_begin|>")
+    pub section_start: String,
+    /// Primary end token for the tool calls section (e.g., "<|tool_calls_section_end|>")
+    pub section_end: String,
+    /// Every recognized section start token: the plural form plus the singular variant
+    pub section_start_variants: Vec<String>,
+    /// Every recognized section end token: the plural form plus the singular variant
+    pub section_end_variants: Vec<String>,
+    /// Token that opens an individual tool call (e.g., "<|tool_call_begin|>")
+    pub call_start: String,
+    /// Token that closes an individual tool call (e.g., "<|tool_call_end|>")
+    pub call_end: String,
+    /// Token between the function ID and its JSON arguments (e.g., "<|tool_call_argument_begin|>")
+    pub argument_begin: String,
+}
+impl Default for KimiK2ParserConfig {
+    /// Default Kimi K2 token set: plural section tokens are the primary form, and each
+    /// variant list holds the plural token followed by its singular spelling.
+    fn default() -> Self {
+        let section_start = String::from("<|tool_calls_section_begin|>");
+        let section_end = String::from("<|tool_calls_section_end|>");
+        // Variant lists are built first so the primary tokens can be moved in afterwards.
+        let section_start_variants = vec![
+            section_start.clone(),
+            String::from("<|tool_call_section_begin|>"),
+        ];
+        let section_end_variants = vec![
+            section_end.clone(),
+            String::from("<|tool_call_section_end|>"),
+        ];
+        Self {
+            section_start,
+            section_end,
+            section_start_variants,
+            section_end_variants,
+            call_start: String::from("<|tool_call_begin|>"),
+            call_end: String::from("<|tool_call_end|>"),
+            argument_begin: String::from("<|tool_call_argument_begin|>"),
+        }
+    }
+}
 /// Parser-specific configuration
 #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
@@ -140,6 +191,7 @@ pub enum ParserConfig {
    Harmony(JsonParserConfig),
    Typescript,
    Dsml(DsmlParserConfig),
+    KimiK2(KimiK2ParserConfig),
    Glm47(Glm47ParserConfig),
 }
@@ -155,6 +207,7 @@ impl ParserConfig {
            ParserConfig::Typescript => vec![],
            ParserConfig::Dsml(config) => vec![config.function_calls_start.clone()],
            ParserConfig::Glm47(config) => vec![config.tool_call_start.clone()],
+            ParserConfig::KimiK2(config) => config.section_start_variants.clone(),
        }
    }
@@ -169,6 +222,7 @@ impl ParserConfig {
            ParserConfig::Typescript => vec![],
            ParserConfig::Dsml(config) => vec![config.function_calls_end.clone()],
            ParserConfig::Glm47(config) => vec![config.tool_call_end.clone()],
+            ParserConfig::KimiK2(config) => config.section_end_variants.clone(),
        }
    }
 }
@@ -357,4 +411,15 @@ impl ToolCallConfig {
            parser_config: ParserConfig::Glm47(Glm47ParserConfig::default()),
        }
    }
+    /// Tool-call configuration for Kimi K2, built from `KimiK2ParserConfig::default()`.
+    ///
+    /// Kimi K2 format:
+    /// ```text
+    /// <|tool_calls_section_begin|>
+    /// <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|>
+    /// <|tool_calls_section_end|>
+    /// ```
+    ///
+    /// Reference: <https://huggingface.co/moonshotai/Kimi-K2-Instruct/blob/main/docs/tool_call_guidance.md>
+    pub fn kimi_k2() -> Self {
+        let parser_config = ParserConfig::KimiK2(KimiK2ParserConfig::default());
+        Self { parser_config }
+    }
 }
--- a/lib/parsers/src/tool_calling/mod.rs
+++ b/lib/parsers/src/tool_calling/mod.rs
@@ -23,7 +23,9 @@ pub struct ToolDefinition {
 }
 // Re-export main types and functions for convenience
-pub use config::{JsonParserConfig, ParserConfig, ToolCallConfig, XmlParserConfig};
+pub use config::{
+    JsonParserConfig, KimiK2ParserConfig, ParserConfig, ToolCallConfig, XmlParserConfig,
+};
 pub use dsml::try_tool_call_parse_dsml;
 pub use harmony::parse_tool_calls_harmony_complete;
 pub use json::try_tool_call_parse_json;
@@ -34,4 +36,5 @@ pub use parsers::{
 pub use pythonic::try_tool_call_parse_pythonic;
 pub use response::{CalledFunction, ToolCallResponse, ToolCallType};
 pub use tools::{try_tool_call_parse_aggregate, try_tool_call_parse_stream};
+pub use xml::try_tool_call_parse_kimi_k2;
 pub use xml::try_tool_call_parse_xml;
--- a/lib/parsers/src/tool_calling/parsers.rs
+++ b/lib/parsers/src/tool_calling/parsers.rs
@@ -19,8 +19,10 @@ use super::pythonic::{
 };
 use super::response::ToolCallResponse;
 use super::xml::{
-    detect_tool_call_start_glm47, detect_tool_call_start_xml, find_tool_call_end_position_glm47,
+    detect_tool_call_start_glm47, detect_tool_call_start_kimi_k2, detect_tool_call_start_xml,
-    find_tool_call_end_position_xml, try_tool_call_parse_glm47, try_tool_call_parse_xml,
+    find_tool_call_end_position_glm47, find_tool_call_end_position_kimi_k2,
+    find_tool_call_end_position_xml, try_tool_call_parse_glm47, try_tool_call_parse_kimi_k2,
+    try_tool_call_parse_xml,
 };
 use std::collections::HashMap;
 use std::sync::OnceLock;
@@ -45,6 +47,7 @@ pub fn get_tool_parser_map() -> &'static HashMap<&'static str, ToolCallConfig> {
        map.insert("jamba", ToolCallConfig::jamba());
        map.insert("minimax_m2", ToolCallConfig::minimax_m2());
        map.insert("glm47", ToolCallConfig::glm47());
+        map.insert("kimi_k2", ToolCallConfig::kimi_k2());
        map.insert("default", ToolCallConfig::default());
        map.insert("nemotron_nano", ToolCallConfig::qwen3_coder()); // nemotron nano follows qwen3_coder format
        map
@@ -91,6 +94,11 @@ pub async fn try_tool_call_parse(
                try_tool_call_parse_glm47(message, glm47_config, tools)?;
            Ok((results, normal_content))
        }
+        ParserConfig::KimiK2(kimi_config) => {
+            let (results, normal_content) =
+                try_tool_call_parse_kimi_k2(message, kimi_config, tools)?;
+            Ok((results, normal_content))
+        }
    }
 }
@@ -144,6 +152,9 @@ pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow::
            ParserConfig::Glm47(glm47_config) => {
                Ok(detect_tool_call_start_glm47(chunk, glm47_config))
            }
+            ParserConfig::KimiK2(kimi_config) => {
+                Ok(detect_tool_call_start_kimi_k2(chunk, kimi_config))
+            }
        },
        None => anyhow::bail!(
            "Parser '{}' is not implemented. Available parsers: {:?}",
@@ -184,6 +195,9 @@ pub fn find_tool_call_end_position(chunk: &str, parser_str: Option<&str>) -> usi
            ParserConfig::Glm47(glm47_config) => {
                find_tool_call_end_position_glm47(chunk, glm47_config)
            }
+            ParserConfig::KimiK2(kimi_config) => {
+                find_tool_call_end_position_kimi_k2(chunk, kimi_config)
+            }
        },
        None => {
            // Unknown parser, return full content length
@@ -225,6 +239,7 @@ mod tests {
            "nemotron_nano",
            "minimax_m2",
            "glm47",
+            "kimi_k2",
        ];
        for parser in available_parsers {
            assert!(parsers.contains(&parser));

--- a/lib/parsers/src/tool_calling/xml/kimi_k2_parser.rs
+++ b/lib/parsers/src/tool_calling/xml/kimi_k2_parser.rs
--- a/lib/parsers/src/tool_calling/xml/mod.rs
+++ b/lib/parsers/src/tool_calling/xml/mod.rs
@@ -2,12 +2,17 @@
 // SPDX-License-Identifier: Apache-2.0
 mod glm47_parser;
+mod kimi_k2_parser;
 mod parser;
 pub use super::response;
 pub use glm47_parser::{
    detect_tool_call_start_glm47, find_tool_call_end_position_glm47, try_tool_call_parse_glm47,
 };
+pub use kimi_k2_parser::{
+    detect_tool_call_start_kimi_k2, find_tool_call_end_position_kimi_k2,
+    try_tool_call_parse_kimi_k2,
+};
 pub use parser::{
    detect_tool_call_start_xml, find_tool_call_end_position_xml, try_tool_call_parse_xml,
 };