Unverified Commit d1bd210f authored by Nikita's avatar Nikita Committed by GitHub
Browse files

feat: Kimi K2/K2.5 tool and reasoning parsers (#6407)


Signed-off-by: default avatarNikita Sukharev <kaonael@gmail.com>
parent ff06b17e
......@@ -45,7 +45,14 @@ Parser to Model Mapping
| pythonic | meta-llama/Llama-4-* |
| jamba | ai21labs/AI21-Jamba-*-1.5, ai21labs/AI21-Jamba-*-1.6, ai21labs/AI21-Jamba-*-1.7 |
| glm47 | zai-org/GLM-4.7 |
| kimi_k2 | moonshotai/Kimi-K2-Thinking*, moonshotai/Kimi-K2-Instruct*, moonshotai/Kimi-K2.5* |
\* Currently requires converting `tiktoken.model` to `tokenizers.json`.
> [!TIP]
> For Kimi K2.5 thinking models, pair `--dyn-tool-call-parser kimi_k2` with
> `--dyn-reasoning-parser kimi_k25` so that both `<think>` blocks and tool calls
> are parsed correctly from the same response.
## Examples
......
......@@ -147,8 +147,8 @@ galil-seiferas = { version = "0.1" }
# preprocessor
bs62 = { version = "0.1" }
minijinja = { version = "2.14.0", features = ["loader"] }
minijinja-contrib = { version = "2.14.0", features = ["pycompat"] }
minijinja = { version = "2.15.1", features = ["loader", "loop_controls"] }
minijinja-contrib = { version = "2.15.1", features = ["pycompat"] }
json-five = { version = "0.3" }
# media loading in the preprocessor
......
......@@ -946,6 +946,25 @@ impl OpenAIPreprocessor {
jail.apply_with_finish_reason(stream)
}
/// Check if reasoning parsing should be disabled based on per-request parameters.
/// For kimi_k25: disabled when chat_template_args contains "thinking": false.
fn is_reasoning_disabled_by_request(
reasoning_parser: Option<&str>,
chat_template_args: Option<&std::collections::HashMap<String, serde_json::Value>>,
) -> bool {
match reasoning_parser {
Some("kimi_k25") => {
if let Some(args) = chat_template_args
&& let Some(thinking) = args.get("thinking")
{
return thinking == &serde_json::Value::Bool(false);
}
false
}
_ => false,
}
}
// Motivation: each transformation on the stream should be a separate step, which allows for more flexibility.
// Previously, the reasoning parser logic was nested under the delta generation logic in choice_from_postprocessor.
// Since tool call parsing is already a separate step, it makes sense for reasoning parsing to be a separate step as well.
......@@ -1094,7 +1113,11 @@ impl
);
// Try to parse reasoning content only if parser is configured
let should_parse_reasoning = self.runtime_config.reasoning_parser.is_some();
let should_parse_reasoning = self.runtime_config.reasoning_parser.is_some()
&& !Self::is_reasoning_disabled_by_request(
self.runtime_config.reasoning_parser.as_deref(),
request.chat_template_args.as_ref(),
);
// Reasoning Content Parsing Transformation Step
// Current Solution:
......@@ -1329,3 +1352,77 @@ impl
}
// Note: tests for jailing and parser detection live in `lib/llm/tests/test_jail.rs`
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `OpenAIPreprocessor::is_reasoning_disabled_by_request`:
    /// only `kimi_k25` combined with `"thinking": false` is expected to report
    /// "disabled"; every other combination must report "enabled".
    #[test]
    fn test_is_reasoning_disabled_by_request() {
        type TemplateArgs = std::collections::HashMap<String, serde_json::Value>;

        // One-entry argument map of the form {"thinking": <flag>}.
        let thinking_args = |flag: bool| -> TemplateArgs {
            TemplateArgs::from([("thinking".to_string(), serde_json::Value::Bool(flag))])
        };
        let thinking_true = thinking_args(true);
        let thinking_false = thinking_args(false);
        let empty_args = TemplateArgs::new();

        // Columns: parser name, chat template args, expected "disabled" verdict, label.
        let cases: [(Option<&str>, Option<&TemplateArgs>, bool, &str); 7] = [
            (
                Some("kimi_k25"),
                Some(&thinking_false),
                true,
                "kimi_k25 + thinking=false → disabled",
            ),
            (
                Some("kimi_k25"),
                Some(&thinking_true),
                false,
                "kimi_k25 + thinking=true → enabled",
            ),
            (
                Some("kimi_k25"),
                None,
                false,
                "kimi_k25 + no args → enabled",
            ),
            (
                Some("kimi_k25"),
                Some(&empty_args),
                false,
                "kimi_k25 + empty args → enabled",
            ),
            (
                Some("deepseek_r1"),
                Some(&thinking_false),
                false,
                "deepseek_r1 → never disabled",
            ),
            (
                Some("basic"),
                Some(&thinking_false),
                false,
                "basic → never disabled",
            ),
            (
                None,
                Some(&thinking_false),
                false,
                "no parser → never disabled",
            ),
        ];

        cases
            .into_iter()
            .for_each(|(parser, args, expected, desc)| {
                assert_eq!(
                    OpenAIPreprocessor::is_reasoning_disabled_by_request(parser, args),
                    expected,
                    "FAILED: {desc}",
                );
            });
    }
}
......@@ -105,6 +105,38 @@ mod tests {
}
}
/// Build a mock stream chunk that carries only `content`; the second argument
/// of `create_mock_response_chunk` is left as `None`.
fn chunk(content: &str) -> Annotated<NvCreateChatCompletionStreamResponse> {
    let owned_content = content.to_owned();
    create_mock_response_chunk(owned_content, None)
}
/// Run chunks through a reasoning parser, return aggregated (reasoning, content)
async fn run_parser(
chunks: Vec<Annotated<NvCreateChatCompletionStreamResponse>>,
parser: &str,
) -> (String, String) {
let output_stream = OpenAIPreprocessor::parse_reasoning_content_from_stream(
stream::iter(chunks),
parser.to_string(),
);
let mut output_stream = std::pin::pin!(output_stream);
let mut all_reasoning = String::new();
let mut all_content = String::new();
while let Some(item) = output_stream.next().await {
if let Some(ref data) = item.data {
for choice in &data.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
if let Some(ref c) = choice.delta.content {
all_content.push_str(get_text(c));
}
}
}
}
(all_reasoning, all_content)
}
#[tokio::test]
async fn test_reasoning_parser_with_basic_parser() {
// Basic Parser test <think> </think> tags
......@@ -414,57 +446,69 @@ mod tests {
}
#[tokio::test]
async fn test_reasoning_parser_with_kimi_parser() {
// Create a mock runtime config with Kimi reasoning parser
let runtime_config = dynamo_llm::local_model::runtime_config::ModelRuntimeConfig {
reasoning_parser: Some("kimi".to_string()),
..Default::default()
};
// Create test input stream with Kimi-style reasoning tags
let input_chunks = vec![
create_mock_response_chunk("Let me analyze this. ◁think▷This is Kimi reasoning content◁/think▷ Here's my conclusion.".to_string(), None),
async fn test_reasoning_parser_with_kimi_k25() {
// (description, input_chunks, expected_reasoning, expected_content)
let cases = vec![
(
"thinking mode",
vec![
chunk("<think>Let me"),
chunk(" think about this carefully."),
chunk("</think>Bonjour!"),
],
"Let me think about this carefully.",
"Bonjour!",
),
(
"instant mode (empty think)",
vec![
chunk("<think>"),
chunk("</think>"),
chunk("Direct answer without thinking."),
],
"",
"Direct answer without thinking.",
),
(
"token-by-token",
vec![
chunk("<think>"),
chunk("The user"),
chunk(" asked me"),
chunk(" to say hello."),
chunk("</think>"),
chunk("Hello"),
chunk("!"),
],
"The user asked me to say hello.",
"Hello!",
),
];
let input_stream = stream::iter(input_chunks);
// Apply the reasoning parser transformation
let output_stream = OpenAIPreprocessor::parse_reasoning_content_from_stream(
input_stream,
runtime_config.reasoning_parser.unwrap(),
);
// Pin the stream and collect all output chunks
let mut output_stream = std::pin::pin!(output_stream);
let mut output_chunks = Vec::new();
while let Some(chunk) = output_stream.next().await {
output_chunks.push(chunk);
for (desc, chunks, expected_reasoning, expected_content) in cases {
let (reasoning, content) = run_parser(chunks, "kimi_k25").await;
assert_eq!(reasoning, expected_reasoning, "FAILED reasoning: {desc}");
assert_eq!(content, expected_content, "FAILED content: {desc}");
}
}
// Verify that Kimi-style reasoning is parsed correctly
assert_eq!(output_chunks.len(), 1);
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0];
assert!(
output_choice.delta.reasoning_content.is_some(),
"Should extract Kimi reasoning content"
);
assert!(
output_choice.delta.content.is_some(),
"Should have normal content"
);
let reasoning_content = output_choice.delta.reasoning_content.as_ref().unwrap();
let normal_content = output_choice.delta.content.as_ref().unwrap();
#[tokio::test]
async fn test_reasoning_parser_with_kimi_parser() {
let (reasoning, content) = run_parser(
vec![chunk(
"Let me analyze this. ◁think▷This is Kimi reasoning content◁/think▷ Here's my conclusion.",
)],
"kimi",
)
.await;
// Verify the content was parsed with Kimi tags
assert!(
reasoning_content.contains("Kimi reasoning"),
"Should contain Kimi reasoning content"
reasoning.contains("Kimi reasoning"),
"Should contain Kimi reasoning, got: {reasoning}"
);
assert!(
get_text(normal_content).contains("Let me analyze")
|| get_text(normal_content).contains("Here's my conclusion"),
"Should contain normal content"
content.contains("Let me analyze") || content.contains("Here's my conclusion"),
"Should contain normal content, got: {content}"
);
}
......@@ -586,6 +630,103 @@ mod tests {
);
}
#[tokio::test]
async fn test_kimi_k25_with_reasoning_and_tool_calls() {
// Simulates a real Kimi K2.5 response: <think> block followed by tool calls.
// Verifies that reasoning and tool_calling parsers don't interfere with each other.
let input_chunks = vec![
chunk("<think>I should check the weather"),
chunk(" before answering.</think>"),
chunk("<|tool_calls_section_begin|>"),
chunk("<|tool_call_begin|>functions.get_weather:0"),
chunk("<|tool_call_argument_begin|>"),
chunk(r#"{"location":"NYC"}"#),
chunk("<|tool_call_end|>"),
chunk("<|tool_calls_section_end|>"),
];
let input_stream = stream::iter(input_chunks);
// Step 1: reasoning parser (kimi_k25) extracts <think> into reasoning_content
let reasoning_parsed_stream = OpenAIPreprocessor::parse_reasoning_content_from_stream(
input_stream,
"kimi_k25".to_string(),
);
// Step 2: tool calling jail (kimi_k2) extracts tool calls from remaining content
let tool_parsed_stream = OpenAIPreprocessor::apply_tool_calling_jail(
Some("kimi_k2".to_string()),
None,
None,
reasoning_parsed_stream,
);
let mut tool_parsed_stream = std::pin::pin!(tool_parsed_stream);
let mut output_chunks = Vec::new();
while let Some(chunk) = tool_parsed_stream.next().await {
output_chunks.push(chunk);
}
assert!(!output_chunks.is_empty(), "Should have output chunks");
let mut all_reasoning = String::new();
let mut all_normal_content = String::new();
let mut found_tool_calls = false;
let mut tool_call_function_name: Option<String> = None;
let mut tool_call_arguments: Option<serde_json::Value> = None;
for chunk in output_chunks.iter() {
if let Some(ref data) = chunk.data {
for choice in &data.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
if let Some(ref c) = choice.delta.content {
all_normal_content.push_str(get_text(c));
}
if let Some(ref tool_calls) = choice.delta.tool_calls
&& !tool_calls.is_empty()
{
found_tool_calls = true;
for tc in tool_calls {
if let Some(ref f) = tc.function {
if let Some(ref name) = f.name {
tool_call_function_name = Some(name.clone());
}
if let Some(ref args) = f.arguments {
tool_call_arguments = Some(serde_json::from_str(args).unwrap());
}
}
}
}
}
}
}
assert_eq!(
all_reasoning, "I should check the weather before answering.",
"Reasoning mismatch"
);
assert!(
found_tool_calls,
"Should have found tool calls in the output"
);
assert_eq!(
tool_call_function_name.as_deref(),
Some("get_weather"),
"Tool call function name should be 'get_weather'"
);
assert_eq!(
tool_call_arguments.as_ref(),
Some(&serde_json::json!({"location": "NYC"})),
"Tool call arguments mismatch"
);
// No normal content expected — everything is either reasoning or tool calls
assert!(
all_normal_content.trim().is_empty(),
"Expected no normal content, got: {all_normal_content:?}"
);
}
#[tokio::test]
#[ignore]
// (TODO: Ayush) Fix this test
......
......@@ -108,6 +108,10 @@ impl ReasoningParser for BasicReasoningParser {
while cursor < text.len() {
if currently_reasoning {
// Skip leading start token if present (handles force_reasoning + explicit <think>)
if text[cursor..].starts_with(&self.think_start_token) {
cursor += self.think_start_token.len();
}
// We're inside a reasoning block — look for end token
if let Some(end_offset) = text[cursor..].find(&self.think_end_token) {
reasoning_parts.push(&text[cursor..cursor + end_offset]);
......@@ -175,6 +179,17 @@ impl ReasoningParser for BasicReasoningParser {
continue;
}
// Buffer is a prefix of the start token (e.g., "<thi" for "<think>") — wait
// for more data before deciding whether to strip it or emit as reasoning.
// Only applies when force_reasoning=true and we haven't stripped the tag yet.
if !self.stripped_think_start
&& self._in_reasoning
&& !current_text.is_empty()
&& self.think_start_token.starts_with(current_text.as_str())
{
break;
}
if self._in_reasoning {
if let Some(end_idx) = current_text.find(self.think_end_token.as_str()) {
// End of reasoning block: accumulate content and transition out.
......
......@@ -26,6 +26,7 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT
map.insert("qwen3", ReasoningParserType::Qwen);
map.insert("nemotron_deci", ReasoningParserType::NemotronDeci);
map.insert("kimi", ReasoningParserType::Kimi);
map.insert("kimi_k25", ReasoningParserType::KimiK25);
map.insert("step3", ReasoningParserType::Step3);
map.insert("mistral", ReasoningParserType::Mistral);
map.insert("granite", ReasoningParserType::Granite);
......@@ -97,6 +98,7 @@ pub enum ReasoningParserType {
Qwen,
NemotronDeci,
Kimi,
KimiK25,
Mistral,
Granite,
MiniMaxAppendThink,
......@@ -152,6 +154,14 @@ impl ReasoningParserType {
true,
)),
},
ReasoningParserType::KimiK25 => ReasoningParserWrapper {
parser: Box::new(BasicReasoningParser::new(
"<think>".into(),
"</think>".into(),
true,
true,
)),
},
ReasoningParserType::Mistral => ReasoningParserWrapper {
parser: Box::new(BasicReasoningParser::new(
"[THINK]".into(),
......@@ -222,6 +232,7 @@ mod tests {
"qwen3",
"nemotron_deci",
"kimi",
"kimi_k25",
"step3",
"mistral",
"granite",
......@@ -233,4 +244,135 @@ mod tests {
assert!(parsers.contains(&parser));
}
}
#[test]
fn test_kimi_k25_detect_and_parse() {
    // Runs one scenario against a fresh KimiK25 parser so that no state is
    // shared between scenarios.
    let check = |desc: &str, input: &str, want_reasoning: &str, want_normal: &str| {
        let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();
        let parsed = parser.detect_and_parse_reasoning(input, &[]);
        assert_eq!(
            parsed.reasoning_text, want_reasoning,
            "FAILED reasoning: {desc}"
        );
        assert_eq!(parsed.normal_text, want_normal, "FAILED normal: {desc}");
    };

    // Arguments: description, input, expected reasoning, expected normal text.
    check(
        "force reasoning: no think tags",
        "no think tags here",
        "no think tags here",
        "",
    );
    check(
        "standard think tags",
        "<think>Let me reason about this.</think>Hello!",
        "Let me reason about this.",
        "Hello!",
    );
    check(
        "empty think block (instant mode)",
        "<think></think>Hello from instant mode!",
        "",
        "Hello from instant mode!",
    );
    check(
        "empty think block with newline",
        "<think>\n</think>Hello from instant mode!",
        "",
        "Hello from instant mode!",
    );
}
#[test]
fn test_kimi_k25_streaming_force_reasoning() {
    // A single parser instance consumes the deltas in order, so state carries
    // over from one step to the next.
    let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();

    // Columns: streamed delta, expected reasoning text, expected normal text.
    let steps = [
        // "<thi" is a prefix of "<think>", so it is held back and nothing is emitted.
        ("<thi", "", ""),
        // The tag completes and the text after it is reported as reasoning.
        ("nk>reasoning here", "reasoning here", ""),
        // The closing tag ends reasoning; the remainder is normal content.
        ("</think>Hello!", "", "Hello!"),
    ];

    for (delta, want_reasoning, want_normal) in steps {
        let step_result = parser.parse_reasoning_streaming_incremental(delta, &[]);
        assert_eq!(step_result.reasoning_text, want_reasoning);
        assert_eq!(step_result.normal_text, want_normal);
    }
}
#[test]
fn test_kimi_k25_streaming() {
    // Columns: description, streamed tokens, expected reasoning, expected content.
    let cases: Vec<(&str, &[&str], &str, &str)> = vec![
        (
            "complete response",
            &[
                "<think>",
                "I need to",
                " think about",
                " this carefully.",
                "</think>",
                "Bonjour",
                "!",
            ],
            "I need to think about this carefully.",
            "Bonjour!",
        ),
        (
            "empty think (instant mode)",
            &["<think>", "</think>", "Direct answer."],
            "",
            "Direct answer.",
        ),
    ];

    for (desc, tokens, expected_reasoning, expected_content) in cases {
        // Fresh parser per case; the fold concatenates what each token yields.
        let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();
        let (reasoning_acc, content_acc) = tokens.iter().fold(
            (String::new(), String::new()),
            |(mut reasoning, mut content), token| {
                let step = parser.parse_reasoning_streaming_incremental(token, &[]);
                reasoning.push_str(&step.reasoning_text);
                content.push_str(&step.normal_text);
                (reasoning, content)
            },
        );

        assert_eq!(
            reasoning_acc, expected_reasoning,
            "FAILED reasoning: {desc}"
        );
        assert_eq!(content_acc, expected_content, "FAILED content: {desc}");
    }
}
#[test]
fn test_kimi_k25_parser_lookup_by_name() {
    // The name "kimi_k25" must resolve to a parser that splits <think> blocks.
    let input = "<think>thinking</think>answer";
    let mut looked_up = ReasoningParserType::get_reasoning_parser_from_name("kimi_k25");
    let parsed = looked_up.detect_and_parse_reasoning(input, &[]);

    assert_eq!(parsed.reasoning_text, "thinking");
    assert_eq!(parsed.normal_text, "answer");
}
#[test]
fn test_kimi_vs_kimi_k25_different_tags() {
    // The original Kimi parser is keyed on ◁think▷/◁/think▷, whereas KimiK25 is
    // keyed on <think>/</think>; the same input must therefore split differently.
    let input = "<think>reasoning</think>answer";

    // Original Kimi: <think> tags are not recognised, so the text passes through.
    let mut kimi = ReasoningParserType::Kimi.get_reasoning_parser();
    let from_kimi = kimi.detect_and_parse_reasoning(input, &[]);
    assert_eq!(from_kimi.normal_text, "<think>reasoning</think>answer");
    assert_eq!(from_kimi.reasoning_text, "");

    // KimiK25: the <think> block is extracted as reasoning.
    let mut kimi_k25 = ReasoningParserType::KimiK25.get_reasoning_parser();
    let from_k25 = kimi_k25.detect_and_parse_reasoning(input, &[]);
    assert_eq!(from_k25.reasoning_text, "reasoning");
    assert_eq!(from_k25.normal_text, "answer");
}
}
......@@ -130,6 +130,57 @@ impl Default for Glm47ParserConfig {
}
}
/// Token configuration for the Kimi K2 tool call parser.
///
/// Expected wire format:
/// ```text
/// <|tool_calls_section_begin|>
/// <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|>
/// <|tool_calls_section_end|>
/// ```
///
/// The model may emit either the plural or the singular form of the section tokens
/// (e.g., `<|tool_calls_section_begin|>` or `<|tool_call_section_begin|>`).
/// Both forms are covered by the `section_start_variants` and `section_end_variants`
/// fields, while `section_start` / `section_end` hold the primary (plural) form.
/// See vllm `kimi_k2_tool_parser.py` for reference.
///
/// The type is serializable and deserializable through serde; field names are the
/// serialized keys.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct KimiK2ParserConfig {
    /// Primary start token for the tool calls section
    /// (default: "<|tool_calls_section_begin|>")
    pub section_start: String,
    /// Primary end token for the tool calls section
    /// (default: "<|tool_calls_section_end|>")
    pub section_end: String,
    /// All recognized start tokens for the tool calls section; the default lists
    /// the plural form followed by the singular variant
    pub section_start_variants: Vec<String>,
    /// All recognized end tokens for the tool calls section; the default lists
    /// the plural form followed by the singular variant
    pub section_end_variants: Vec<String>,
    /// Start token for an individual tool call (e.g., "<|tool_call_begin|>")
    pub call_start: String,
    /// End token for an individual tool call (e.g., "<|tool_call_end|>")
    pub call_end: String,
    /// Token separating function ID from JSON arguments (e.g., "<|tool_call_argument_begin|>")
    pub argument_begin: String,
}
impl Default for KimiK2ParserConfig {
    /// Build the stock Kimi K2 token set: the plural section tokens are the
    /// primary ones, and each variant list holds the plural form followed by
    /// the singular form.
    fn default() -> Self {
        const SECTION_BEGIN_PLURAL: &str = "<|tool_calls_section_begin|>";
        const SECTION_BEGIN_SINGULAR: &str = "<|tool_call_section_begin|>";
        const SECTION_END_PLURAL: &str = "<|tool_calls_section_end|>";
        const SECTION_END_SINGULAR: &str = "<|tool_call_section_end|>";

        // Turn a fixed list of token literals into owned strings, keeping order.
        let owned = |tokens: [&str; 2]| -> Vec<String> {
            tokens.into_iter().map(String::from).collect()
        };

        Self {
            section_start: String::from(SECTION_BEGIN_PLURAL),
            section_end: String::from(SECTION_END_PLURAL),
            section_start_variants: owned([SECTION_BEGIN_PLURAL, SECTION_BEGIN_SINGULAR]),
            section_end_variants: owned([SECTION_END_PLURAL, SECTION_END_SINGULAR]),
            call_start: String::from("<|tool_call_begin|>"),
            call_end: String::from("<|tool_call_end|>"),
            argument_begin: String::from("<|tool_call_argument_begin|>"),
        }
    }
}
/// Parser-specific configuration
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
......@@ -140,6 +191,7 @@ pub enum ParserConfig {
Harmony(JsonParserConfig),
Typescript,
Dsml(DsmlParserConfig),
KimiK2(KimiK2ParserConfig),
Glm47(Glm47ParserConfig),
}
......@@ -155,6 +207,7 @@ impl ParserConfig {
ParserConfig::Typescript => vec![],
ParserConfig::Dsml(config) => vec![config.function_calls_start.clone()],
ParserConfig::Glm47(config) => vec![config.tool_call_start.clone()],
ParserConfig::KimiK2(config) => config.section_start_variants.clone(),
}
}
......@@ -169,6 +222,7 @@ impl ParserConfig {
ParserConfig::Typescript => vec![],
ParserConfig::Dsml(config) => vec![config.function_calls_end.clone()],
ParserConfig::Glm47(config) => vec![config.tool_call_end.clone()],
ParserConfig::KimiK2(config) => config.section_end_variants.clone(),
}
}
}
......@@ -357,4 +411,15 @@ impl ToolCallConfig {
parser_config: ParserConfig::Glm47(Glm47ParserConfig::default()),
}
}
/// Tool call configuration for the Kimi K2 format, using the default
/// [`KimiK2ParserConfig`] tokens:
///
/// ```text
/// <|tool_calls_section_begin|>
/// <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|>
/// <|tool_calls_section_end|>
/// ```
///
/// Reference: https://huggingface.co/moonshotai/Kimi-K2-Instruct/blob/main/docs/tool_call_guidance.md
pub fn kimi_k2() -> Self {
    let parser_config = ParserConfig::KimiK2(KimiK2ParserConfig::default());
    Self { parser_config }
}
}
......@@ -23,7 +23,9 @@ pub struct ToolDefinition {
}
// Re-export main types and functions for convenience
pub use config::{JsonParserConfig, ParserConfig, ToolCallConfig, XmlParserConfig};
pub use config::{
JsonParserConfig, KimiK2ParserConfig, ParserConfig, ToolCallConfig, XmlParserConfig,
};
pub use dsml::try_tool_call_parse_dsml;
pub use harmony::parse_tool_calls_harmony_complete;
pub use json::try_tool_call_parse_json;
......@@ -34,4 +36,5 @@ pub use parsers::{
pub use pythonic::try_tool_call_parse_pythonic;
pub use response::{CalledFunction, ToolCallResponse, ToolCallType};
pub use tools::{try_tool_call_parse_aggregate, try_tool_call_parse_stream};
pub use xml::try_tool_call_parse_kimi_k2;
pub use xml::try_tool_call_parse_xml;
......@@ -19,8 +19,10 @@ use super::pythonic::{
};
use super::response::ToolCallResponse;
use super::xml::{
detect_tool_call_start_glm47, detect_tool_call_start_xml, find_tool_call_end_position_glm47,
find_tool_call_end_position_xml, try_tool_call_parse_glm47, try_tool_call_parse_xml,
detect_tool_call_start_glm47, detect_tool_call_start_kimi_k2, detect_tool_call_start_xml,
find_tool_call_end_position_glm47, find_tool_call_end_position_kimi_k2,
find_tool_call_end_position_xml, try_tool_call_parse_glm47, try_tool_call_parse_kimi_k2,
try_tool_call_parse_xml,
};
use std::collections::HashMap;
use std::sync::OnceLock;
......@@ -45,6 +47,7 @@ pub fn get_tool_parser_map() -> &'static HashMap<&'static str, ToolCallConfig> {
map.insert("jamba", ToolCallConfig::jamba());
map.insert("minimax_m2", ToolCallConfig::minimax_m2());
map.insert("glm47", ToolCallConfig::glm47());
map.insert("kimi_k2", ToolCallConfig::kimi_k2());
map.insert("default", ToolCallConfig::default());
map.insert("nemotron_nano", ToolCallConfig::qwen3_coder()); // nemotron nano follows qwen3_coder format
map
......@@ -91,6 +94,11 @@ pub async fn try_tool_call_parse(
try_tool_call_parse_glm47(message, glm47_config, tools)?;
Ok((results, normal_content))
}
ParserConfig::KimiK2(kimi_config) => {
let (results, normal_content) =
try_tool_call_parse_kimi_k2(message, kimi_config, tools)?;
Ok((results, normal_content))
}
}
}
......@@ -144,6 +152,9 @@ pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow::
ParserConfig::Glm47(glm47_config) => {
Ok(detect_tool_call_start_glm47(chunk, glm47_config))
}
ParserConfig::KimiK2(kimi_config) => {
Ok(detect_tool_call_start_kimi_k2(chunk, kimi_config))
}
},
None => anyhow::bail!(
"Parser '{}' is not implemented. Available parsers: {:?}",
......@@ -184,6 +195,9 @@ pub fn find_tool_call_end_position(chunk: &str, parser_str: Option<&str>) -> usi
ParserConfig::Glm47(glm47_config) => {
find_tool_call_end_position_glm47(chunk, glm47_config)
}
ParserConfig::KimiK2(kimi_config) => {
find_tool_call_end_position_kimi_k2(chunk, kimi_config)
}
},
None => {
// Unknown parser, return full content length
......@@ -225,6 +239,7 @@ mod tests {
"nemotron_nano",
"minimax_m2",
"glm47",
"kimi_k2",
];
for parser in available_parsers {
assert!(parsers.contains(&parser));
......
This diff is collapsed.
......@@ -2,12 +2,17 @@
// SPDX-License-Identifier: Apache-2.0
mod glm47_parser;
mod kimi_k2_parser;
mod parser;
pub use super::response;
pub use glm47_parser::{
detect_tool_call_start_glm47, find_tool_call_end_position_glm47, try_tool_call_parse_glm47,
};
pub use kimi_k2_parser::{
detect_tool_call_start_kimi_k2, find_tool_call_end_position_kimi_k2,
try_tool_call_parse_kimi_k2,
};
pub use parser::{
detect_tool_call_start_xml, find_tool_call_end_position_xml, try_tool_call_parse_xml,
};
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment