Unverified Commit d1bd210f authored by Nikita's avatar Nikita Committed by GitHub
Browse files

feat: Kimi K2/K2.5 tool and reasoning parsers (#6407)


Signed-off-by: default avatarNikita Sukharev <kaonael@gmail.com>
parent ff06b17e
...@@ -45,7 +45,14 @@ Parser to Model Mapping ...@@ -45,7 +45,14 @@ Parser to Model Mapping
| pythonic | meta-llama/Llama-4-* | | pythonic | meta-llama/Llama-4-* |
| jamba | ai21labs/AI21-Jamba-*-1.5, ai21labs/AI21-Jamba-*-1.6, ai21labs/AI21-Jamba-*-1.7, | | jamba | ai21labs/AI21-Jamba-*-1.5, ai21labs/AI21-Jamba-*-1.6, ai21labs/AI21-Jamba-*-1.7, |
| glm47 | zai-org/GLM-4.7 | | glm47 | zai-org/GLM-4.7 |
| kimi_k2 | moonshotai/Kimi-K2-Thinking*, moonshotai/Kimi-K2-Instruct*, moonshotai/Kimi-K2.5* |
\* Kimi K2 models currently require converting `tiktoken.model` to `tokenizers.json`.
> [!TIP]
> For Kimi K2.5 thinking models, pair `--dyn-tool-call-parser kimi_k2` with
> `--dyn-reasoning-parser kimi_k25` so that both `<think>` blocks and tool calls
> are parsed correctly from the same response.
## Examples ## Examples
......
...@@ -147,8 +147,8 @@ galil-seiferas = { version = "0.1" } ...@@ -147,8 +147,8 @@ galil-seiferas = { version = "0.1" }
# preprocessor # preprocessor
bs62 = { version = "0.1" } bs62 = { version = "0.1" }
minijinja = { version = "2.14.0", features = ["loader"] } minijinja = { version = "2.15.1", features = ["loader", "loop_controls"] }
minijinja-contrib = { version = "2.14.0", features = ["pycompat"] } minijinja-contrib = { version = "2.15.1", features = ["pycompat"] }
json-five = { version = "0.3" } json-five = { version = "0.3" }
# media loading in the preprocessor # media loading in the preprocessor
......
...@@ -946,6 +946,25 @@ impl OpenAIPreprocessor { ...@@ -946,6 +946,25 @@ impl OpenAIPreprocessor {
jail.apply_with_finish_reason(stream) jail.apply_with_finish_reason(stream)
} }
/// Check if reasoning parsing should be disabled based on per-request parameters.
/// For kimi_k25: disabled when chat_template_args contains "thinking": false.
fn is_reasoning_disabled_by_request(
reasoning_parser: Option<&str>,
chat_template_args: Option<&std::collections::HashMap<String, serde_json::Value>>,
) -> bool {
match reasoning_parser {
Some("kimi_k25") => {
if let Some(args) = chat_template_args
&& let Some(thinking) = args.get("thinking")
{
return thinking == &serde_json::Value::Bool(false);
}
false
}
_ => false,
}
}
// Motivation: Each transformation on the stream should be a separate step to allow for more flexibility // Motivation: Each transformation on the stream should be a separate step to allow for more flexibility
// Earlier reasoning parser logic was nested under delta generation logic in choice_from_postprocessor // Earlier reasoning parser logic was nested under delta generation logic in choice_from_postprocessor
// Since we have tool calling parsing as separate step, it makes sense to have reasoning parser as separate step as well // Since we have tool calling parsing as separate step, it makes sense to have reasoning parser as separate step as well
...@@ -1094,7 +1113,11 @@ impl ...@@ -1094,7 +1113,11 @@ impl
); );
// Try to parse reasoning content only if parser is configured // Try to parse reasoning content only if parser is configured
let should_parse_reasoning = self.runtime_config.reasoning_parser.is_some(); let should_parse_reasoning = self.runtime_config.reasoning_parser.is_some()
&& !Self::is_reasoning_disabled_by_request(
self.runtime_config.reasoning_parser.as_deref(),
request.chat_template_args.as_ref(),
);
// Reasoning Content Parsing Transformation Step // Reasoning Content Parsing Transformation Step
// Current Solution: // Current Solution:
...@@ -1329,3 +1352,77 @@ impl ...@@ -1329,3 +1352,77 @@ impl
} }
// Note: tests for jailing and parser detection live in `lib/llm/tests/test_jail.rs` // Note: tests for jailing and parser detection live in `lib/llm/tests/test_jail.rs`
#[cfg(test)]
mod tests {
    use super::*;

    type TemplateArgs = std::collections::HashMap<String, serde_json::Value>;

    /// Builds a `chat_template_args` map whose only entry is a boolean `"thinking"` flag.
    fn thinking_args(enabled: bool) -> TemplateArgs {
        TemplateArgs::from([("thinking".to_string(), serde_json::Value::Bool(enabled))])
    }

    /// Table-driven check of `is_reasoning_disabled_by_request` across parser
    /// names and `chat_template_args` shapes.
    #[test]
    fn test_is_reasoning_disabled_by_request() {
        let thinking_true = thinking_args(true);
        let thinking_false = thinking_args(false);
        let empty_args = TemplateArgs::new();

        // Each row: (parser, args, expected_disabled, description)
        let cases = [
            (
                Some("kimi_k25"),
                Some(&thinking_false),
                true,
                "kimi_k25 + thinking=false → disabled",
            ),
            (
                Some("kimi_k25"),
                Some(&thinking_true),
                false,
                "kimi_k25 + thinking=true → enabled",
            ),
            (
                Some("kimi_k25"),
                None,
                false,
                "kimi_k25 + no args → enabled",
            ),
            (
                Some("kimi_k25"),
                Some(&empty_args),
                false,
                "kimi_k25 + empty args → enabled",
            ),
            (
                Some("deepseek_r1"),
                Some(&thinking_false),
                false,
                "deepseek_r1 → never disabled",
            ),
            (
                Some("basic"),
                Some(&thinking_false),
                false,
                "basic → never disabled",
            ),
            (
                None,
                Some(&thinking_false),
                false,
                "no parser → never disabled",
            ),
        ];

        for (parser, args, expected, desc) in cases {
            let disabled = OpenAIPreprocessor::is_reasoning_disabled_by_request(parser, args);
            assert_eq!(disabled, expected, "FAILED: {desc}");
        }
    }
}
...@@ -105,6 +105,38 @@ mod tests { ...@@ -105,6 +105,38 @@ mod tests {
} }
} }
/// Test shorthand: wraps `content` in a mock response chunk, passing `None`
/// for the second argument of `create_mock_response_chunk`.
fn chunk(content: &str) -> Annotated<NvCreateChatCompletionStreamResponse> {
    let text = String::from(content);
    create_mock_response_chunk(text, None)
}
/// Run chunks through a reasoning parser, return aggregated (reasoning, content)
async fn run_parser(
chunks: Vec<Annotated<NvCreateChatCompletionStreamResponse>>,
parser: &str,
) -> (String, String) {
let output_stream = OpenAIPreprocessor::parse_reasoning_content_from_stream(
stream::iter(chunks),
parser.to_string(),
);
let mut output_stream = std::pin::pin!(output_stream);
let mut all_reasoning = String::new();
let mut all_content = String::new();
while let Some(item) = output_stream.next().await {
if let Some(ref data) = item.data {
for choice in &data.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
if let Some(ref c) = choice.delta.content {
all_content.push_str(get_text(c));
}
}
}
}
(all_reasoning, all_content)
}
#[tokio::test] #[tokio::test]
async fn test_reasoning_parser_with_basic_parser() { async fn test_reasoning_parser_with_basic_parser() {
// Basic Parser test <think> </think> tags // Basic Parser test <think> </think> tags
...@@ -414,57 +446,69 @@ mod tests { ...@@ -414,57 +446,69 @@ mod tests {
} }
#[tokio::test] #[tokio::test]
async fn test_reasoning_parser_with_kimi_parser() { async fn test_reasoning_parser_with_kimi_k25() {
// Create a mock runtime config with Kimi reasoning parser // (description, input_chunks, expected_reasoning, expected_content)
let runtime_config = dynamo_llm::local_model::runtime_config::ModelRuntimeConfig { let cases = vec![
reasoning_parser: Some("kimi".to_string()), (
..Default::default() "thinking mode",
}; vec![
chunk("<think>Let me"),
// Create test input stream with Kimi-style reasoning tags chunk(" think about this carefully."),
let input_chunks = vec![ chunk("</think>Bonjour!"),
create_mock_response_chunk("Let me analyze this. ◁think▷This is Kimi reasoning content◁/think▷ Here's my conclusion.".to_string(), None), ],
"Let me think about this carefully.",
"Bonjour!",
),
(
"instant mode (empty think)",
vec![
chunk("<think>"),
chunk("</think>"),
chunk("Direct answer without thinking."),
],
"",
"Direct answer without thinking.",
),
(
"token-by-token",
vec![
chunk("<think>"),
chunk("The user"),
chunk(" asked me"),
chunk(" to say hello."),
chunk("</think>"),
chunk("Hello"),
chunk("!"),
],
"The user asked me to say hello.",
"Hello!",
),
]; ];
let input_stream = stream::iter(input_chunks);
// Apply the reasoning parser transformation
let output_stream = OpenAIPreprocessor::parse_reasoning_content_from_stream(
input_stream,
runtime_config.reasoning_parser.unwrap(),
);
// Pin the stream and collect all output chunks for (desc, chunks, expected_reasoning, expected_content) in cases {
let mut output_stream = std::pin::pin!(output_stream); let (reasoning, content) = run_parser(chunks, "kimi_k25").await;
let mut output_chunks = Vec::new(); assert_eq!(reasoning, expected_reasoning, "FAILED reasoning: {desc}");
while let Some(chunk) = output_stream.next().await { assert_eq!(content, expected_content, "FAILED content: {desc}");
output_chunks.push(chunk);
} }
}
// Verify that Kimi-style reasoning is parsed correctly #[tokio::test]
assert_eq!(output_chunks.len(), 1); async fn test_reasoning_parser_with_kimi_parser() {
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0]; let (reasoning, content) = run_parser(
vec![chunk(
assert!( "Let me analyze this. ◁think▷This is Kimi reasoning content◁/think▷ Here's my conclusion.",
output_choice.delta.reasoning_content.is_some(), )],
"Should extract Kimi reasoning content" "kimi",
); )
assert!( .await;
output_choice.delta.content.is_some(),
"Should have normal content"
);
let reasoning_content = output_choice.delta.reasoning_content.as_ref().unwrap();
let normal_content = output_choice.delta.content.as_ref().unwrap();
// Verify the content was parsed with Kimi tags
assert!( assert!(
reasoning_content.contains("Kimi reasoning"), reasoning.contains("Kimi reasoning"),
"Should contain Kimi reasoning content" "Should contain Kimi reasoning, got: {reasoning}"
); );
assert!( assert!(
get_text(normal_content).contains("Let me analyze") content.contains("Let me analyze") || content.contains("Here's my conclusion"),
|| get_text(normal_content).contains("Here's my conclusion"), "Should contain normal content, got: {content}"
"Should contain normal content"
); );
} }
...@@ -586,6 +630,103 @@ mod tests { ...@@ -586,6 +630,103 @@ mod tests {
); );
} }
#[tokio::test]
async fn test_kimi_k25_with_reasoning_and_tool_calls() {
// Simulates a real Kimi K2.5 response: <think> block followed by tool calls.
// Verifies that reasoning and tool_calling parsers don't interfere with each other.
let input_chunks = vec![
chunk("<think>I should check the weather"),
chunk(" before answering.</think>"),
chunk("<|tool_calls_section_begin|>"),
chunk("<|tool_call_begin|>functions.get_weather:0"),
chunk("<|tool_call_argument_begin|>"),
chunk(r#"{"location":"NYC"}"#),
chunk("<|tool_call_end|>"),
chunk("<|tool_calls_section_end|>"),
];
let input_stream = stream::iter(input_chunks);
// Step 1: reasoning parser (kimi_k25) extracts <think> into reasoning_content
let reasoning_parsed_stream = OpenAIPreprocessor::parse_reasoning_content_from_stream(
input_stream,
"kimi_k25".to_string(),
);
// Step 2: tool calling jail (kimi_k2) extracts tool calls from remaining content
let tool_parsed_stream = OpenAIPreprocessor::apply_tool_calling_jail(
Some("kimi_k2".to_string()),
None,
None,
reasoning_parsed_stream,
);
let mut tool_parsed_stream = std::pin::pin!(tool_parsed_stream);
let mut output_chunks = Vec::new();
while let Some(chunk) = tool_parsed_stream.next().await {
output_chunks.push(chunk);
}
assert!(!output_chunks.is_empty(), "Should have output chunks");
let mut all_reasoning = String::new();
let mut all_normal_content = String::new();
let mut found_tool_calls = false;
let mut tool_call_function_name: Option<String> = None;
let mut tool_call_arguments: Option<serde_json::Value> = None;
for chunk in output_chunks.iter() {
if let Some(ref data) = chunk.data {
for choice in &data.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
if let Some(ref c) = choice.delta.content {
all_normal_content.push_str(get_text(c));
}
if let Some(ref tool_calls) = choice.delta.tool_calls
&& !tool_calls.is_empty()
{
found_tool_calls = true;
for tc in tool_calls {
if let Some(ref f) = tc.function {
if let Some(ref name) = f.name {
tool_call_function_name = Some(name.clone());
}
if let Some(ref args) = f.arguments {
tool_call_arguments = Some(serde_json::from_str(args).unwrap());
}
}
}
}
}
}
}
assert_eq!(
all_reasoning, "I should check the weather before answering.",
"Reasoning mismatch"
);
assert!(
found_tool_calls,
"Should have found tool calls in the output"
);
assert_eq!(
tool_call_function_name.as_deref(),
Some("get_weather"),
"Tool call function name should be 'get_weather'"
);
assert_eq!(
tool_call_arguments.as_ref(),
Some(&serde_json::json!({"location": "NYC"})),
"Tool call arguments mismatch"
);
// No normal content expected — everything is either reasoning or tool calls
assert!(
all_normal_content.trim().is_empty(),
"Expected no normal content, got: {all_normal_content:?}"
);
}
#[tokio::test] #[tokio::test]
#[ignore] #[ignore]
// (TODO: Ayush) Fix this test // (TODO: Ayush) Fix this test
......
...@@ -108,6 +108,10 @@ impl ReasoningParser for BasicReasoningParser { ...@@ -108,6 +108,10 @@ impl ReasoningParser for BasicReasoningParser {
while cursor < text.len() { while cursor < text.len() {
if currently_reasoning { if currently_reasoning {
// Skip leading start token if present (handles force_reasoning + explicit <think>)
if text[cursor..].starts_with(&self.think_start_token) {
cursor += self.think_start_token.len();
}
// We're inside a reasoning block — look for end token // We're inside a reasoning block — look for end token
if let Some(end_offset) = text[cursor..].find(&self.think_end_token) { if let Some(end_offset) = text[cursor..].find(&self.think_end_token) {
reasoning_parts.push(&text[cursor..cursor + end_offset]); reasoning_parts.push(&text[cursor..cursor + end_offset]);
...@@ -175,6 +179,17 @@ impl ReasoningParser for BasicReasoningParser { ...@@ -175,6 +179,17 @@ impl ReasoningParser for BasicReasoningParser {
continue; continue;
} }
// Buffer is a prefix of the start token (e.g., "<thi" for "<think>") — wait
// for more data before deciding whether to strip it or emit as reasoning.
// Only applies when force_reasoning=true and we haven't stripped the tag yet.
if !self.stripped_think_start
&& self._in_reasoning
&& !current_text.is_empty()
&& self.think_start_token.starts_with(current_text.as_str())
{
break;
}
if self._in_reasoning { if self._in_reasoning {
if let Some(end_idx) = current_text.find(self.think_end_token.as_str()) { if let Some(end_idx) = current_text.find(self.think_end_token.as_str()) {
// End of reasoning block: accumulate content and transition out. // End of reasoning block: accumulate content and transition out.
......
...@@ -26,6 +26,7 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT ...@@ -26,6 +26,7 @@ fn get_reasoning_parser_map() -> &'static HashMap<&'static str, ReasoningParserT
map.insert("qwen3", ReasoningParserType::Qwen); map.insert("qwen3", ReasoningParserType::Qwen);
map.insert("nemotron_deci", ReasoningParserType::NemotronDeci); map.insert("nemotron_deci", ReasoningParserType::NemotronDeci);
map.insert("kimi", ReasoningParserType::Kimi); map.insert("kimi", ReasoningParserType::Kimi);
map.insert("kimi_k25", ReasoningParserType::KimiK25);
map.insert("step3", ReasoningParserType::Step3); map.insert("step3", ReasoningParserType::Step3);
map.insert("mistral", ReasoningParserType::Mistral); map.insert("mistral", ReasoningParserType::Mistral);
map.insert("granite", ReasoningParserType::Granite); map.insert("granite", ReasoningParserType::Granite);
...@@ -97,6 +98,7 @@ pub enum ReasoningParserType { ...@@ -97,6 +98,7 @@ pub enum ReasoningParserType {
Qwen, Qwen,
NemotronDeci, NemotronDeci,
Kimi, Kimi,
KimiK25,
Mistral, Mistral,
Granite, Granite,
MiniMaxAppendThink, MiniMaxAppendThink,
...@@ -152,6 +154,14 @@ impl ReasoningParserType { ...@@ -152,6 +154,14 @@ impl ReasoningParserType {
true, true,
)), )),
}, },
ReasoningParserType::KimiK25 => ReasoningParserWrapper {
parser: Box::new(BasicReasoningParser::new(
"<think>".into(),
"</think>".into(),
true,
true,
)),
},
ReasoningParserType::Mistral => ReasoningParserWrapper { ReasoningParserType::Mistral => ReasoningParserWrapper {
parser: Box::new(BasicReasoningParser::new( parser: Box::new(BasicReasoningParser::new(
"[THINK]".into(), "[THINK]".into(),
...@@ -222,6 +232,7 @@ mod tests { ...@@ -222,6 +232,7 @@ mod tests {
"qwen3", "qwen3",
"nemotron_deci", "nemotron_deci",
"kimi", "kimi",
"kimi_k25",
"step3", "step3",
"mistral", "mistral",
"granite", "granite",
...@@ -233,4 +244,135 @@ mod tests { ...@@ -233,4 +244,135 @@ mod tests {
assert!(parsers.contains(&parser)); assert!(parsers.contains(&parser));
} }
} }
/// Non-streaming `kimi_k25` parsing: each input must split into the expected
/// reasoning text and normal text. A fresh parser is built per case.
#[test]
fn test_kimi_k25_detect_and_parse() {
    // Each row: (description, input, expected_reasoning, expected_normal)
    const CASES: [(&str, &str, &str, &str); 4] = [
        (
            "force reasoning: no think tags",
            "no think tags here",
            "no think tags here",
            "",
        ),
        (
            "standard think tags",
            "<think>Let me reason about this.</think>Hello!",
            "Let me reason about this.",
            "Hello!",
        ),
        (
            "empty think block (instant mode)",
            "<think></think>Hello from instant mode!",
            "",
            "Hello from instant mode!",
        ),
        (
            "empty think block with newline",
            "<think>\n</think>Hello from instant mode!",
            "",
            "Hello from instant mode!",
        ),
    ];

    for (desc, input, want_reasoning, want_normal) in CASES {
        let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();
        let parsed = parser.detect_and_parse_reasoning(input, &[]);
        assert_eq!(
            parsed.reasoning_text, want_reasoning,
            "FAILED reasoning: {desc}"
        );
        assert_eq!(parsed.normal_text, want_normal, "FAILED normal: {desc}");
    }
}
/// Streaming `kimi_k25` parsing when the opening tag arrives split across chunks:
/// the partial `<thi` prefix must be held back rather than emitted, and the tag
/// must be stripped once the next chunk completes it.
#[test]
fn test_kimi_k25_streaming_force_reasoning() {
    let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();

    // Each step: (incoming chunk, expected reasoning delta, expected normal delta)
    let steps = [
        // A prefix of "<think>" alone yields nothing yet.
        ("<thi", "", ""),
        // The tag completes and is followed by reasoning text.
        ("nk>reasoning here", "reasoning here", ""),
        // The closing tag ends reasoning; the remainder is normal text.
        ("</think>Hello!", "", "Hello!"),
    ];

    for (incoming, want_reasoning, want_normal) in steps {
        let delta = parser.parse_reasoning_streaming_incremental(incoming, &[]);
        assert_eq!(delta.reasoning_text, want_reasoning);
        assert_eq!(delta.normal_text, want_normal);
    }
}
/// Streaming `kimi_k25` parsing over whole-token chunks: the per-chunk deltas,
/// concatenated, must equal the expected reasoning and content.
#[test]
fn test_kimi_k25_streaming() {
    // Each row: (description, tokens, expected_reasoning, expected_content)
    let cases: Vec<(&str, &[&str], &str, &str)> = vec![
        (
            "complete response",
            &[
                "<think>",
                "I need to",
                " think about",
                " this carefully.",
                "</think>",
                "Bonjour",
                "!",
            ],
            "I need to think about this carefully.",
            "Bonjour!",
        ),
        (
            "empty think (instant mode)",
            &["<think>", "</think>", "Direct answer."],
            "",
            "Direct answer.",
        ),
    ];

    for (desc, tokens, want_reasoning, want_content) in cases {
        // A fresh parser per case so state never leaks between rows.
        let mut parser = ReasoningParserType::KimiK25.get_reasoning_parser();
        let mut reasoning = String::new();
        let mut content = String::new();

        for token in tokens {
            let delta = parser.parse_reasoning_streaming_incremental(token, &[]);
            reasoning.push_str(&delta.reasoning_text);
            content.push_str(&delta.normal_text);
        }

        assert_eq!(reasoning, want_reasoning, "FAILED reasoning: {desc}");
        assert_eq!(content, want_content, "FAILED content: {desc}");
    }
}
/// The `"kimi_k25"` name must resolve to a parser that splits `<think>` blocks.
#[test]
fn test_kimi_k25_parser_lookup_by_name() {
    let input = "<think>thinking</think>answer";

    let mut parser = ReasoningParserType::get_reasoning_parser_from_name("kimi_k25");
    let parsed = parser.detect_and_parse_reasoning(input, &[]);

    assert_eq!(parsed.reasoning_text, "thinking");
    assert_eq!(parsed.normal_text, "answer");
}
/// The original Kimi parser uses ◁think▷/◁/think▷ while KimiK25 uses
/// `<think>`/`</think>`, so the same `<think>`-tagged input must pass through
/// the former untouched and be split by the latter.
#[test]
fn test_kimi_vs_kimi_k25_different_tags() {
    const TAGGED: &str = "<think>reasoning</think>answer";

    let mut kimi = ReasoningParserType::Kimi.get_reasoning_parser();
    let mut kimi_k25 = ReasoningParserType::KimiK25.get_reasoning_parser();

    // Original Kimi: <think> is not its tag, so everything stays normal text.
    let from_kimi = kimi.detect_and_parse_reasoning(TAGGED, &[]);
    assert_eq!(from_kimi.normal_text, "<think>reasoning</think>answer");
    assert_eq!(from_kimi.reasoning_text, "");

    // KimiK25: the block is extracted as reasoning.
    let from_k25 = kimi_k25.detect_and_parse_reasoning(TAGGED, &[]);
    assert_eq!(from_k25.reasoning_text, "reasoning");
    assert_eq!(from_k25.normal_text, "answer");
}
} }
...@@ -130,6 +130,57 @@ impl Default for Glm47ParserConfig { ...@@ -130,6 +130,57 @@ impl Default for Glm47ParserConfig {
} }
} }
/// Token configuration for the Kimi K2 tool call parser.
///
/// Expected wire format:
/// ```text
/// <|tool_calls_section_begin|>
/// <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|>
/// <|tool_calls_section_end|>
/// ```
///
/// The model may emit either the plural or the singular form of the section
/// tokens (e.g., `<|tool_calls_section_begin|>` or `<|tool_call_section_begin|>`).
/// Both forms are supported via the `section_start_variants` and
/// `section_end_variants` fields; the `Default` impl lists the plural form
/// first and the singular form second.
/// See vllm `kimi_k2_tool_parser.py` for reference.
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
pub struct KimiK2ParserConfig {
/// Primary start token for the tool calls section
/// (default: `<|tool_calls_section_begin|>`).
pub section_start: String,
/// Primary end token for the tool calls section
/// (default: `<|tool_calls_section_end|>`).
pub section_end: String,
/// Every recognized start token for the tool calls section, including the
/// singular variant.
pub section_start_variants: Vec<String>,
/// Every recognized end token for the tool calls section, including the
/// singular variant.
pub section_end_variants: Vec<String>,
/// Start token for an individual tool call (e.g., "<|tool_call_begin|>")
pub call_start: String,
/// End token for an individual tool call (e.g., "<|tool_call_end|>")
pub call_end: String,
/// Token separating the function ID from its JSON arguments
/// (e.g., "<|tool_call_argument_begin|>")
pub argument_begin: String,
}
impl Default for KimiK2ParserConfig {
    /// Builds the stock Kimi K2 token set: plural section tokens as the primary
    /// pair, with both plural and singular spellings listed as accepted variants.
    fn default() -> Self {
        const SECTION_BEGIN_PLURAL: &str = "<|tool_calls_section_begin|>";
        const SECTION_BEGIN_SINGULAR: &str = "<|tool_call_section_begin|>";
        const SECTION_END_PLURAL: &str = "<|tool_calls_section_end|>";
        const SECTION_END_SINGULAR: &str = "<|tool_call_section_end|>";

        let owned = |tokens: [&str; 2]| -> Vec<String> {
            tokens.iter().map(|token| token.to_string()).collect()
        };

        Self {
            section_start: SECTION_BEGIN_PLURAL.to_owned(),
            section_end: SECTION_END_PLURAL.to_owned(),
            // The plural spelling stays first, matching the primary token.
            section_start_variants: owned([SECTION_BEGIN_PLURAL, SECTION_BEGIN_SINGULAR]),
            section_end_variants: owned([SECTION_END_PLURAL, SECTION_END_SINGULAR]),
            call_start: String::from("<|tool_call_begin|>"),
            call_end: String::from("<|tool_call_end|>"),
            argument_begin: String::from("<|tool_call_argument_begin|>"),
        }
    }
}
/// Parser-specific configuration /// Parser-specific configuration
#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
...@@ -140,6 +191,7 @@ pub enum ParserConfig { ...@@ -140,6 +191,7 @@ pub enum ParserConfig {
Harmony(JsonParserConfig), Harmony(JsonParserConfig),
Typescript, Typescript,
Dsml(DsmlParserConfig), Dsml(DsmlParserConfig),
KimiK2(KimiK2ParserConfig),
Glm47(Glm47ParserConfig), Glm47(Glm47ParserConfig),
} }
...@@ -155,6 +207,7 @@ impl ParserConfig { ...@@ -155,6 +207,7 @@ impl ParserConfig {
ParserConfig::Typescript => vec![], ParserConfig::Typescript => vec![],
ParserConfig::Dsml(config) => vec![config.function_calls_start.clone()], ParserConfig::Dsml(config) => vec![config.function_calls_start.clone()],
ParserConfig::Glm47(config) => vec![config.tool_call_start.clone()], ParserConfig::Glm47(config) => vec![config.tool_call_start.clone()],
ParserConfig::KimiK2(config) => config.section_start_variants.clone(),
} }
} }
...@@ -169,6 +222,7 @@ impl ParserConfig { ...@@ -169,6 +222,7 @@ impl ParserConfig {
ParserConfig::Typescript => vec![], ParserConfig::Typescript => vec![],
ParserConfig::Dsml(config) => vec![config.function_calls_end.clone()], ParserConfig::Dsml(config) => vec![config.function_calls_end.clone()],
ParserConfig::Glm47(config) => vec![config.tool_call_end.clone()], ParserConfig::Glm47(config) => vec![config.tool_call_end.clone()],
ParserConfig::KimiK2(config) => config.section_end_variants.clone(),
} }
} }
} }
...@@ -357,4 +411,15 @@ impl ToolCallConfig { ...@@ -357,4 +411,15 @@ impl ToolCallConfig {
parser_config: ParserConfig::Glm47(Glm47ParserConfig::default()), parser_config: ParserConfig::Glm47(Glm47ParserConfig::default()),
} }
} }
/// Tool-call configuration for Kimi K2, built from the default
/// [`KimiK2ParserConfig`].
///
/// Kimi K2 format:
/// ```text
/// <|tool_calls_section_begin|>
/// <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|>
/// <|tool_calls_section_end|>
/// ```
///
/// Reference: <https://huggingface.co/moonshotai/Kimi-K2-Instruct/blob/main/docs/tool_call_guidance.md>
pub fn kimi_k2() -> Self {
    let parser_config = ParserConfig::KimiK2(KimiK2ParserConfig::default());
    Self { parser_config }
}
} }
...@@ -23,7 +23,9 @@ pub struct ToolDefinition { ...@@ -23,7 +23,9 @@ pub struct ToolDefinition {
} }
// Re-export main types and functions for convenience // Re-export main types and functions for convenience
pub use config::{JsonParserConfig, ParserConfig, ToolCallConfig, XmlParserConfig}; pub use config::{
JsonParserConfig, KimiK2ParserConfig, ParserConfig, ToolCallConfig, XmlParserConfig,
};
pub use dsml::try_tool_call_parse_dsml; pub use dsml::try_tool_call_parse_dsml;
pub use harmony::parse_tool_calls_harmony_complete; pub use harmony::parse_tool_calls_harmony_complete;
pub use json::try_tool_call_parse_json; pub use json::try_tool_call_parse_json;
...@@ -34,4 +36,5 @@ pub use parsers::{ ...@@ -34,4 +36,5 @@ pub use parsers::{
pub use pythonic::try_tool_call_parse_pythonic; pub use pythonic::try_tool_call_parse_pythonic;
pub use response::{CalledFunction, ToolCallResponse, ToolCallType}; pub use response::{CalledFunction, ToolCallResponse, ToolCallType};
pub use tools::{try_tool_call_parse_aggregate, try_tool_call_parse_stream}; pub use tools::{try_tool_call_parse_aggregate, try_tool_call_parse_stream};
pub use xml::try_tool_call_parse_kimi_k2;
pub use xml::try_tool_call_parse_xml; pub use xml::try_tool_call_parse_xml;
...@@ -19,8 +19,10 @@ use super::pythonic::{ ...@@ -19,8 +19,10 @@ use super::pythonic::{
}; };
use super::response::ToolCallResponse; use super::response::ToolCallResponse;
use super::xml::{ use super::xml::{
detect_tool_call_start_glm47, detect_tool_call_start_xml, find_tool_call_end_position_glm47, detect_tool_call_start_glm47, detect_tool_call_start_kimi_k2, detect_tool_call_start_xml,
find_tool_call_end_position_xml, try_tool_call_parse_glm47, try_tool_call_parse_xml, find_tool_call_end_position_glm47, find_tool_call_end_position_kimi_k2,
find_tool_call_end_position_xml, try_tool_call_parse_glm47, try_tool_call_parse_kimi_k2,
try_tool_call_parse_xml,
}; };
use std::collections::HashMap; use std::collections::HashMap;
use std::sync::OnceLock; use std::sync::OnceLock;
...@@ -45,6 +47,7 @@ pub fn get_tool_parser_map() -> &'static HashMap<&'static str, ToolCallConfig> { ...@@ -45,6 +47,7 @@ pub fn get_tool_parser_map() -> &'static HashMap<&'static str, ToolCallConfig> {
map.insert("jamba", ToolCallConfig::jamba()); map.insert("jamba", ToolCallConfig::jamba());
map.insert("minimax_m2", ToolCallConfig::minimax_m2()); map.insert("minimax_m2", ToolCallConfig::minimax_m2());
map.insert("glm47", ToolCallConfig::glm47()); map.insert("glm47", ToolCallConfig::glm47());
map.insert("kimi_k2", ToolCallConfig::kimi_k2());
map.insert("default", ToolCallConfig::default()); map.insert("default", ToolCallConfig::default());
map.insert("nemotron_nano", ToolCallConfig::qwen3_coder()); // nemotron nano follows qwen3_coder format map.insert("nemotron_nano", ToolCallConfig::qwen3_coder()); // nemotron nano follows qwen3_coder format
map map
...@@ -91,6 +94,11 @@ pub async fn try_tool_call_parse( ...@@ -91,6 +94,11 @@ pub async fn try_tool_call_parse(
try_tool_call_parse_glm47(message, glm47_config, tools)?; try_tool_call_parse_glm47(message, glm47_config, tools)?;
Ok((results, normal_content)) Ok((results, normal_content))
} }
ParserConfig::KimiK2(kimi_config) => {
let (results, normal_content) =
try_tool_call_parse_kimi_k2(message, kimi_config, tools)?;
Ok((results, normal_content))
}
} }
} }
...@@ -144,6 +152,9 @@ pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow:: ...@@ -144,6 +152,9 @@ pub fn detect_tool_call_start(chunk: &str, parser_str: Option<&str>) -> anyhow::
ParserConfig::Glm47(glm47_config) => { ParserConfig::Glm47(glm47_config) => {
Ok(detect_tool_call_start_glm47(chunk, glm47_config)) Ok(detect_tool_call_start_glm47(chunk, glm47_config))
} }
ParserConfig::KimiK2(kimi_config) => {
Ok(detect_tool_call_start_kimi_k2(chunk, kimi_config))
}
}, },
None => anyhow::bail!( None => anyhow::bail!(
"Parser '{}' is not implemented. Available parsers: {:?}", "Parser '{}' is not implemented. Available parsers: {:?}",
...@@ -184,6 +195,9 @@ pub fn find_tool_call_end_position(chunk: &str, parser_str: Option<&str>) -> usi ...@@ -184,6 +195,9 @@ pub fn find_tool_call_end_position(chunk: &str, parser_str: Option<&str>) -> usi
ParserConfig::Glm47(glm47_config) => { ParserConfig::Glm47(glm47_config) => {
find_tool_call_end_position_glm47(chunk, glm47_config) find_tool_call_end_position_glm47(chunk, glm47_config)
} }
ParserConfig::KimiK2(kimi_config) => {
find_tool_call_end_position_kimi_k2(chunk, kimi_config)
}
}, },
None => { None => {
// Unknown parser, return full content length // Unknown parser, return full content length
...@@ -225,6 +239,7 @@ mod tests { ...@@ -225,6 +239,7 @@ mod tests {
"nemotron_nano", "nemotron_nano",
"minimax_m2", "minimax_m2",
"glm47", "glm47",
"kimi_k2",
]; ];
for parser in available_parsers { for parser in available_parsers {
assert!(parsers.contains(&parser)); assert!(parsers.contains(&parser));
......
This diff is collapsed.
...@@ -2,12 +2,17 @@ ...@@ -2,12 +2,17 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
mod glm47_parser; mod glm47_parser;
mod kimi_k2_parser;
mod parser; mod parser;
pub use super::response; pub use super::response;
pub use glm47_parser::{ pub use glm47_parser::{
detect_tool_call_start_glm47, find_tool_call_end_position_glm47, try_tool_call_parse_glm47, detect_tool_call_start_glm47, find_tool_call_end_position_glm47, try_tool_call_parse_glm47,
}; };
pub use kimi_k2_parser::{
detect_tool_call_start_kimi_k2, find_tool_call_end_position_kimi_k2,
try_tool_call_parse_kimi_k2,
};
pub use parser::{ pub use parser::{
detect_tool_call_start_xml, find_tool_call_end_position_xml, try_tool_call_parse_xml, detect_tool_call_start_xml, find_tool_call_end_position_xml, try_tool_call_parse_xml,
}; };
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment