// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 use super::json::JsonParserType; #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct JsonParserConfig { /// Start token for individual tool calls (e.g., "") pub tool_call_start_tokens: Vec, /// End token for individual tool calls (e.g., "") pub tool_call_end_tokens: Vec, /// Separator tokens between function name and arguments /// (e.g., "<|tool▁sep|>" for DeepSeek v3.1) /// Used by some models to separate function name from arguments pub tool_call_separator_tokens: Vec, /// The key for the function name in the tool call /// i.e. `{"name": "function", "arguments": {...}}` it would be /// "name" pub function_name_keys: Vec, /// The key for the arguments in the tool call /// i.e. `{"name": "function", "arguments": {...}}` it would be /// "arguments" pub arguments_keys: Vec, /// The type of JSON parser to use #[serde(default)] pub parser_type: JsonParserType, /// Parse input as bare JSON (a `{...}` object or `[...]` array) with no /// wrapping markers. Intended for guided-decoding paths where the backend /// emits a raw JSON shape. When true, `tool_call_start_tokens` / /// `tool_call_end_tokens` are ignored. #[serde(default)] pub bare_json_mode: bool, } impl Default for JsonParserConfig { fn default() -> Self { Self { tool_call_start_tokens: vec!["".to_string(), "<|python_tag|>".to_string()], tool_call_end_tokens: vec!["".to_string(), "".to_string()], tool_call_separator_tokens: vec![], function_name_keys: vec!["name".to_string()], arguments_keys: vec!["arguments".to_string(), "parameters".to_string()], parser_type: JsonParserType::Basic, bare_json_mode: false, } } } #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct XmlParserConfig { /// Start token for individual tool calls (e.g., "") pub tool_call_start_token: String, /// End token for individual tool calls (e.g., "") pub tool_call_end_token: String, /// Start token for function name (e.g., "") pub function_end_token: String, /// Start token for parameter (e.g., "") pub parameter_end_token: String, } impl Default for XmlParserConfig { fn default() -> Self { Self { tool_call_start_token: "".to_string(), tool_call_end_token: "".to_string(), function_start_token: "".to_string(), parameter_start_token: "".to_string(), } } } /// Configuration for DSML-style tool call parser (DeepSeek V3.2+) #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct DsmlParserConfig { /// Start token for function_calls block (e.g., "<|DSML|function_calls>") pub function_calls_start: String, /// End token for function_calls block (e.g., "") pub function_calls_end: String, /// Start prefix for invoke (e.g., "<|DSML|invoke name=") pub invoke_start_prefix: String, /// End token for invoke (e.g., "") pub invoke_end: String, /// Start prefix for parameter (e.g., "<|DSML|parameter name=") pub parameter_prefix: String, /// End token for parameter (e.g., "") pub parameter_end: String, } impl Default for DsmlParserConfig { fn default() -> Self { Self { function_calls_start: "<|DSML|function_calls>".to_string(), function_calls_end: "".to_string(), invoke_start_prefix: "<|DSML|invoke name=".to_string(), invoke_end: "".to_string(), parameter_prefix: "<|DSML|parameter name=".to_string(), parameter_end: "".to_string(), } } } /// Configuration for GLM-4.7 style tool call parser /// Format: function_nameparamvalue #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct Glm47ParserConfig { /// Start token for tool call block (e.g., "") pub tool_call_start: String, /// End token for tool call block (e.g., "") pub tool_call_end: String, /// Start token for argument key (e.g., "") pub arg_key_start: String, /// End token for argument key (e.g., "") pub arg_key_end: String, /// Start token for argument value (e.g., "") pub arg_value_start: String, /// End token for argument value (e.g., "") pub arg_value_end: String, } impl Default for Glm47ParserConfig { fn default() -> Self { Self { tool_call_start: "".to_string(), tool_call_end: "".to_string(), arg_key_start: "".to_string(), arg_key_end: "".to_string(), arg_value_start: "".to_string(), arg_value_end: "".to_string(), } } } /// Configuration for Kimi K2 tool call parser /// /// Format: /// ```text /// <|tool_calls_section_begin|> /// <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|> /// <|tool_calls_section_end|> /// ``` /// /// The model may emit either plural or singular forms of section tokens /// (e.g., `<|tool_calls_section_begin|>` or `<|tool_call_section_begin|>`). /// Both forms are supported via the `section_start_variants` and `section_end_variants` fields. /// See vllm `kimi_k2_tool_parser.py` for reference. #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct KimiK2ParserConfig { /// Primary start token for the tool calls section pub section_start: String, /// Primary end token for the tool calls section pub section_end: String, /// All recognized start tokens for the tool calls section (includes singular variants) pub section_start_variants: Vec, /// All recognized end tokens for the tool calls section (includes singular variants) pub section_end_variants: Vec, /// Start token for an individual tool call (e.g., "<|tool_call_begin|>") pub call_start: String, /// End token for an individual tool call (e.g., "<|tool_call_end|>") pub call_end: String, /// Token separating function ID from JSON arguments (e.g., "<|tool_call_argument_begin|>") pub argument_begin: String, } impl Default for KimiK2ParserConfig { fn default() -> Self { Self { section_start: "<|tool_calls_section_begin|>".to_string(), section_end: "<|tool_calls_section_end|>".to_string(), section_start_variants: vec![ "<|tool_calls_section_begin|>".to_string(), "<|tool_call_section_begin|>".to_string(), ], section_end_variants: vec![ "<|tool_calls_section_end|>".to_string(), "<|tool_call_section_end|>".to_string(), ], call_start: "<|tool_call_begin|>".to_string(), call_end: "<|tool_call_end|>".to_string(), argument_begin: "<|tool_call_argument_begin|>".to_string(), } } } /// Parser-specific configuration #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum ParserConfig { Json(JsonParserConfig), Xml(XmlParserConfig), Pythonic, Harmony(JsonParserConfig), Typescript, Dsml(DsmlParserConfig), KimiK2(KimiK2ParserConfig), Glm47(Glm47ParserConfig), } impl ParserConfig { /// Get the tool call start tokens for this parser configuration /// Returns a vector of start tokens that indicate the beginning of a tool call pub fn tool_call_start_tokens(&self) -> Vec { match self { ParserConfig::Json(config) => config.tool_call_start_tokens.clone(), ParserConfig::Harmony(config) => config.tool_call_start_tokens.clone(), ParserConfig::Xml(config) => vec![config.tool_call_start_token.clone()], ParserConfig::Pythonic => vec![], ParserConfig::Typescript => vec![], ParserConfig::Dsml(config) => vec![config.function_calls_start.clone()], ParserConfig::Glm47(config) => vec![config.tool_call_start.clone()], ParserConfig::KimiK2(config) => config.section_start_variants.clone(), } } /// Get the tool call end tokens for this parser configuration /// Returns a vector of end tokens that indicate the end of a tool call pub fn tool_call_end_tokens(&self) -> Vec { match self { ParserConfig::Json(config) => config.tool_call_end_tokens.clone(), ParserConfig::Harmony(config) => config.tool_call_end_tokens.clone(), ParserConfig::Xml(config) => vec![config.tool_call_end_token.clone()], ParserConfig::Pythonic => vec![], ParserConfig::Typescript => vec![], ParserConfig::Dsml(config) => vec![config.function_calls_end.clone()], ParserConfig::Glm47(config) => vec![config.tool_call_end.clone()], ParserConfig::KimiK2(config) => config.section_end_variants.clone(), } } } /// Configuration for parsing tool calls with different formats #[derive(Clone, Debug, serde::Serialize, serde::Deserialize)] pub struct ToolCallConfig { /// Parser-specific configuration. pub parser_config: ParserConfig, } impl Default for ToolCallConfig { fn default() -> Self { Self { parser_config: ParserConfig::Json(JsonParserConfig::default()), } } } impl ToolCallConfig { /// Default configuration for hermes tool calls /// {"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}\n pub fn hermes() -> Self { Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec!["".to_string()], tool_call_end_tokens: vec!["".to_string()], ..Default::default() }), } } /// Default configuration for nemotron tool calls /// [{"name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"}}] pub fn nemotron_deci() -> Self { Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec!["".to_string()], tool_call_end_tokens: vec!["".to_string()], ..Default::default() }), } } pub fn llama3_json() -> Self { // <|python_tag|>{ "name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"} } // or { "name": "get_weather", "arguments": {"location": "San Francisco, CA", "unit": "fahrenheit"} } Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec!["<|python_tag|>".to_string()], tool_call_end_tokens: vec!["".to_string()], ..Default::default() }), } } pub fn mistral() -> Self { Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec!["[TOOL_CALLS]".to_string()], tool_call_end_tokens: vec!["[/TOOL_CALLS]".to_string(), "".to_string()], ..Default::default() }), } } pub fn phi4() -> Self { Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec!["functools".to_string()], tool_call_end_tokens: vec!["".to_string()], ..Default::default() }), } } pub fn pythonic() -> Self { Self { parser_config: ParserConfig::Pythonic, } } pub fn harmony() -> Self { Self { parser_config: ParserConfig::Harmony(JsonParserConfig { tool_call_start_tokens: vec!["<|start|>assistant<|channel|>commentary".to_string()], tool_call_end_tokens: vec!["<|call|>".to_string()], ..Default::default() }), } } pub fn deepseek_v3_1() -> Self { // The whole tool calls block is wrapped between // <|tool▁calls▁begin|> ... <|tool▁calls▁end|> // regardless of number of tool calls. For external use of this // config, we want them to only be operating on the whole block, // so the tool parser can properly consume all tool call tokens. // https://huggingface.co/deepseek-ai/DeepSeek-V3.1#toolcall Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec![ "<|tool▁calls▁begin|>".to_string(), // "<|tool▁call▁begin|>".to_string(), ], tool_call_end_tokens: vec![ "<|tool▁calls▁end|>".to_string(), // "<|tool▁call▁end|>".to_string(), ], tool_call_separator_tokens: vec!["<|tool▁sep|>".to_string()], parser_type: JsonParserType::DeepseekV31, ..Default::default() }), } } pub fn deepseek_v3() -> Self { // DeepSeek V3 format: // <|tool▁calls▁begin|><|tool▁call▁begin|>{type}<|tool▁sep|>{function_name}\n```json\n{arguments}\n```<|tool▁call▁end|><|tool▁calls▁end|> // There are some differences between DeepSeek V3 and DeepSeek V3.1 Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec!["<|tool▁calls▁begin|>".to_string()], tool_call_end_tokens: vec!["<|tool▁calls▁end|>".to_string()], tool_call_separator_tokens: vec!["<|tool▁sep|>".to_string()], parser_type: JsonParserType::DeepseekV3, ..Default::default() }), } } pub fn qwen3_coder() -> Self { // value Self { parser_config: ParserConfig::Xml(XmlParserConfig::default()), } } pub fn jamba() -> Self { Self { parser_config: ParserConfig::Json(JsonParserConfig { tool_call_start_tokens: vec!["".to_string()], tool_call_end_tokens: vec!["".to_string()], ..Default::default() }), } } pub fn deepseek_v3_2() -> Self { // DeepSeek V3.2 format (DSML): // <|DSML|function_calls> // <|DSML|invoke name="function_name"> // <|DSML|parameter name="param_name" string="true|false">value // // Self { parser_config: ParserConfig::Dsml(DsmlParserConfig::default()), } } pub fn deepseek_v4() -> Self { // DeepSeek V4 format (DSML): // <|DSML|tool_calls> // <|DSML|invoke name="function_name"> // <|DSML|parameter name="param_name" string="true|false">value // // Self { parser_config: ParserConfig::Dsml(DsmlParserConfig { function_calls_start: "<|DSML|tool_calls>".to_string(), function_calls_end: "".to_string(), ..Default::default() }), } } pub fn minimax_m2() -> Self { // MiniMax-M2.1 format: // // // value // // // Reference: https://huggingface.co/MiniMaxAI/MiniMax-M2.1/blob/main/docs/tool_calling_guide.md Self { parser_config: ParserConfig::Xml(XmlParserConfig { tool_call_start_token: "".to_string(), tool_call_end_token: "".to_string(), function_start_token: "".to_string(), parameter_start_token: "".to_string(), }), } } pub fn glm47() -> Self { // GLM-4.7 format: // function_nameparam1value1 // Reference: https://huggingface.co/zai-org/GLM-4.7/blob/main/chat_template.jinja Self { parser_config: ParserConfig::Glm47(Glm47ParserConfig::default()), } } pub fn kimi_k2() -> Self { // Kimi K2 format: // <|tool_calls_section_begin|> // <|tool_call_begin|>functions.{name}:{index}<|tool_call_argument_begin|>{json_args}<|tool_call_end|> // <|tool_calls_section_end|> // Reference: https://huggingface.co/moonshotai/Kimi-K2-Instruct/blob/main/docs/tool_call_guidance.md Self { parser_config: ParserConfig::KimiK2(KimiK2ParserConfig::default()), } } }