feat: DeepSeek V3.2 chat template support (#4797)

Signed-off-by: Vladislav Nosivskoy <vladnosiv@gmail.com> Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>

feat: DeepSeek V3.2 chat template support (#4797)
Signed-off-by: Vladislav Nosivskoy <vladnosiv@gmail.com> Co-authored-by: Ryan McCormick <rmccormick@nvidia.com>
1efc7d63 · Vladislav Nosivskoy · GitHub · 64a30d3d · 1efc7d63 · 1efc7d63
Unverified Commit 1efc7d63 authored Dec 12, 2025 by Vladislav Nosivskoy Committed by GitHub Dec 12, 2025
15 changed files
--- a/.github/workflows/copyright-check.ps1
+++ b/.github/workflows/copyright-check.ps1
@@ -84,7 +84,7 @@ $global:copyright_results = @{
 $ignored_files = @('.clang-format', '.gitattributes', '.gitignore', '.gitkeep', '.patch', 'Cargo.lock', 'LICENSE', 'uv.lock', 'rust-toolchain.toml', 'codespell.txt', 'exclusions.txt')
 write-debug "<copyright-check> ignored_files = ['$($ignored_files -join "','")']."
-$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models')
+$ignored_paths = @('.github', '.mypy_cache', '.pytest_cache', 'lib/llm/tests/data/sample-models', 'lib/llm/tests/data/deepseek-v3.2')
 write-debug "<copyright-check> ignored_paths = ['$($ignored_paths -join "','")']."
 $ignored_types = @('.bat', '.gif', '.ico', '.ipynb', '.jpg', '.jpeg', '.patch', '.png', '.pyc', '.pyi', '.rst', '.zip', '.md', '.json')
 write-debug "<copyright-check> ignored_types = ['$($ignored_types -join "', '")']."

--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -64,6 +64,7 @@ repos:
  - id: mixed-line-ending
  - id: requirements-txt-fixer
  - id: trailing-whitespace
+    exclude: lib/llm/tests/data/deepseek-v3.2/.*\.txt$
 # NOTE: removing from pre commit
 # will move to gitlab ci to run in proper

--- a/LICENSE
+++ b/LICENSE
-                                 Apache License
+   NOTICE: The test data files under ./lib/llm/tests/data/deepseek-v3.2 are
+   derived from the DeepSeek-V3.2 model repository (originally developed by
+   DeepSeek). The original files were obtained from:
+   https://huggingface.co/deepseek-ai/DeepSeek-V3.2 (commit c69397ecfd1fd142e90e3fbad51f4c7e40b9f3d3)
+   These files are licensed under the MIT License. The full text of the MIT
+   License can be found in ./lib/llm/tests/data/deepseek-v3.2/LICENSE.
+   The rest of this codebase is licensed under the Apache License 2.0 as
+   described below.
+                                Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

--- a/lib/llm/src/preprocessor/prompt.rs
+++ b/lib/llm/src/preprocessor/prompt.rs
@@ -23,6 +23,7 @@ use minijinja::value::Value;
 use std::collections::HashMap;
 use std::sync::Arc;
+pub mod deepseek_v32;
 mod template;
 pub use template::ContextMixins;

--- a/lib/llm/src/preprocessor/prompt/deepseek_v32.rs
+++ b/lib/llm/src/preprocessor/prompt/deepseek_v32.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//! DeepSeek V3.2 native prompt formatting
+//!
+//! This module provides a native Rust implementation of DeepSeek V3.2's chat template,
+//! based on the official Python reference code: encoding_dsv32.py
+//!
+//! Reference: https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main/encoding
+use anyhow::{Context, Result};
+use serde_json::Value as JsonValue;
+/// Special tokens for DeepSeek V3.2
+///
+/// These literals are written verbatim into the rendered prompt.
+pub mod tokens {
+    /// Written once at the very start of the prompt when a BOS token is requested.
+    pub const BOS: &str = "<｜begin▁of▁sentence｜>";
+    /// Terminates every rendered assistant message.
+    pub const EOS: &str = "<｜end▁of▁sentence｜>";
+    /// Opens a user (or developer) turn.
+    pub const USER_START: &str = "<｜User｜>";
+    /// Opens the assistant turn that follows a user (or developer) turn.
+    pub const ASSISTANT_START: &str = "<｜Assistant｜>";
+    /// Opens a reasoning block.
+    pub const THINKING_START: &str = "<think>";
+    /// Closes a reasoning block.
+    pub const THINKING_END: &str = "</think>";
+    /// Prefix placed inside the DSML tool-calling tags (`function_calls`, `invoke`, `parameter`).
+    pub const DSML_TOKEN: &str = "｜DSML｜";
+}
+/// System message template for tools
+///
+/// Placeholders `{dsml_token}`, `{thinking_start_token}`, `{thinking_end_token}` and
+/// `{tool_schemas}` are filled in by `render_tools`.
+const TOOLS_SYSTEM_TEMPLATE: &str = r#"## Tools
+You have access to a set of tools you can use to answer the user's question.
+You can invoke functions by writing a "<{dsml_token}function_calls>" block like the following as part of your reply to the user:
+<{dsml_token}function_calls>
+<{dsml_token}invoke name="$FUNCTION_NAME">
+<{dsml_token}parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</{dsml_token}parameter>
+...
+</{dsml_token}invoke>
+<{dsml_token}invoke name="$FUNCTION_NAME2">
+...
+</{dsml_token}invoke>
+</{dsml_token}function_calls>
+String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
+If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
+<{dsml_token}function_calls>
+...
+</{dsml_token}function_calls>
+<function_results>
+...
+</function_results>
+{thinking_start_token}...thinking about results{thinking_end_token}
+Here are the functions available in JSONSchema format:
+<functions>
+{tool_schemas}
+</functions>
+"#;
+/// Section appended to a system/developer message that carries a `response_format`;
+/// `{schema}` is replaced with the JSON rendering of that value.
+const RESPONSE_FORMAT_TEMPLATE: &str = concat!(
+    "## Response Format:\n\n",
+    "You MUST strictly adhere to the following schema to reply:\n",
+    "{schema}",
+);
+/// One tool invocation inside an assistant message; `{arguments}` receives the
+/// DSML parameter lines.
+const TOOL_CALL_TEMPLATE: &str = concat!(
+    "<{dsml_token}invoke name=\"{name}\">\n",
+    "{arguments}\n",
+    "</{dsml_token}invoke>",
+);
+/// Wrapper around all invocations of one assistant message.
+// Not referenced in this module: `render_message` writes the wrapper tags inline,
+// hence the lint exemption.
+#[allow(dead_code)]
+const TOOL_CALLS_TEMPLATE: &str = concat!(
+    "<{dsml_token}function_calls>\n",
+    "{tool_calls}\n",
+    "</{dsml_token}function_calls>",
+);
+/// A single tool result line inside `<function_results>`.
+const TOOL_OUTPUT_TEMPLATE: &str = "\n<result>{content}</result>";
+/// Thinking mode for the model
+///
+/// In [`ThinkingMode::Thinking`] the prompt opens a `<think>` block after the last
+/// user/developer turn; in [`ThinkingMode::Chat`] every turn is followed by `</think>`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ThinkingMode {
+    /// Plain chat: reasoning blocks are never opened.
+    Chat,
+    /// Reasoning enabled for the turn(s) after the last user message.
+    Thinking,
+}
+impl ThinkingMode {
+    /// Returns the lowercase name of the mode (`"chat"` or `"thinking"`).
+    pub fn as_str(&self) -> &'static str {
+        match *self {
+            Self::Thinking => "thinking",
+            Self::Chat => "chat",
+        }
+    }
+}
+/// Convert value to JSON string matching Python's json.dumps() format with spaces
+///
+/// `serde_json::to_string` emits the compact form (`{"a":1,"b":2}`), whereas Python's
+/// default separators are `", "` and `": "` (`{"a": 1, "b": 2}`). The expected prompts
+/// in the test fixtures use the Python form, so a space is inserted after every
+/// structural `:` and `,`. Characters inside string literals are left untouched.
+///
+/// If serialization fails, the literal `{}` is used as the compact form.
+fn to_json(value: &JsonValue) -> String {
+    let compact = serde_json::to_string(value).unwrap_or_else(|_| "{}".to_string());
+    let mut spaced = String::with_capacity(compact.len() + compact.len() / 4);
+    let mut in_string = false;
+    // True when the previous character was an unescaped backslash inside a string.
+    // Looking only at "previous char is a backslash" is not enough: in `"a\\"` the
+    // closing quote follows a backslash that is itself escaped.
+    let mut escaped = false;
+    for ch in compact.chars() {
+        spaced.push(ch);
+        if in_string {
+            if escaped {
+                // This character is the payload of an escape sequence.
+                escaped = false;
+            } else if ch == '\\' {
+                escaped = true;
+            } else if ch == '"' {
+                in_string = false;
+            }
+        } else {
+            match ch {
+                '"' => in_string = true,
+                ':' | ',' => spaced.push(' '),
+                _ => {}
+            }
+        }
+    }
+    spaced
+}
+/// Extract tools from OpenAI format
+///
+/// Returns a copy of the `function` object of every tool entry, in input order.
+/// Entries without a `function` key are skipped.
+fn tools_from_openai_format(tools: &[JsonValue]) -> Vec<JsonValue> {
+    let mut functions = Vec::with_capacity(tools.len());
+    for tool in tools {
+        if let Some(function) = tool.get("function") {
+            functions.push(function.clone());
+        }
+    }
+    functions
+}
+/// Render tools section for system prompt
+///
+/// Each tool's `function` object is serialized with [`to_json`], one per line, and
+/// inserted into [`TOOLS_SYSTEM_TEMPLATE`] together with the DSML and thinking tokens.
+fn render_tools(tools: &[JsonValue]) -> String {
+    let tool_schemas = tools_from_openai_format(tools)
+        .iter()
+        .map(to_json)
+        .collect::<Vec<_>>()
+        .join("\n");
+    // Fill the fixed tokens first and the caller-supplied schemas last. `str::replace`
+    // rewrites every occurrence, so substituting the schemas earlier would also rewrite
+    // placeholder-looking text (e.g. "{dsml_token}") that appears inside a tool
+    // description.
+    TOOLS_SYSTEM_TEMPLATE
+        .replace("{dsml_token}", tokens::DSML_TOKEN)
+        .replace("{thinking_start_token}", tokens::THINKING_START)
+        .replace("{thinking_end_token}", tokens::THINKING_END)
+        .replace("{tool_schemas}", &tool_schemas)
+}
+/// Find the last user or developer message index
+///
+/// Returns `None` when no message has the role `"user"` or `"developer"`.
+/// Messages whose `role` is missing or not a string never match.
+fn find_last_user_index(messages: &[JsonValue]) -> Option<usize> {
+    messages.iter().rposition(|msg| {
+        matches!(
+            msg.get("role").and_then(|r| r.as_str()),
+            Some("user" | "developer")
+        )
+    })
+}
+/// Encode arguments to DSML parameter format
+///
+/// `tool_call` is the `function` object of a tool call. Its `arguments` field is
+/// normally a JSON-encoded string (OpenAI format); an already-parsed JSON object is
+/// accepted as well. Each top-level argument becomes one line of the form
+/// `<｜DSML｜parameter name="K" string="true|false">V</｜DSML｜parameter>`:
+/// string values are written as-is with `string="true"`, everything else is written
+/// via [`to_json`] with `string="false"`.
+///
+/// # Errors
+/// Fails when `arguments` is missing or neither a string nor an object, when the
+/// string is not valid JSON, or when the parsed JSON is not an object.
+fn encode_arguments_to_dsml(tool_call: &JsonValue) -> Result<String> {
+    let raw_arguments = tool_call
+        .get("arguments")
+        .context("Missing or invalid 'arguments' field")?;
+    // Holds the parsed document when `arguments` arrived as a string, so the borrowed
+    // object below outlives the match.
+    let parsed: JsonValue;
+    let arguments_obj = match raw_arguments {
+        JsonValue::Object(map) => map,
+        JsonValue::String(text) => {
+            parsed = serde_json::from_str(text).context("Failed to parse arguments JSON")?;
+            parsed.as_object().context("Arguments must be an object")?
+        }
+        _ => anyhow::bail!("Missing or invalid 'arguments' field"),
+    };
+    let dsml = tokens::DSML_TOKEN;
+    let params: Vec<String> = arguments_obj
+        .iter()
+        .map(|(key, value)| {
+            // Strings are emitted without quotes or escaping; other types as JSON.
+            let (text, is_string) = match value {
+                JsonValue::String(s) => (s.clone(), true),
+                other => (to_json(other), false),
+            };
+            format!("<{dsml}parameter name=\"{key}\" string=\"{is_string}\">{text}</{dsml}parameter>")
+        })
+        .collect();
+    Ok(params.join("\n"))
+}
+/// Render a single message
+///
+/// Produces the prompt fragment for `messages[index]` according to its `role`:
+/// * `system` - content, then optional tools and response-format sections.
+/// * `user` - content wrapped in the user/assistant markers, followed by `<think>`
+///   when this is the last user turn in thinking mode and by `</think>` otherwise.
+/// * `developer` - like `user`, but the body is the tools/response-format sections
+///   followed by `# The user's message is: <content>`.
+/// * `assistant` - optional reasoning (only after the last user turn in thinking
+///   mode), content, optional DSML tool calls, then the EOS token.
+/// * `tool` - one `<result>` entry; the first result after an assistant message opens
+///   `<function_results>` and the one matching the assistant's tool-call count closes it.
+///
+/// # Errors
+/// Fails on a missing or unknown `role`, a developer message without string content,
+/// or a malformed tool call.
+///
+/// # Panics
+/// Panics if `index` is out of bounds for `messages`.
+fn render_message(
+    index: usize,
+    messages: &[JsonValue],
+    thinking_mode: ThinkingMode,
+    last_user_idx: Option<usize>,
+) -> Result<String> {
+    let msg = &messages[index];
+    let role = msg
+        .get("role")
+        .and_then(|r| r.as_str())
+        .context("Missing 'role' field")?;
+    let thinking = thinking_mode == ThinkingMode::Thinking;
+    let mut out = String::new();
+    match role {
+        "system" => {
+            out.push_str(message_text(msg).unwrap_or(""));
+            push_tools_and_response_format(&mut out, msg);
+        }
+        "user" => {
+            let body = message_text(msg).unwrap_or("");
+            push_user_turn(&mut out, body, thinking && last_user_idx == Some(index));
+        }
+        "developer" => {
+            let content = message_text(msg).context("Developer role requires content")?;
+            let mut body = String::new();
+            push_tools_and_response_format(&mut body, msg);
+            body.push_str("\n\n# The user's message is: ");
+            body.push_str(content);
+            push_user_turn(&mut out, &body, thinking && last_user_idx == Some(index));
+        }
+        "assistant" => {
+            // The opening <think> was already emitted by the preceding user turn (or
+            // tool results), so only the reasoning text and the closing tag go here.
+            if thinking
+                && last_user_idx.is_some_and(|idx| index > idx)
+                && let Some(reasoning) = msg.get("reasoning_content").and_then(|r| r.as_str())
+            {
+                out.push_str(reasoning);
+                out.push_str(tokens::THINKING_END);
+            }
+            if let Some(content) = message_text(msg) {
+                out.push_str(content);
+            }
+            let tool_calls = msg
+                .get("tool_calls")
+                .and_then(|t| t.as_array())
+                .filter(|calls| !calls.is_empty());
+            if let Some(calls) = tool_calls {
+                out.push_str("\n\n");
+                out.push_str(&render_tool_calls(calls)?);
+            }
+            out.push_str(tokens::EOS);
+        }
+        "tool" => {
+            // Walk backwards to the nearest assistant message, counting the tool
+            // results that sit between it and this message.
+            let mut earlier_results = 0;
+            let mut calling_assistant = None;
+            for earlier in messages[..index].iter().rev() {
+                match earlier.get("role").and_then(|r| r.as_str()) {
+                    Some("tool") => earlier_results += 1,
+                    Some("assistant") => {
+                        calling_assistant = Some(earlier);
+                        break;
+                    }
+                    _ => {}
+                }
+            }
+            let result_position = earlier_results + 1;
+            if result_position == 1 {
+                out.push_str("\n\n<function_results>");
+            }
+            let content = message_text(msg).unwrap_or("");
+            out.push_str(&TOOL_OUTPUT_TEMPLATE.replace("{content}", content));
+            // Close the results block once every tool call of the assistant message
+            // has been answered.
+            if let Some(assistant) = calling_assistant {
+                let expected_results = assistant
+                    .get("tool_calls")
+                    .and_then(|t| t.as_array())
+                    .map_or(0, |calls| calls.len());
+                if result_position == expected_results {
+                    let reopen_thinking =
+                        last_user_idx.is_some_and(|idx| index >= idx) && thinking;
+                    out.push_str("\n</function_results>");
+                    out.push_str("\n\n");
+                    out.push_str(if reopen_thinking {
+                        tokens::THINKING_START
+                    } else {
+                        tokens::THINKING_END
+                    });
+                }
+            }
+        }
+        other => anyhow::bail!("Unknown role: {}", other),
+    }
+    Ok(out)
+}
+/// Returns the message's `content` when it is a JSON string.
+fn message_text(msg: &JsonValue) -> Option<&str> {
+    msg.get("content").and_then(|c| c.as_str())
+}
+/// Appends the tools section and the response-format section of a system/developer
+/// message, each preceded by a blank line, when the corresponding key is present.
+fn push_tools_and_response_format(out: &mut String, msg: &JsonValue) {
+    if let Some(tools) = msg.get("tools").and_then(|t| t.as_array()) {
+        out.push_str("\n\n");
+        out.push_str(&render_tools(tools));
+    }
+    if let Some(response_format) = msg.get("response_format") {
+        out.push_str("\n\n");
+        out.push_str(&RESPONSE_FORMAT_TEMPLATE.replace("{schema}", &to_json(response_format)));
+    }
+}
+/// Appends `body` wrapped in the user/assistant markers, followed by `<think>` when
+/// `open_thinking` is set and by `</think>` otherwise.
+fn push_user_turn(out: &mut String, body: &str, open_thinking: bool) {
+    out.push_str(tokens::USER_START);
+    out.push_str(body);
+    out.push_str(tokens::ASSISTANT_START);
+    out.push_str(if open_thinking {
+        tokens::THINKING_START
+    } else {
+        tokens::THINKING_END
+    });
+}
+/// Renders the DSML `function_calls` block for the tool calls of one assistant message.
+fn render_tool_calls(tool_calls: &[JsonValue]) -> Result<String> {
+    let dsml = tokens::DSML_TOKEN;
+    let mut block = format!("<{dsml}function_calls>\n");
+    for call in tool_calls {
+        let function = call.get("function");
+        let name = function
+            .and_then(|f| f.get("name"))
+            .and_then(|n| n.as_str())
+            .context("Missing tool call name")?;
+        let arguments = encode_arguments_to_dsml(function.context("Missing function")?)?;
+        let invoke = TOOL_CALL_TEMPLATE
+            .replace("{dsml_token}", dsml)
+            .replace("{name}", name)
+            .replace("{arguments}", &arguments);
+        block.push_str(&invoke);
+        block.push('\n');
+    }
+    block.push_str("</");
+    block.push_str(dsml);
+    block.push_str("function_calls>");
+    Ok(block)
+}
+/// Encode messages to prompt string
+///
+/// Renders every message in order and concatenates the fragments.
+///
+/// # Arguments
+/// * `messages` - Array of messages in OpenAI format
+/// * `thinking_mode` - Whether to use thinking mode
+/// * `add_bos_token` - Whether to add BOS token at start
+///
+/// # Returns
+/// Formatted prompt string ready for tokenization
+///
+/// # Errors
+/// Returns the first error produced while rendering a message.
+pub fn encode_messages(
+    messages: &[JsonValue],
+    thinking_mode: ThinkingMode,
+    add_bos_token: bool,
+) -> Result<String> {
+    // The last user/developer turn decides where a thinking block may be opened.
+    let last_user_idx = find_last_user_index(messages);
+    let mut prompt = if add_bos_token {
+        tokens::BOS.to_string()
+    } else {
+        String::new()
+    };
+    for index in 0..messages.len() {
+        prompt.push_str(&render_message(index, messages, thinking_mode, last_user_idx)?);
+    }
+    Ok(prompt)
+}
+/// DeepSeek V3.2 Prompt Formatter
+///
+/// Implements OAIPromptFormatter for DeepSeek V3.2 models using the native Rust
+/// implementation in this module instead of a Jinja chat template.
+#[derive(Debug)]
+pub struct DeepSeekV32Formatter {
+    /// Mode passed to [`encode_messages`] for every rendered request.
+    thinking_mode: ThinkingMode,
+}
+impl DeepSeekV32Formatter {
+    /// Create formatter with an explicit thinking mode
+    pub fn new(thinking_mode: ThinkingMode) -> Self {
+        Self { thinking_mode }
+    }
+    /// Create formatter with thinking mode enabled (default for DSV3.2)
+    pub fn new_thinking() -> Self {
+        Self {
+            thinking_mode: ThinkingMode::Thinking,
+        }
+    }
+    /// Create formatter with chat mode
+    pub fn new_chat() -> Self {
+        Self {
+            thinking_mode: ThinkingMode::Chat,
+        }
+    }
+}
+impl super::OAIPromptFormatter for DeepSeekV32Formatter {
+    /// This formatter reports support for generation prompts.
+    fn supports_add_generation_prompt(&self) -> bool {
+        true
+    }
+    /// Renders the request's messages into a DeepSeek V3.2 prompt.
+    ///
+    /// The messages are round-tripped through `serde_json` so the encoder can work
+    /// on plain JSON values. Fails if that conversion fails, if the result is not a
+    /// JSON array, or if encoding any message fails.
+    fn render(&self, req: &dyn super::OAIChatLikeRequest) -> Result<String> {
+        let raw_messages = req.messages();
+        let as_json =
+            serde_json::to_value(&raw_messages).context("Failed to convert messages to JSON")?;
+        let message_list = as_json.as_array().context("Messages is not an array")?;
+        // The BOS token is always emitted by this formatter.
+        let add_bos_token = true;
+        encode_messages(message_list, self.thinking_mode, add_bos_token)
+    }
+}
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+    /// Encodes a JSON array of messages in thinking mode with a BOS token.
+    fn encode_thinking(messages: JsonValue) -> String {
+        encode_messages(messages.as_array().unwrap(), ThinkingMode::Thinking, true).unwrap()
+    }
+    #[test]
+    fn test_simple_conversation() {
+        let prompt = encode_thinking(json!([
+            {"role": "system", "content": "You are a helpful assistant."},
+            {"role": "user", "content": "Hello!"}
+        ]));
+        assert!(prompt.starts_with(tokens::BOS));
+        let expected_fragments = [
+            "You are a helpful assistant.",
+            tokens::USER_START,
+            "Hello!",
+            tokens::ASSISTANT_START,
+            tokens::THINKING_START,
+        ];
+        for fragment in expected_fragments {
+            assert!(prompt.contains(fragment));
+        }
+    }
+    #[test]
+    fn test_tools_rendering() {
+        let weather_tool = json!({
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {"type": "string"}
+                    }
+                }
+            }
+        });
+        let prompt = encode_thinking(json!([
+            {
+                "role": "system",
+                "content": "You are helpful.",
+                "tools": [weather_tool]
+            },
+            {"role": "user", "content": "What's the weather?"}
+        ]));
+        for fragment in ["## Tools", "get_weather", "<functions>"] {
+            assert!(prompt.contains(fragment));
+        }
+    }
+}
--- a/lib/llm/src/preprocessor/prompt/template.rs
+++ b/lib/llm/src/preprocessor/prompt/template.rs
@@ -18,6 +18,18 @@ use tokcfg::{ChatTemplate, ChatTemplateValue};
 impl PromptFormatter {
    pub fn from_mdc(mdc: &ModelDeploymentCard) -> Result<PromptFormatter> {
+        // Special handling for DeepSeek-V3.2(-Speciale) which doesn't provide Jinja chat_template
+        let name_lower = mdc.display_name.to_lowercase();
+        if name_lower.contains("deepseek")
+            && name_lower.contains("v3.2")
+            && !name_lower.contains("exp")
+        {
+            tracing::info!("Detected DeepSeek V3.2 model (non-Exp), using native Rust formatter");
+            return Ok(Self::OAI(Arc::new(
+                super::deepseek_v32::DeepSeekV32Formatter::new_thinking(),
+            )));
+        }
        match mdc
            .prompt_formatter
            .as_ref()

--- a/lib/llm/tests/data/deepseek-v3.2/LICENSE
+++ b/lib/llm/tests/data/deepseek-v3.2/LICENSE
+MIT License
+Copyright (c) 2025 DeepSeek
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
--- a/lib/llm/tests/data/deepseek-v3.2/README.md
+++ b/lib/llm/tests/data/deepseek-v3.2/README.md
+# DeepSeek-V3.2 Test Data
+## Source
+The test files in this directory are taken from the official DeepSeek-V3.2 model repository on Hugging Face:
+https://huggingface.co/deepseek-ai/DeepSeek-V3.2 (commit c69397ecfd1fd142e90e3fbad51f4c7e40b9f3d3)
+These files are used for testing the DeepSeek-V3.2 model's tool calling and encoding capabilities.
+## License
+All files in this directory are licensed under the MIT License. See the [LICENSE](./LICENSE) file in this directory for the full license text.
+Copyright (c) 2025 DeepSeek
--- a/lib/llm/tests/data/deepseek-v3.2/test_input.json
+++ b/lib/llm/tests/data/deepseek-v3.2/test_input.json
+{
+    "tools": [
+        {
+            "type": "function",
+            "function": {
+                "name": "get_datetime",
+                "description": "Get the current date and time",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "timezone": {
+                            "type": "string",
+                            "description": "The timezone, e.g. Asia/Shanghai, UTC"
+                        }
+                    },
+                    "required": ["timezone"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the weather for a specific date and location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city name, e.g. Beijing, Hangzhou"
+                        },
+                        "date": {
+                            "type": "string",
+                            "description": "The date in YYYY-MM-DD format"
+                        }
+                    },
+                    "required": ["location", "date"]
+                }
+            }
+        }
+    ],
+    "messages": [
+        {
+            "role": "system",
+            "content": "You are a helpful Assistant."
+        },
+        {
+            "role": "user",
+            "content": "明天杭州和北京的天气怎么样？"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "用户询问明天的天气，我需要先获取当前日期来计算明天的日期📅",
+            "tool_calls": [
+                {
+                    "id": "call_xK9mN3pL2qR8vT5wY6hZ1aB4",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"timezone\": \"Asia/Shanghai\"}",
+                        "name": "get_datetime"
+                    }
+                }
+            ]
+        },
+        {
+            "tool_call_id": "call_xK9mN3pL2qR8vT5wY6hZ1aB4",
+            "role": "tool",
+            "content": "{\"current_date\": \"2024-01-15\", \"current_time\": \"14:30:00\", \"timezone\": \"Asia/Shanghai\"}"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "现在知道今天是2024-01-15，明天就是2024-01-16。接下来查询杭州和北京明天的天气🌤️",
+            "tool_calls": [
+                {
+                    "id": "call_bN7kR9mX3pQ2wL5vY8jZ4cD6",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-16\"}",
+                        "name": "get_weather"
+                    }
+                },
+                {
+                    "id": "call_dP9mL7kX5rT4yN3wZ2hV8eF1",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Beijing\", \"date\": \"2024-01-16\"}",
+                        "name": "get_weather"
+                    }
+                }
+            ]
+        },
+        {
+            "tool_call_id": "call_bN7kR9mX3pQ2wL5vY8jZ4cD6",
+            "role": "tool",
+            "content": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-16\", \"temperature_high\": \"12\", \"temperature_low\": \"5\", \"weather\": \"多云\", \"humidity\": \"65%\"}"
+        },
+        {
+            "tool_call_id": "call_dP9mL7kX5rT4yN3wZ2hV8eF1",
+            "role": "tool",
+            "content": "{\"location\": \"Beijing\", \"date\": \"2024-01-16\", \"temperature_high\": \"-2\", \"temperature_low\": \"-8\", \"weather\": \"晴\", \"humidity\": \"30%\"}"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "已获取两个城市明天的天气信息，现在整理给用户✨",
+            "content": "根据查询结果，明天（2024年1月16日）的天气情况如下：\n\n**杭州**：\n- 天气：多云\n- 最高温度：12°C\n- 最低温度：5°C\n- 湿度：65%\n\n**北京**：\n- 天气：晴\n- 最高温度：-2°C\n- 最低温度：-8°C\n- 湿度：30%\n\n杭州明天会比较温暖但有些多云，而北京会很冷但是晴天。建议在北京的朋友要注意保暖！"
+        },
+        {
+            "role": "user",
+            "content": "谢谢！那后天呢？"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "用户现在问后天的天气，后天是2024-01-17，我可以直接查询（因为已知今天日期）🗓️",
+            "tool_calls": [
+                {
+                    "id": "call_fR3nK8mV7pL4xT2yW9jB5gH3",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-17\"}",
+                        "name": "get_weather"
+                    }
+                },
+                {
+                    "id": "call_hT5pN2kY9rV6zL3wX1mD7jK8",
+                    "type": "function",
+                    "function": {
+                        "arguments": "{\"location\": \"Beijing\", \"date\": \"2024-01-17\"}",
+                        "name": "get_weather"
+                    }
+                }
+            ]
+        },
+        {
+            "tool_call_id": "call_fR3nK8mV7pL4xT2yW9jB5gH3",
+            "role": "tool",
+            "content": "{\"location\": \"Hangzhou\", \"date\": \"2024-01-17\", \"temperature_high\": \"15\", \"temperature_low\": \"8\", \"weather\": \"小雨\", \"humidity\": \"80%\"}"
+        },
+        {
+            "tool_call_id": "call_hT5pN2kY9rV6zL3wX1mD7jK8",
+            "role": "tool",
+            "content": "{\"location\": \"Beijing\", \"date\": \"2024-01-17\", \"temperature_high\": \"0\", \"temperature_low\": \"-6\", \"weather\": \"多云\", \"humidity\": \"45%\"}"
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "获取到后天的天气数据，整理回复给用户📝",
+            "content": "后天（2024年1月17日）的天气情况：\n\n**杭州**：\n- 天气：小雨\n- 最高温度：15°C\n- 最低温度：8°C\n- 湿度：80%\n\n**北京**：\n- 天气：多云\n- 最高温度：0°C\n- 最低温度：-6°C\n- 湿度：45%\n\n杭州后天会有小雨，温度略有回升，记得带伞。北京会稍微暖和一点，但依然很冷，请继续做好保暖措施。"
+        }
+    ]
+}
--- a/lib/llm/tests/data/deepseek-v3.2/test_input_search_w_date.json
+++ b/lib/llm/tests/data/deepseek-v3.2/test_input_search_w_date.json
--- a/lib/llm/tests/data/deepseek-v3.2/test_input_search_wo_date.json
+++ b/lib/llm/tests/data/deepseek-v3.2/test_input_search_wo_date.json
--- a/lib/llm/tests/data/deepseek-v3.2/test_output.txt
+++ b/lib/llm/tests/data/deepseek-v3.2/test_output.txt
+<｜begin▁of▁sentence｜>You are a helpful Assistant.
+## Tools
+You have access to a set of tools you can use to answer the user's question.
+You can invoke functions by writing a "<｜DSML｜function_calls>" block like the following as part of your reply to the user:
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="$FUNCTION_NAME">
+<｜DSML｜parameter name="$PARAMETER_NAME" string="true|false">$PARAMETER_VALUE</｜DSML｜parameter>
+...
+</｜DSML｜invoke>
+<｜DSML｜invoke name="$FUNCTION_NAME2">
+...
+</｜DSML｜invoke>
+</｜DSML｜function_calls>
+String and scalar parameters should be specified as is without any escaping or quotes, while lists and objects should use JSON format. The "string" attribute should be set to "true" for string type parameters and "false" for other types (numbers, booleans, arrays, objects).
+If the thinking_mode is enabled, then after function results you should strongly consider outputting a thinking block. Here is an example:
+<｜DSML｜function_calls>
+...
+</｜DSML｜function_calls>
+<function_results>
+...
+</function_results>
+<think>...thinking about results</think>
+Here are the functions available in JSONSchema format:
+<functions>
+{"name": "get_datetime", "description": "Get the current date and time", "parameters": {"type": "object", "properties": {"timezone": {"type": "string", "description": "The timezone, e.g. Asia/Shanghai, UTC"}}, "required": ["timezone"]}}
+{"name": "get_weather", "description": "Get the weather for a specific date and location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name, e.g. Beijing, Hangzhou"}, "date": {"type": "string", "description": "The date in YYYY-MM-DD format"}}, "required": ["location", "date"]}}
+</functions>
+<｜User｜>明天杭州和北京的天气怎么样？<｜Assistant｜></think>
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="get_datetime">
+<｜DSML｜parameter name="timezone" string="true">Asia/Shanghai</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+<function_results>
+<result>{"current_date": "2024-01-15", "current_time": "14:30:00", "timezone": "Asia/Shanghai"}</result>
+</function_results>
+</think>
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Hangzhou</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-16</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+<function_results>
+<result>{"location": "Hangzhou", "date": "2024-01-16", "temperature_high": "12", "temperature_low": "5", "weather": "多云", "humidity": "65%"}</result>
+<result>{"location": "Beijing", "date": "2024-01-16", "temperature_high": "-2", "temperature_low": "-8", "weather": "晴", "humidity": "30%"}</result>
+</function_results>
+</think>根据查询结果，明天（2024年1月16日）的天气情况如下：
+**杭州**：
+- 天气：多云
+- 最高温度：12°C
+- 最低温度：5°C
+- 湿度：65%
+**北京**：
+- 天气：晴
+- 最高温度：-2°C
+- 最低温度：-8°C
+- 湿度：30%
+杭州明天会比较温暖但有些多云，而北京会很冷但是晴天。建议在北京的朋友要注意保暖！<｜end▁of▁sentence｜><｜User｜>谢谢！那后天呢？<｜Assistant｜><think>用户现在问后天的天气，后天是2024-01-17，我可以直接查询（因为已知今天日期）🗓️</think>
+<｜DSML｜function_calls>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Hangzhou</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-17</｜DSML｜parameter>
+</｜DSML｜invoke>
+<｜DSML｜invoke name="get_weather">
+<｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>
+<｜DSML｜parameter name="date" string="true">2024-01-17</｜DSML｜parameter>
+</｜DSML｜invoke>
+</｜DSML｜function_calls><｜end▁of▁sentence｜>
+<function_results>
+<result>{"location": "Hangzhou", "date": "2024-01-17", "temperature_high": "15", "temperature_low": "8", "weather": "小雨", "humidity": "80%"}</result>
+<result>{"location": "Beijing", "date": "2024-01-17", "temperature_high": "0", "temperature_low": "-6", "weather": "多云", "humidity": "45%"}</result>
+</function_results>
+<think>获取到后天的天气数据，整理回复给用户📝</think>后天（2024年1月17日）的天气情况：
+**杭州**：
+- 天气：小雨
+- 最高温度：15°C
+- 最低温度：8°C
+- 湿度：80%
+**北京**：
+- 天气：多云
+- 最高温度：0°C
+- 最低温度：-6°C
+- 湿度：45%
+杭州后天会有小雨，温度略有回升，记得带伞。北京会稍微暖和一点，但依然很冷，请继续做好保暖措施。<｜end▁of▁sentence｜>
--- a/lib/llm/tests/data/deepseek-v3.2/test_output_search_w_date.txt
+++ b/lib/llm/tests/data/deepseek-v3.2/test_output_search_w_date.txt
--- a/lib/llm/tests/data/deepseek-v3.2/test_output_search_wo_date.txt
+++ b/lib/llm/tests/data/deepseek-v3.2/test_output_search_wo_date.txt
--- a/lib/llm/tests/deepseek_v32_encoding.rs
+++ b/lib/llm/tests/deepseek_v32_encoding.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//! Tests for DeepSeek V3.2 encoding against official test data
+//!
+//! These tests use the official test files from:
+//! https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main/encoding
+use dynamo_llm::preprocessor::prompt::deepseek_v32::{ThinkingMode, encode_messages};
+use serde_json::Value as JsonValue;
+use std::fs;
+use std::path::PathBuf;
+/// Returns the directory holding the DeepSeek V3.2 fixture files, i.e.
+/// `tests/data/deepseek-v3.2` under this crate's manifest directory.
+fn get_test_data_path() -> PathBuf {
+    let mut fixtures = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    fixtures.push("tests/data/deepseek-v3.2");
+    fixtures
+}
+/// Runs one official DeepSeek V3.2 fixture pair through `encode_messages`.
+///
+/// `input_file` names a JSON fixture with a `messages` array and an optional
+/// `tools` entry; `output_file` names the text file holding the expected
+/// encoding. Both are resolved against `get_test_data_path()`. When `tools`
+/// is present it is copied onto the first message before encoding. The
+/// messages are encoded with `ThinkingMode::Thinking` and `add_bos_token`
+/// set to `true`, and both sides are trimmed before being compared.
+///
+/// # Panics
+///
+/// Panics when a fixture cannot be read or parsed, when `messages` is not an
+/// array, when tools must be attached to a first message that is not a JSON
+/// object, when encoding fails, or when the encoded text differs from the
+/// expected text.
+fn run_official_test(input_file: &str, output_file: &str) {
+    let test_dir = get_test_data_path();
+    // Load test input; keep the underlying error so a failure explains itself.
+    let input_path = test_dir.join(input_file);
+    let input_text = fs::read_to_string(&input_path)
+        .unwrap_or_else(|err| panic!("Failed to read {}: {err}", input_path.display()));
+    let input_data: JsonValue = serde_json::from_str(&input_text)
+        .unwrap_or_else(|err| panic!("Failed to parse {}: {err}", input_path.display()));
+    // Load expected output
+    let output_path = test_dir.join(output_file);
+    let expected_output = fs::read_to_string(&output_path)
+        .unwrap_or_else(|err| panic!("Failed to read {}: {err}", output_path.display()));
+    // Extract messages
+    let mut messages = input_data["messages"]
+        .as_array()
+        .expect("Missing messages")
+        .clone();
+    // Attach tools to the first message when the fixture provides them
+    if let Some(tools) = input_data.get("tools")
+        && let Some(first_msg) = messages.get_mut(0)
+    {
+        first_msg
+            .as_object_mut()
+            .expect("First message must be a JSON object to receive tools")
+            .insert("tools".to_string(), tools.clone());
+    }
+    // Encode messages
+    let result = encode_messages(
+        &messages,
+        ThinkingMode::Thinking,
+        true, // add_bos_token
+    )
+    .expect("Failed to encode messages");
+    // Compare outputs, ignoring leading/trailing whitespace
+    let expected = expected_output.trim();
+    let actual = result.trim();
+    if expected != actual {
+        println!("=== Test: {} ===", input_file);
+        let exp_lines: Vec<&str> = expected.lines().collect();
+        let act_lines: Vec<&str> = actual.lines().collect();
+        // Show first difference among the lines both outputs have
+        let first_diff = exp_lines
+            .iter()
+            .zip(&act_lines)
+            .position(|(exp_line, act_line)| exp_line != act_line);
+        if let Some(i) = first_diff {
+            println!("Line {} differs:", i + 1);
+            println!("  Expected: {}", exp_lines[i]);
+            println!("  Actual:   {}", act_lines[i]);
+        }
+        if exp_lines.len() != act_lines.len() {
+            println!("\nLine count mismatch:");
+            println!("  Expected: {} lines", exp_lines.len());
+            println!("  Actual:   {} lines", act_lines.len());
+            if first_diff.is_none() {
+                // One output is a line-wise prefix of the other, so point at
+                // the first line that only the longer one has.
+                let shared = exp_lines.len().min(act_lines.len());
+                if let Some(extra) = exp_lines.get(shared) {
+                    println!("  First line missing from actual (line {}): {}", shared + 1, extra);
+                } else if let Some(extra) = act_lines.get(shared) {
+                    println!("  First unexpected line in actual (line {}): {}", shared + 1, extra);
+                }
+            }
+        } else if first_diff.is_none() {
+            // `str::lines` accepts both "\n" and "\r\n", so identical line
+            // lists with unequal text means only the terminators differ.
+            println!("No line differs; outputs differ only in line terminators (\\r\\n vs \\n)");
+        }
+        panic!("Output does not match expected for {}", input_file);
+    }
+}
+/// Runs the `test_input.json` / `test_output.txt` official fixture pair.
+#[test]
+fn test_official_basic_example() {
+    let (input, expected) = ("test_input.json", "test_output.txt");
+    run_official_test(input, expected);
+}
+/// Runs the `test_input_search_wo_date.json` /
+/// `test_output_search_wo_date.txt` official fixture pair.
+#[test]
+fn test_official_search_without_date() {
+    let input = "test_input_search_wo_date.json";
+    let expected = "test_output_search_wo_date.txt";
+    run_official_test(input, expected);
+}
+/// Runs the `test_input_search_w_date.json` /
+/// `test_output_search_w_date.txt` official fixture pair.
+#[test]
+fn test_official_search_with_date() {
+    let input = "test_input_search_w_date.json";
+    let expected = "test_output_search_w_date.txt";
+    run_official_test(input, expected);
+}
+/// Encodes a short system/user/assistant/user exchange with no tools and
+/// asserts that the begin-of-sentence marker, the role markers, the assistant
+/// reply and an end-of-sentence marker all appear in the output.
+#[test]
+fn test_simple_conversation_no_tools() {
+    let conversation = serde_json::json!([
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "Hello!"},
+        {"role": "assistant", "content": "Hi! How can I help you today?"},
+        {"role": "user", "content": "What is 2+2?"}
+    ]);
+    let turns = conversation.as_array().unwrap();
+    let encoded =
+        encode_messages(turns, ThinkingMode::Thinking, true).expect("Failed to encode");
+    // The prompt opens with the BOS marker
+    assert!(encoded.starts_with("<｜begin▁of▁sentence｜>"));
+    // The first user turn is followed directly by the assistant marker
+    assert!(encoded.contains("<｜User｜>Hello!<｜Assistant｜>"));
+    // The earlier assistant reply is kept and terminated
+    assert!(encoded.contains("Hi! How can I help you today?"));
+    assert!(encoded.contains("<｜end▁of▁sentence｜>"));
+}
+/// Encodes an English tool-using conversation and checks for its markers.
+///
+/// The conversation consists of a system turn declaring two tools, one user
+/// question, assistant turns carrying `reasoning_content` and `tool_calls`,
+/// the matching tool results, and a final assistant answer. The assertions
+/// only check that the expected markers and fragments are present in the
+/// encoded text.
+#[test]
+fn test_comprehensive_conversation_with_tools() {
+    // Tool declarations attached to the system turn below
+    let tools = serde_json::json!([
+        {
+            "type": "function",
+            "function": {
+                "name": "get_datetime",
+                "description": "Get the current date and time",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "timezone": {
+                            "type": "string",
+                            "description": "The timezone, e.g. America/New_York, UTC"
+                        }
+                    },
+                    "required": ["timezone"]
+                }
+            }
+        },
+        {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the weather for a specific date and location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city name, e.g. New York, San Francisco"
+                        },
+                        "date": {
+                            "type": "string",
+                            "description": "The date in YYYY-MM-DD format"
+                        }
+                    },
+                    "required": ["location", "date"]
+                }
+            }
+        }
+    ]);
+    let conversation = serde_json::json!([
+        {
+            "role": "system",
+            "content": "You are a helpful weather assistant.",
+            "tools": tools
+        },
+        {"role": "user", "content": "What's the weather tomorrow in San Francisco and New York?"},
+        {
+            "role": "assistant",
+            "reasoning_content": "User is asking about tomorrow's weather. I need to first get the current date to calculate tomorrow's date.",
+            "tool_calls": [{
+                "id": "call_1",
+                "type": "function",
+                "function": {
+                    "name": "get_datetime",
+                    "arguments": r#"{"timezone": "America/New_York"}"#
+                }
+            }]
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_1",
+            "content": r#"{"current_date": "2024-01-15", "current_time": "14:30:00", "timezone": "America/New_York"}"#
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "Now I know today is 2024-01-15, so tomorrow is 2024-01-16. Let me query the weather for both cities.",
+            "tool_calls": [
+                {
+                    "id": "call_2",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": r#"{"location": "San Francisco", "date": "2024-01-16"}"#
+                    }
+                },
+                {
+                    "id": "call_3",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": r#"{"location": "New York", "date": "2024-01-16"}"#
+                    }
+                }
+            ]
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_2",
+            "content": r#"{"location": "San Francisco", "date": "2024-01-16", "temperature_high": "65", "temperature_low": "55", "weather": "sunny", "humidity": "70%"}"#
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_3",
+            "content": r#"{"location": "New York", "date": "2024-01-16", "temperature_high": "30", "temperature_low": "20", "weather": "cloudy", "humidity": "45%"}"#
+        },
+        {
+            "role": "assistant",
+            "reasoning_content": "Got the weather data for both cities. Let me format a nice response for the user.",
+            "content": "Here's the weather forecast for tomorrow (January 16, 2024):\n\n**San Francisco**:\n- Weather: Sunny\n- High: 65°F\n- Low: 55°F\n- Humidity: 70%\n\n**New York**:\n- Weather: Cloudy\n- High: 30°F\n- Low: 20°F\n- Humidity: 45%\n\nSan Francisco will be warm and sunny, while New York will be cold and cloudy. Dress warmly if you're in New York!"
+        }
+    ]);
+    let turns = conversation.as_array().unwrap();
+    let encoded =
+        encode_messages(turns, ThinkingMode::Thinking, true).expect("Failed to encode");
+    // Prompt prefix and tool declarations
+    assert!(encoded.starts_with("<｜begin▁of▁sentence｜>"));
+    assert!(encoded.contains("## Tools"));
+    assert!(encoded.contains("get_datetime"));
+    assert!(encoded.contains("get_weather"));
+    // User turn and assistant reasoning
+    assert!(encoded.contains("<｜User｜>What's the weather tomorrow"));
+    assert!(encoded.contains("<｜Assistant｜><think>"));
+    assert!(encoded.contains("User is asking about tomorrow's weather"));
+    assert!(encoded.contains("</think>"));
+    // Tool calls in DSML form
+    assert!(encoded.contains("<｜DSML｜function_calls>"));
+    assert!(encoded.contains(r#"<｜DSML｜invoke name="get_datetime">"#));
+    assert!(encoded.contains(
+        r#"<｜DSML｜parameter name="timezone" string="true">America/New_York</｜DSML｜parameter>"#
+    ));
+    assert!(encoded.contains("</｜DSML｜function_calls>"));
+    // Tool results and final answer
+    assert!(encoded.contains("<function_results>"));
+    assert!(encoded.contains("<result>"));
+    assert!(encoded.contains("</function_results>"));
+    assert!(encoded.contains("San Francisco"));
+    assert!(encoded.contains("New York"));
+    assert!(encoded.contains("<｜end▁of▁sentence｜>"));
+}
+/// Encodes an assistant turn that carries `reasoning_content` and asserts
+/// that the output contains `<think>`, `</think>` and the reasoning text.
+#[test]
+fn test_with_reasoning_content() {
+    let conversation = serde_json::json!([
+        {"role": "user", "content": "Calculate 15 * 23"},
+        {
+            "role": "assistant",
+            "content": "The answer is 345.",
+            "reasoning_content": "Let me compute this step by step: 15 * 23 = 15 * 20 + 15 * 3 = 300 + 45 = 345"
+        }
+    ]);
+    let turns = conversation.as_array().unwrap();
+    let encoded =
+        encode_messages(turns, ThinkingMode::Thinking, true).expect("Failed to encode");
+    // Both thinking tags and the reasoning itself must be present
+    assert!(encoded.contains("<think>"));
+    assert!(encoded.contains("</think>"));
+    assert!(encoded.contains("Let me compute this step by step"));
+}
+/// Encodes an assistant turn with a single `get_weather` tool call and
+/// asserts that the call appears in DSML form, with one string parameter
+/// element per argument.
+#[test]
+fn test_tool_call_formatting() {
+    let conversation = serde_json::json!([
+        {"role": "user", "content": "What's the weather in Beijing?"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [{
+                "id": "call_123",
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "arguments": r#"{"location": "Beijing", "unit": "celsius"}"#
+                }
+            }]
+        }
+    ]);
+    let turns = conversation.as_array().unwrap();
+    let encoded =
+        encode_messages(turns, ThinkingMode::Thinking, true).expect("Failed to encode");
+    // Opening tags of the DSML block and the invocation
+    assert!(encoded.contains("<｜DSML｜function_calls>"));
+    assert!(encoded.contains(r#"<｜DSML｜invoke name="get_weather">"#));
+    // One parameter element per argument
+    assert!(encoded.contains(
+        r#"<｜DSML｜parameter name="location" string="true">Beijing</｜DSML｜parameter>"#
+    ));
+    assert!(encoded.contains(
+        r#"<｜DSML｜parameter name="unit" string="true">celsius</｜DSML｜parameter>"#
+    ));
+    // Closing tags
+    assert!(encoded.contains("</｜DSML｜invoke>"));
+    assert!(encoded.contains("</｜DSML｜function_calls>"));
+}
+/// Encodes a tool call followed by its `tool` reply and asserts that the
+/// reply content appears verbatim alongside `<function_results>` and
+/// `<result>` tags.
+#[test]
+fn test_tool_results() {
+    let conversation = serde_json::json!([
+        {"role": "user", "content": "Check weather"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [{
+                "id": "call_123",
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "arguments": r#"{"location": "Beijing"}"#
+                }
+            }]
+        },
+        {
+            "role": "tool",
+            "tool_call_id": "call_123",
+            "content": r#"{"temperature": "20C", "condition": "sunny"}"#
+        }
+    ]);
+    let turns = conversation.as_array().unwrap();
+    let encoded =
+        encode_messages(turns, ThinkingMode::Thinking, true).expect("Failed to encode");
+    // The tool reply must be present together with the result tags
+    assert!(encoded.contains("<function_results>"));
+    assert!(encoded.contains("<result>"));
+    assert!(encoded.contains(r#"{"temperature": "20C", "condition": "sunny"}"#));
+    assert!(encoded.contains("</result>"));
+    assert!(encoded.contains("</function_results>"));
+}
+/// Encodes one assistant turn holding two `get_weather` calls and asserts
+/// that both share a single DSML `function_calls` block with two `invoke`
+/// elements.
+#[test]
+fn test_multiple_tool_calls() {
+    let conversation = serde_json::json!([
+        {"role": "user", "content": "Weather in Beijing and Shanghai"},
+        {
+            "role": "assistant",
+            "content": "",
+            "tool_calls": [
+                {
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": r#"{"location": "Beijing"}"#
+                    }
+                },
+                {
+                    "id": "call_2",
+                    "type": "function",
+                    "function": {
+                        "name": "get_weather",
+                        "arguments": r#"{"location": "Shanghai"}"#
+                    }
+                }
+            ]
+        }
+    ]);
+    let turns = conversation.as_array().unwrap();
+    let encoded =
+        encode_messages(turns, ThinkingMode::Thinking, true).expect("Failed to encode");
+    // Both call arguments must show up
+    assert!(encoded.contains("Beijing"));
+    assert!(encoded.contains("Shanghai"));
+    // Exactly one function_calls block is opened and closed
+    let opened_blocks = encoded.matches("<｜DSML｜function_calls>").count();
+    assert_eq!(opened_blocks, 1);
+    let closed_blocks = encoded.matches("</｜DSML｜function_calls>").count();
+    assert_eq!(closed_blocks, 1);
+    // The block holds two invocations
+    let invocations = encoded.matches("<｜DSML｜invoke").count();
+    assert_eq!(invocations, 2);
+}
+#[test]
+fn test_chat_mode_vs_thinking_mode() {
+    let messages = serde_json::json!([
+        {"role": "user", "content": "Hello"}
+    ]);
+    let chat_result = encode_messages(messages.as_array().unwrap(), ThinkingMode::Chat, true)
+        .expect("Failed to encode");
+    let thinking_result =
+        encode_messages(messages.as_array().unwrap(), ThinkingMode::Thinking, true)
+            .expect("Failed to encode");
+    // Chat mode should have </think>, thinking mode should have <think>
+    assert!(chat_result.contains("</think>"));
+    assert!(!chat_result.contains("<think>"));
+    assert!(thinking_result.contains("<think>"));
+}