feat: support `reasoning_content` context management (DeepSeek v3.2 & GLM-4.7 & Kimi-2.5) (#6107)

Signed-off-by: Vladislav Nosivskoy <vladnosiv@gmail.com>

feat: support `reasoning_content` context management (DeepSeek v3.2 & GLM-4.7 & Kimi-2.5) (#6107)
Signed-off-by: Vladislav Nosivskoy <vladnosiv@gmail.com>
2cee89a0 · Vladislav Nosivskoy · GitHub · e18840ce · 2cee89a0 · 2cee89a0
Unverified Commit 2cee89a0 authored Feb 11, 2026 by Vladislav Nosivskoy Committed by GitHub Feb 11, 2026
3 changed files
--- a/lib/async-openai/src/types/chat.rs
+++ b/lib/async-openai/src/types/chat.rs
@@ -476,6 +476,10 @@ pub struct ChatCompletionRequestAssistantMessage {
    /// The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<ChatCompletionRequestAssistantMessageContent>,
+    /// Optional internal reasoning content from a previous assistant turn.
+    /// Used by reasoning-capable models that consume prior chain-of-thought-like context.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_content: Option<String>,
    /// The refusal message by the assistant.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub refusal: Option<String>,
@@ -1274,4 +1278,40 @@ mod tests {
        let json = serde_json::to_string(&request).unwrap();
        assert!(json.contains("mm_processor_kwargs"));
    }
+    // Round-trip check: an assistant message that carries `reasoning_content`
+    // must expose the field after deserialization and emit it again on
+    // serialization.
+    #[test]
+    fn test_assistant_request_reasoning_content_roundtrip() {
+        let payload = r#"{
+            "model": "deepseek-v3.2",
+            "messages": [
+                {"role": "user", "content": "test"},
+                {
+                    "role": "assistant",
+                    "reasoning_content": "thinking...",
+                    "tool_calls": [{
+                        "id": "call_1",
+                        "type": "function",
+                        "function": {
+                            "name": "f",
+                            "arguments": "{}"
+                        }
+                    }]
+                }
+            ]
+        }"#;
+        let parsed: CreateChatCompletionRequest = serde_json::from_str(payload).unwrap();
+
+        // The second message is the assistant turn under test.
+        let ChatCompletionRequestMessage::Assistant(assistant) = &parsed.messages[1] else {
+            panic!("expected assistant message");
+        };
+        assert_eq!(assistant.reasoning_content.as_deref(), Some("thinking..."));
+
+        // Serializing the whole request again must keep the field as a JSON string.
+        let reserialized = serde_json::to_value(&parsed).unwrap();
+        assert_eq!(
+            reserialized["messages"][1]["reasoning_content"].as_str(),
+            Some("thinking...")
+        );
+    }
 }
--- a/lib/llm/src/entrypoint/input/text.rs
+++ b/lib/llm/src/entrypoint/input/text.rs
@@ -130,6 +130,7 @@ async fn main_loop(
        // Stream the output to stdout
        let mut stdout = std::io::stdout();
        let mut assistant_message = String::new();
+        let mut assistant_reasoning = String::new();
        while let Some(item) = stream.next().await {
            if cancel_token.is_cancelled() {
                break;
@@ -154,6 +155,9 @@ async fn main_loop(
                            }
                        }
                    }
+                    if let Some(reasoning) = &chat_comp.delta.reasoning_content {
+                        assistant_reasoning += reasoning;
+                    }
                    if let Some(reason) = chat_comp.finish_reason {
                        tracing::trace!("finish reason: {reason:?}");
                        break;
@@ -183,6 +187,7 @@ async fn main_loop(
        let assistant_message = dynamo_async_openai::types::ChatCompletionRequestMessage::Assistant(
            dynamo_async_openai::types::ChatCompletionRequestAssistantMessage {
                content: Some(assistant_content),
+                reasoning_content: (!assistant_reasoning.is_empty()).then_some(assistant_reasoning),
                ..Default::default()
            },
        );

--- a/lib/llm/tests/deepseek_v32_encoding.rs
+++ b/lib/llm/tests/deepseek_v32_encoding.rs
@@ -7,6 +7,8 @@
 //! https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main/encoding
 use dynamo_llm::preprocessor::prompt::deepseek_v32::{ThinkingMode, encode_messages};
+use dynamo_llm::preprocessor::prompt::{OAIChatLikeRequest, OAIPromptFormatter};
+use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
 use serde_json::Value as JsonValue;
 use std::fs;
 use std::path::PathBuf;
@@ -277,6 +279,48 @@ fn test_with_reasoning_content() {
    assert!(result.contains("Let me compute this step by step"));
 }
+// End-to-end check: `reasoning_content` on an assistant tool-call turn must be
+// present after parsing the chat request, and the prompt rendered by the
+// thinking-mode DeepSeek V3.2 formatter must contain both the reasoning text
+// and the `<think>` / `</think>` markers.
+#[test]
+fn test_reasoning_content_survives_chat_request_parsing_and_rendering() {
+    let payload = r#"{
+        "model": "deepseek-v3.2",
+        "messages": [
+            {"role": "user", "content": "weather tomorrow?"},
+            {
+                "role": "assistant",
+                "reasoning_content": "need date first",
+                "tool_calls": [{
+                    "id": "call_1",
+                    "type": "function",
+                    "function": {
+                        "name": "get_datetime",
+                        "arguments": "{\"timezone\":\"UTC\"}"
+                    }
+                }]
+            },
+            {
+                "role": "tool",
+                "tool_call_id": "call_1",
+                "content": "{\"current_date\":\"2024-01-15\"}"
+            }
+        ]
+    }"#;
+    let parsed: NvCreateChatCompletionRequest = serde_json::from_str(payload).unwrap();
+
+    // Step 1: the field is still there when the parsed messages are serialized.
+    let serialized_messages = serde_json::to_value(parsed.messages()).unwrap();
+    assert_eq!(
+        serialized_messages[1]["reasoning_content"].as_str(),
+        Some("need date first")
+    );
+
+    // Step 2: the rendered prompt carries the reasoning text and think markers.
+    let prompt = dynamo_llm::preprocessor::prompt::deepseek_v32::DeepSeekV32Formatter::new_thinking()
+        .render(&parsed)
+        .unwrap();
+    assert!(prompt.contains("need date first"));
+    assert!(prompt.contains("<think>"));
+    assert!(prompt.contains("</think>"));
+}
 #[test]
 fn test_tool_call_formatting() {
    let messages = serde_json::json!([