Unverified commit 2cee89a0, authored by Vladislav Nosivskoy, committed by GitHub
Browse files

feat: support `reasoning_content` context management (DeepSeek v3.2 & GLM-4.7 & Kimi-2.5) (#6107)


Signed-off-by: Vladislav Nosivskoy <vladnosiv@gmail.com>
parent e18840ce
...@@ -476,6 +476,10 @@ pub struct ChatCompletionRequestAssistantMessage { ...@@ -476,6 +476,10 @@ pub struct ChatCompletionRequestAssistantMessage {
/// The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified. /// The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<ChatCompletionRequestAssistantMessageContent>, pub content: Option<ChatCompletionRequestAssistantMessageContent>,
/// Optional internal reasoning content from a previous assistant turn.
/// Used by reasoning-capable models that consume prior chain-of-thought-like context.
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_content: Option<String>,
/// The refusal message by the assistant. /// The refusal message by the assistant.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub refusal: Option<String>, pub refusal: Option<String>,
...@@ -1274,4 +1278,40 @@ mod tests { ...@@ -1274,4 +1278,40 @@ mod tests {
let json = serde_json::to_string(&request).unwrap(); let json = serde_json::to_string(&request).unwrap();
assert!(json.contains("mm_processor_kwargs")); assert!(json.contains("mm_processor_kwargs"));
} }
/// Round-trips an assistant request message that carries `reasoning_content`.
///
/// The assistant turn in the payload has `tool_calls` and `reasoning_content` but no
/// `content`. The test checks that the field is populated after deserialization and
/// is present again when the request is serialized back to JSON.
#[test]
fn test_assistant_request_reasoning_content_roundtrip() {
    let payload = r#"{
"model": "deepseek-v3.2",
"messages": [
{"role": "user", "content": "test"},
{
"role": "assistant",
"reasoning_content": "thinking...",
"tool_calls": [{
"id": "call_1",
"type": "function",
"function": {
"name": "f",
"arguments": "{}"
}
}]
}
]
}"#;

    // Deserialization side: the second message must be the assistant turn and must
    // expose the reasoning text.
    let parsed: CreateChatCompletionRequest = serde_json::from_str(payload).unwrap();
    let ChatCompletionRequestMessage::Assistant(assistant_turn) = &parsed.messages[1] else {
        panic!("expected assistant message");
    };
    assert_eq!(
        assistant_turn.reasoning_content.as_deref(),
        Some("thinking...")
    );

    // Serialization side: the field must be written out as a JSON string again.
    let reserialized = serde_json::to_value(&parsed).unwrap();
    assert_eq!(
        reserialized["messages"][1]["reasoning_content"].as_str(),
        Some("thinking...")
    );
}
} }
...@@ -130,6 +130,7 @@ async fn main_loop( ...@@ -130,6 +130,7 @@ async fn main_loop(
// Stream the output to stdout // Stream the output to stdout
let mut stdout = std::io::stdout(); let mut stdout = std::io::stdout();
let mut assistant_message = String::new(); let mut assistant_message = String::new();
let mut assistant_reasoning = String::new();
while let Some(item) = stream.next().await { while let Some(item) = stream.next().await {
if cancel_token.is_cancelled() { if cancel_token.is_cancelled() {
break; break;
...@@ -154,6 +155,9 @@ async fn main_loop( ...@@ -154,6 +155,9 @@ async fn main_loop(
} }
} }
} }
if let Some(reasoning) = &chat_comp.delta.reasoning_content {
assistant_reasoning += reasoning;
}
if let Some(reason) = chat_comp.finish_reason { if let Some(reason) = chat_comp.finish_reason {
tracing::trace!("finish reason: {reason:?}"); tracing::trace!("finish reason: {reason:?}");
break; break;
...@@ -183,6 +187,7 @@ async fn main_loop( ...@@ -183,6 +187,7 @@ async fn main_loop(
let assistant_message = dynamo_async_openai::types::ChatCompletionRequestMessage::Assistant( let assistant_message = dynamo_async_openai::types::ChatCompletionRequestMessage::Assistant(
dynamo_async_openai::types::ChatCompletionRequestAssistantMessage { dynamo_async_openai::types::ChatCompletionRequestAssistantMessage {
content: Some(assistant_content), content: Some(assistant_content),
reasoning_content: (!assistant_reasoning.is_empty()).then_some(assistant_reasoning),
..Default::default() ..Default::default()
}, },
); );
......
...@@ -7,6 +7,8 @@ ...@@ -7,6 +7,8 @@
//! https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main/encoding //! https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main/encoding
use dynamo_llm::preprocessor::prompt::deepseek_v32::{ThinkingMode, encode_messages}; use dynamo_llm::preprocessor::prompt::deepseek_v32::{ThinkingMode, encode_messages};
use dynamo_llm::preprocessor::prompt::{OAIChatLikeRequest, OAIPromptFormatter};
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
use serde_json::Value as JsonValue; use serde_json::Value as JsonValue;
use std::fs; use std::fs;
use std::path::PathBuf; use std::path::PathBuf;
...@@ -277,6 +279,48 @@ fn test_with_reasoning_content() { ...@@ -277,6 +279,48 @@ fn test_with_reasoning_content() {
assert!(result.contains("Let me compute this step by step")); assert!(result.contains("Let me compute this step by step"));
} }
/// Follows `reasoning_content` from a raw JSON chat request through to the rendered prompt.
///
/// The request is parsed as an `NvCreateChatCompletionRequest`; the value returned by
/// `messages()` is serialized to confirm the assistant turn still carries the field;
/// then the request is rendered with `DeepSeekV32Formatter::new_thinking()` and the
/// output is checked for the reasoning text and both `<think>` / `</think>` markers.
#[test]
fn test_reasoning_content_survives_chat_request_parsing_and_rendering() {
    let payload = r#"{
"model": "deepseek-v3.2",
"messages": [
{"role": "user", "content": "weather tomorrow?"},
{
"role": "assistant",
"reasoning_content": "need date first",
"tool_calls": [{
"id": "call_1",
"type": "function",
"function": {
"name": "get_datetime",
"arguments": "{\"timezone\":\"UTC\"}"
}
}]
},
{
"role": "tool",
"tool_call_id": "call_1",
"content": "{\"current_date\":\"2024-01-15\"}"
}
]
}"#;

    // Step 1: parsing must keep the field on the assistant message (index 1).
    let parsed: NvCreateChatCompletionRequest = serde_json::from_str(payload).unwrap();
    let messages_json = serde_json::to_value(parsed.messages()).unwrap();
    assert_eq!(
        messages_json[1]["reasoning_content"].as_str(),
        Some("need date first")
    );

    // Step 2: rendering must surface the reasoning text and the think markers.
    let rendered =
        dynamo_llm::preprocessor::prompt::deepseek_v32::DeepSeekV32Formatter::new_thinking()
            .render(&parsed)
            .unwrap();
    assert!(rendered.contains("need date first"));
    assert!(rendered.contains("<think>"));
    assert!(rendered.contains("</think>"));
}
#[test] #[test]
fn test_tool_call_formatting() { fn test_tool_call_formatting() {
let messages = serde_json::json!([ let messages = serde_json::json!([
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment