Unverified commit 2cee89a0, authored by Vladislav Nosivskoy and committed by GitHub
Browse files

feat: support `reasoning_content` context management (DeepSeek v3.2 & GLM-4.7 & Kimi-2.5) (#6107)


Signed-off-by: Vladislav Nosivskoy <vladnosiv@gmail.com>
parent e18840ce
......@@ -476,6 +476,10 @@ pub struct ChatCompletionRequestAssistantMessage {
/// The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.
#[serde(skip_serializing_if = "Option::is_none")]
pub content: Option<ChatCompletionRequestAssistantMessageContent>,
/// Optional internal reasoning content from a previous assistant turn.
/// Used by reasoning-capable models that consume prior chain-of-thought-like context.
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning_content: Option<String>,
/// The refusal message by the assistant.
#[serde(skip_serializing_if = "Option::is_none")]
pub refusal: Option<String>,
......@@ -1274,4 +1278,40 @@ mod tests {
let json = serde_json::to_string(&request).unwrap();
assert!(json.contains("mm_processor_kwargs"));
}
/// Checks that `reasoning_content` on an assistant message is picked up when a
/// chat completion request is deserialized, and is emitted again under the same
/// key when that request is serialized back to JSON.
#[test]
fn test_assistant_request_reasoning_content_roundtrip() {
    // The assistant turn carries reasoning and a tool call but no `content`.
    let payload = r#"{
"model": "deepseek-v3.2",
"messages": [
{"role": "user", "content": "test"},
{
"role": "assistant",
"reasoning_content": "thinking...",
"tool_calls": [{
"id": "call_1",
"type": "function",
"function": {
"name": "f",
"arguments": "{}"
}
}]
}
]
}"#;

    let parsed: CreateChatCompletionRequest = serde_json::from_str(payload).unwrap();

    // Deserialization side: the second message must be the assistant variant
    // and must expose the reasoning text.
    let ChatCompletionRequestMessage::Assistant(assistant_msg) = &parsed.messages[1] else {
        panic!("expected assistant message");
    };
    assert_eq!(
        assistant_msg.reasoning_content.as_deref(),
        Some("thinking...")
    );

    // Serialization side: the field must reappear as a JSON string.
    let reserialized = serde_json::to_value(&parsed).unwrap();
    assert_eq!(
        reserialized["messages"][1]["reasoning_content"].as_str(),
        Some("thinking...")
    );
}
}
......@@ -130,6 +130,7 @@ async fn main_loop(
// Stream the output to stdout
let mut stdout = std::io::stdout();
let mut assistant_message = String::new();
let mut assistant_reasoning = String::new();
while let Some(item) = stream.next().await {
if cancel_token.is_cancelled() {
break;
......@@ -154,6 +155,9 @@ async fn main_loop(
}
}
}
if let Some(reasoning) = &chat_comp.delta.reasoning_content {
assistant_reasoning += reasoning;
}
if let Some(reason) = chat_comp.finish_reason {
tracing::trace!("finish reason: {reason:?}");
break;
......@@ -183,6 +187,7 @@ async fn main_loop(
let assistant_message = dynamo_async_openai::types::ChatCompletionRequestMessage::Assistant(
dynamo_async_openai::types::ChatCompletionRequestAssistantMessage {
content: Some(assistant_content),
reasoning_content: (!assistant_reasoning.is_empty()).then_some(assistant_reasoning),
..Default::default()
},
);
......
......@@ -7,6 +7,8 @@
//! https://huggingface.co/deepseek-ai/DeepSeek-V3.2/tree/main/encoding
use dynamo_llm::preprocessor::prompt::deepseek_v32::{ThinkingMode, encode_messages};
use dynamo_llm::preprocessor::prompt::{OAIChatLikeRequest, OAIPromptFormatter};
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
use serde_json::Value as JsonValue;
use std::fs;
use std::path::PathBuf;
......@@ -277,6 +279,48 @@ fn test_with_reasoning_content() {
assert!(result.contains("Let me compute this step by step"));
}
/// End-to-end check for `reasoning_content`: a request containing an assistant
/// turn with reasoning and a tool call (followed by the tool result) is parsed,
/// its messages are re-serialized to confirm the field is still present, and
/// the request is then rendered with the thinking-mode DeepSeek V3.2 formatter
/// to confirm the reasoning text and the think tags appear in the prompt.
#[test]
fn test_reasoning_content_survives_chat_request_parsing_and_rendering() {
    use dynamo_llm::preprocessor::prompt::deepseek_v32::DeepSeekV32Formatter;

    let payload = r#"{
"model": "deepseek-v3.2",
"messages": [
{"role": "user", "content": "weather tomorrow?"},
{
"role": "assistant",
"reasoning_content": "need date first",
"tool_calls": [{
"id": "call_1",
"type": "function",
"function": {
"name": "get_datetime",
"arguments": "{\"timezone\":\"UTC\"}"
}
}]
},
{
"role": "tool",
"tool_call_id": "call_1",
"content": "{\"current_date\":\"2024-01-15\"}"
}
]
}"#;

    let chat_request: NvCreateChatCompletionRequest = serde_json::from_str(payload).unwrap();

    // Parsing side: the assistant message (index 1) must still carry the
    // reasoning text once the messages are turned back into JSON.
    let message_json = serde_json::to_value(chat_request.messages()).unwrap();
    assert_eq!(
        message_json[1]["reasoning_content"].as_str(),
        Some("need date first")
    );

    // Rendering side: the prompt must contain the reasoning text and both
    // think tags.
    let thinking_formatter = DeepSeekV32Formatter::new_thinking();
    let prompt = thinking_formatter.render(&chat_request).unwrap();
    assert!(prompt.contains("need date first"));
    assert!(prompt.contains("<think>"));
    assert!(prompt.contains("</think>"));
}
#[test]
fn test_tool_call_formatting() {
let messages = serde_json::json!([
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment