Unverified Commit 4ef28940 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

fix(responses): own input chain in dynamo-protocols to accept Codex/Agents-SDK shapes (#8275)

parent c388483a
......@@ -7,11 +7,11 @@ use std::collections::HashMap;
use dynamo_protocols::types::responses::{
AssistantRole, FunctionCallOutput, FunctionToolCall, IncludeEnum, InputContent, InputItem,
InputParam, InputRole, InputTokenDetails, Instructions, Item, MessageItem, OutputItem,
OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent, OutputTokenDetails,
Reasoning, ReasoningItem, Response, ResponseTextParam, ResponseUsage, Role as ResponseRole,
ServiceTier, Status, SummaryPart, SummaryTextContent, TextResponseFormatConfiguration, Tool,
ToolChoiceOptions, ToolChoiceParam, Truncation,
InputOutputMessageContent, InputParam, InputRole, InputTokenDetails, Instructions, Item,
MessageItem, OutputItem, OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent,
OutputTokenDetails, Reasoning, ReasoningItem, Response, ResponseTextParam, ResponseUsage,
Role as ResponseRole, ServiceTier, Status, SummaryPart, SummaryTextContent,
TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam, Truncation,
};
use dynamo_protocols::types::{
ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice,
......@@ -23,7 +23,8 @@ use dynamo_protocols::types::{
ChatCompletionRequestUserMessageContent, ChatCompletionRequestUserMessageContentPart,
ChatCompletionTool, ChatCompletionToolChoiceOption, ChatCompletionToolType,
CreateChatCompletionRequest, FunctionName, FunctionObject, FunctionType,
ImageDetail as ChatImageDetail, ImageUrl, ResponseFormat, ServiceTier as ChatServiceTier,
ImageDetail as ChatImageDetail, ImageUrl, ReasoningContent, ResponseFormat,
ServiceTier as ChatServiceTier,
};
use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize};
......@@ -35,9 +36,25 @@ use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatComplet
use super::nvext::{NvExt, NvExtProvider};
use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
/// Request body for `POST /v1/responses`. Uses a plain
/// `#[derive(Deserialize)]`: the relaxed input shapes are handled by the
/// Dynamo-owned input chain in `dynamo_protocols::types::responses`
/// (see that crate's `CLAUDE.md`), not by a custom pre-parse JSON patcher.
/// An earlier iteration of this type carried a hand-written `impl Deserialize`
/// that walked `serde_json::Value` to inject synthetic defaults for missing
/// `id` / `status` / `annotations`; that was replaced by typed ownership for
/// correctness and to avoid the double-deserialize cost.
#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateResponse {
/// Flattened CreateResponse fields (model, input, temperature, etc.)
/// Flattened CreateResponse fields (model, input, temperature, etc.).
///
/// `CreateResponse` and its `input` chain (`InputParam`, `InputItem`,
/// `Item`, `MessageItem`, `InputOutputMessage`, `InputOutputMessageContent`,
/// `InputOutputTextContent`) are Dynamo-owned in `dynamo-protocols`. They
/// mirror upstream async-openai but accept the relaxed shapes real clients
/// emit (optional `id` / `status` / `content` on assistant messages,
/// optional `annotations` on `output_text` parts). See
/// `dynamo_protocols::types::responses` for the full rationale.
#[serde(flatten)]
#[schema(value_type = Object)]
pub inner: dynamo_protocols::types::responses::CreateResponse,
......@@ -227,17 +244,110 @@ fn convert_input_content_to_text(content: &[InputContent]) -> String {
.join("")
}
/// Accumulator for consecutive assistant-side items (OutputMessage, FunctionCall,
/// Reasoning, assistant EasyMessage). Chat Completions represents an assistant
/// turn as a single message carrying `content`, `tool_calls`, and
/// `reasoning_content`, so we coalesce adjacent assistant-side Responses input
/// items before emitting.
///
/// `touched` records whether any assistant-side item was seen in the current
/// run. Without it, a standalone assistant message whose text is empty would
/// leave every other field at its default and be indistinguishable from "no
/// assistant item at all", so it would be lost entirely — breaking turn
/// boundaries the model relies on.
#[derive(Default)]
struct PendingAssistant {
/// Concatenation of all non-empty text pushed via `push_text`; stays `None`
/// until some non-empty text arrives.
content: Option<String>,
/// Concatenation of all non-empty reasoning text pushed via
/// `push_reasoning`; stays `None` until some non-empty text arrives.
reasoning_content: Option<String>,
/// Tool calls collected via `push_tool_call`, in arrival order.
tool_calls: Vec<ChatCompletionMessageToolCall>,
/// Set by every `push_*` call (even with empty text). `flush_into` emits
/// nothing while this is `false`.
touched: bool,
}
impl PendingAssistant {
    /// Record assistant text for the current run of assistant-side items.
    ///
    /// The run is always marked as touched. Empty `text` adds nothing to
    /// `content`, but still guarantees a message is emitted on flush.
    fn push_text(&mut self, text: &str) {
        self.touched = true;
        if !text.is_empty() {
            self.content.get_or_insert_with(String::new).push_str(text);
        }
    }

    /// Record prior-turn reasoning summary text so it ends up in the emitted
    /// assistant message's `reasoning_content`. Codex and the Agents SDK
    /// round-trip `Item::Reasoning` mid-turn so the model can see its own
    /// chain-of-thought as input context.
    ///
    /// Like `push_text`, empty input only marks the run as touched.
    fn push_reasoning(&mut self, text: &str) {
        self.touched = true;
        if !text.is_empty() {
            self.reasoning_content
                .get_or_insert_with(String::new)
                .push_str(text);
        }
    }

    /// Append one tool call to the pending assistant turn.
    fn push_tool_call(&mut self, call: ChatCompletionMessageToolCall) {
        self.tool_calls.push(call);
        self.touched = true;
    }

    /// Consume the accumulator and, if any assistant-side item was seen,
    /// append exactly one assistant message to `out`. An untouched
    /// accumulator emits nothing.
    fn flush_into(self, out: &mut Vec<ChatCompletionRequestMessage>) {
        let Self {
            content,
            reasoning_content,
            tool_calls,
            touched,
        } = self;
        if !touched {
            return;
        }

        // How `content` is chosen:
        // - some real text was pushed            -> Some(Text(text))
        // - no text, but tool calls are present  -> None (pure tool-call turn;
        //   matches the Chat Completions nullable-content contract and the
        //   converter's behavior before the coalescing refactor)
        // - no text and no tool calls            -> Some(Text("")) so the turn
        //   boundary survives and adjacent user turns aren't silently merged
        let content = match (content, tool_calls.is_empty()) {
            (Some(text), _) => Some(ChatCompletionRequestAssistantMessageContent::Text(text)),
            (None, true) => Some(ChatCompletionRequestAssistantMessageContent::Text(
                String::new(),
            )),
            (None, false) => None,
        };

        // An empty tool-call list is represented as an absent field.
        let tool_calls = if tool_calls.is_empty() {
            None
        } else {
            Some(tool_calls)
        };

        let assistant = ChatCompletionRequestAssistantMessage {
            content,
            reasoning_content: reasoning_content.map(ReasoningContent::Text),
            refusal: None,
            name: None,
            audio: None,
            tool_calls,
            #[allow(deprecated)]
            function_call: None,
        };
        out.push(ChatCompletionRequestMessage::Assistant(assistant));
    }
}
/// Convert InputParam::Items to a Vec of ChatCompletionRequestMessages.
fn convert_input_items_to_messages(
items: &[InputItem],
) -> Result<Vec<ChatCompletionRequestMessage>, anyhow::Error> {
let mut messages = Vec::with_capacity(items.len());
let mut pending = PendingAssistant::default();
for item in items {
match item {
InputItem::Item(inner_item) => match inner_item {
Item::Message(msg_item) => match msg_item {
MessageItem::Input(msg) => {
std::mem::take(&mut pending).flush_into(&mut messages);
let chat_msg = match msg.role {
InputRole::System | InputRole::Developer => {
let text = convert_input_content_to_text(&msg.content);
......@@ -263,56 +373,37 @@ fn convert_input_items_to_messages(
messages.push(chat_msg);
}
MessageItem::Output(out_msg) => {
// Previous assistant output message -> assistant message
// Fold Refusal parts into the assistant's text content
// (same turn-position they appeared in). Upstream
// `ChatCompletionRequestAssistantMessage` has a
// dedicated `refusal` field, but most chat templates
// render only `content`; putting refusal text inline
// preserves it across turns without requiring template
// awareness of a separate refusal field.
let text = out_msg
.content
.iter()
.filter_map(|c| match c {
OutputMessageContent::OutputText(t) => Some(t.text.as_str()),
_ => None,
.map(|c| match c {
InputOutputMessageContent::OutputText(t) => t.text.as_str(),
InputOutputMessageContent::Refusal(r) => r.refusal.as_str(),
})
.collect::<Vec<_>>()
.join("");
messages.push(ChatCompletionRequestMessage::Assistant(
ChatCompletionRequestAssistantMessage {
content: Some(ChatCompletionRequestAssistantMessageContent::Text(
text,
)),
reasoning_content: None,
refusal: None,
name: None,
audio: None,
tool_calls: None,
#[allow(deprecated)]
function_call: None,
},
));
pending.push_text(&text);
}
},
Item::FunctionCall(fc) => {
// A function call from a previous assistant turn -> assistant message with tool_calls
messages.push(ChatCompletionRequestMessage::Assistant(
ChatCompletionRequestAssistantMessage {
content: None,
reasoning_content: None,
refusal: None,
name: None,
audio: None,
tool_calls: Some(vec![ChatCompletionMessageToolCall {
id: fc.call_id.clone(),
r#type: FunctionType::Function,
function: dynamo_protocols::types::FunctionCall {
name: fc.name.clone(),
arguments: fc.arguments.clone(),
},
}]),
#[allow(deprecated)]
function_call: None,
pending.push_tool_call(ChatCompletionMessageToolCall {
id: fc.call_id.clone(),
r#type: FunctionType::Function,
function: dynamo_protocols::types::FunctionCall {
name: fc.name.clone(),
arguments: fc.arguments.clone(),
},
));
});
}
Item::FunctionCallOutput(fco) => {
// The output of a function call -> tool message
std::mem::take(&mut pending).flush_into(&mut messages);
let output_text = match &fco.output {
FunctionCallOutput::Text(text) => text.clone(),
FunctionCallOutput::Content(parts) => convert_input_content_to_text(parts),
......@@ -324,15 +415,30 @@ fn convert_input_items_to_messages(
},
));
}
Item::Reasoning(r) => {
let text = r
.summary
.iter()
.map(|SummaryPart::SummaryText(t)| t.text.as_str())
.collect::<Vec<_>>()
.join("");
pending.push_reasoning(&text);
}
other => {
// Unknown / unsupported variants (ComputerCall, WebSearchCall,
// tool-output items other than FunctionCallOutput, etc.). We do
// not have a faithful Chat Completions mapping, but silently
// consuming them without flushing would let a following
// FunctionCall coalesce with tool_calls from a different
// semantic turn. Flush first, then skip.
tracing::debug!(
"Skipping unsupported input item type during conversion: {:?}",
std::mem::discriminant(other)
);
std::mem::take(&mut pending).flush_into(&mut messages);
}
},
InputItem::EasyMessage(easy) => {
// Handle easy input messages based on role
let content_text = match &easy.content {
dynamo_protocols::types::responses::EasyInputContent::Text(text) => {
text.clone()
......@@ -341,35 +447,33 @@ fn convert_input_items_to_messages(
convert_input_content_to_text(parts)
}
};
let chat_msg = match easy.role {
match easy.role {
ResponseRole::System | ResponseRole::Developer => {
ChatCompletionRequestMessage::System(ChatCompletionRequestSystemMessage {
content: ChatCompletionRequestSystemMessageContent::Text(content_text),
name: None,
})
std::mem::take(&mut pending).flush_into(&mut messages);
messages.push(ChatCompletionRequestMessage::System(
ChatCompletionRequestSystemMessage {
content: ChatCompletionRequestSystemMessageContent::Text(
content_text,
),
name: None,
},
));
}
ResponseRole::User => {
ChatCompletionRequestMessage::User(ChatCompletionRequestUserMessage {
content: ChatCompletionRequestUserMessageContent::Text(content_text),
name: None,
})
std::mem::take(&mut pending).flush_into(&mut messages);
messages.push(ChatCompletionRequestMessage::User(
ChatCompletionRequestUserMessage {
content: ChatCompletionRequestUserMessageContent::Text(
content_text,
),
name: None,
},
));
}
ResponseRole::Assistant => ChatCompletionRequestMessage::Assistant(
ChatCompletionRequestAssistantMessage {
content: Some(ChatCompletionRequestAssistantMessageContent::Text(
content_text,
)),
reasoning_content: None,
refusal: None,
name: None,
audio: None,
tool_calls: None,
#[allow(deprecated)]
function_call: None,
},
),
};
messages.push(chat_msg);
ResponseRole::Assistant => {
pending.push_text(&content_text);
}
}
}
InputItem::ItemReference(_) => {
// Skip item references
......@@ -377,6 +481,8 @@ fn convert_input_items_to_messages(
}
}
pending.flush_into(&mut messages);
Ok(messages)
}
......@@ -865,7 +971,8 @@ pub fn chat_completion_to_response(
mod tests {
use dynamo_protocols::types::responses::{
CreateResponse, FunctionCallOutput, FunctionCallOutputItemParam, FunctionTool,
FunctionToolCall, InputContent, InputImageContent, InputItem, InputMessage, InputParam,
FunctionToolCall, InputContent, InputImageContent, InputItem, InputMessage,
InputOutputMessage, InputOutputMessageContent, InputOutputTextContent, InputParam,
InputRole, InputTextContent, Item, MessageItem, Tool,
};
use dynamo_protocols::types::{
......@@ -1002,16 +1109,18 @@ mod tests {
role: InputRole::User,
status: None,
}))),
InputItem::Item(Item::Message(MessageItem::Output(OutputMessage {
id: "msg_1".into(),
InputItem::Item(Item::Message(MessageItem::Output(InputOutputMessage {
id: Some("msg_1".into()),
role: AssistantRole::Assistant,
status: OutputStatus::Completed,
status: Some(OutputStatus::Completed),
phase: None,
content: vec![OutputMessageContent::OutputText(OutputTextContent {
text: "4".into(),
annotations: vec![],
logprobs: None,
})],
content: vec![InputOutputMessageContent::OutputText(
InputOutputTextContent {
text: "4".into(),
annotations: vec![],
logprobs: None,
},
)],
}))),
InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
content: vec![InputContent::InputText(InputTextContent {
......@@ -1126,6 +1235,709 @@ mod tests {
assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_)));
}
#[test]
fn test_function_call_with_interstitial_assistant_message_is_coalesced() {
    // Regression scenario: the previous turn arrives as `function_call`, then an
    // assistant text message, then `function_call_output`. The conversion has to
    // produce ONE assistant chat message holding both `content` and `tool_calls`;
    // chat templates that insist on a tool message directly after its assistant
    // tool_call (e.g. MiniMax) reject anything else.
    let user_turn = InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
        content: vec![InputContent::InputText(InputTextContent {
            text: "What's the weather?".into(),
        })],
        role: InputRole::User,
        status: None,
    })));
    let weather_call = InputItem::Item(Item::FunctionCall(FunctionToolCall {
        arguments: r#"{"location":"SF"}"#.into(),
        call_id: "call_123".into(),
        namespace: None,
        name: "get_weather".into(),
        id: None,
        status: None,
    }));
    let interstitial_text = InputItem::Item(Item::Message(MessageItem::Output(
        InputOutputMessage {
            id: Some("msg_interstitial".into()),
            role: AssistantRole::Assistant,
            status: Some(OutputStatus::Completed),
            phase: None,
            content: vec![InputOutputMessageContent::OutputText(
                InputOutputTextContent {
                    text: "\n\n".into(),
                    annotations: vec![],
                    logprobs: None,
                },
            )],
        },
    )));
    let weather_result = InputItem::Item(Item::FunctionCallOutput(
        FunctionCallOutputItemParam {
            call_id: "call_123".into(),
            output: FunctionCallOutput::Text(r#"{"temp":"72F"}"#.into()),
            id: None,
            status: None,
        },
    ));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![
                user_turn,
                weather_call,
                interstitial_text,
                weather_result,
            ]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(
        messages.len(),
        3,
        "expected coalesced [user, assistant, tool]"
    );
    assert!(matches!(messages[0], ChatCompletionRequestMessage::User(_)));

    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected a single merged assistant message at index 1"),
    };
    let tool_calls = assistant
        .tool_calls
        .as_ref()
        .expect("tool_calls must be present");
    assert_eq!(tool_calls.len(), 1);
    assert_eq!(tool_calls[0].id, "call_123");
    assert_eq!(tool_calls[0].function.name, "get_weather");
    let content = assistant
        .content
        .as_ref()
        .expect("content must carry interstitial text");
    match content {
        ChatCompletionRequestAssistantMessageContent::Text(t) => {
            assert_eq!(t, "\n\n");
        }
        _ => panic!("expected text content"),
    }

    assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_)));
}
#[test]
fn test_easy_message_assistant_coalesced_with_adjacent_function_call() {
    use dynamo_protocols::types::responses::{
        EasyInputContent, EasyInputMessage, Role as ResponseRole,
    };

    // Coalescing must also cover the EasyInputMessage shape: plain string
    // content, role=assistant, and no `type:"message"` discriminator.
    let easy = |role: ResponseRole, text: &str| {
        InputItem::EasyMessage(EasyInputMessage {
            role,
            content: EasyInputContent::Text(text.into()),
            ..Default::default()
        })
    };

    let items = vec![
        easy(ResponseRole::User, "x"),
        InputItem::Item(Item::FunctionCall(FunctionToolCall {
            arguments: "{}".into(),
            call_id: "c".into(),
            namespace: None,
            name: "f".into(),
            id: None,
            status: None,
        })),
        easy(ResponseRole::Assistant, ""),
        InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam {
            call_id: "c".into(),
            output: FunctionCallOutput::Text("x".into()),
            id: None,
            status: None,
        })),
    ];

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(items),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(messages.len(), 3);
    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected merged assistant message"),
    };
    assert!(assistant.tool_calls.is_some());
    assert_eq!(assistant.tool_calls.as_ref().unwrap().len(), 1);
    assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_)));
}
#[test]
fn test_standalone_assistant_message_with_empty_content_preserves_turn() {
    // An earlier assistant turn that carried no text (empty content) still has
    // to surface as an assistant message. If it vanished, the two user turns
    // around it would become adjacent, which breaks strict-alternation chat
    // templates and distorts the context the model sees.
    let user_message = |text: &str| {
        InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
            content: vec![InputContent::InputText(InputTextContent {
                text: text.into(),
            })],
            role: InputRole::User,
            status: None,
        })))
    };
    let empty_assistant = InputItem::Item(Item::Message(MessageItem::Output(
        InputOutputMessage {
            id: None,
            role: AssistantRole::Assistant,
            status: None,
            phase: None,
            content: vec![InputOutputMessageContent::OutputText(
                InputOutputTextContent {
                    text: "".into(),
                    annotations: vec![],
                    logprobs: None,
                },
            )],
        },
    )));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![
                user_message("first question"),
                empty_assistant,
                user_message("second question"),
            ]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(
        messages.len(),
        3,
        "empty assistant turn must not be silently dropped"
    );
    assert!(matches!(messages[0], ChatCompletionRequestMessage::User(_)));

    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected assistant turn boundary preserved"),
    };
    assert!(assistant.tool_calls.is_none());
    let content = assistant
        .content
        .as_ref()
        .expect("empty turn still emits content");
    match content {
        ChatCompletionRequestAssistantMessageContent::Text(t) => {
            assert_eq!(t, "");
        }
        _ => panic!("expected text content"),
    }

    assert!(matches!(messages[2], ChatCompletionRequestMessage::User(_)));
}
#[test]
fn test_easy_assistant_message_with_empty_content_preserves_turn() {
    use dynamo_protocols::types::responses::{
        EasyInputContent, EasyInputMessage, Role as ResponseRole,
    };

    // Turn-boundary preservation must hold for the EasyInputMessage shape too:
    // an empty assistant message between two user messages stays in place.
    let turns = [
        (ResponseRole::User, "first"),
        (ResponseRole::Assistant, ""),
        (ResponseRole::User, "second"),
    ];
    let mut items = Vec::new();
    for (role, text) in turns {
        items.push(InputItem::EasyMessage(EasyInputMessage {
            role,
            content: EasyInputContent::Text(text.into()),
            ..Default::default()
        }));
    }

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(items),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(messages.len(), 3);
    assert!(matches!(
        messages[1],
        ChatCompletionRequestMessage::Assistant(_)
    ));
}
#[test]
fn test_pure_function_call_turn_emits_null_content() {
    // The Chat Completions spec permits `content: null` on assistant messages
    // that only carry `tool_calls`, and some Jinja templates branch on
    // `{% if message.content is not none %}`. A pure tool-call turn must
    // therefore not be emitted with `content: ""`. (Turn-boundary cases — an
    // empty OutputMessage without tool_calls — still produce `Some(Text(""))`.)
    let user_turn = InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
        content: vec![InputContent::InputText(InputTextContent {
            text: "hi".into(),
        })],
        role: InputRole::User,
        status: None,
    })));
    let call = InputItem::Item(Item::FunctionCall(FunctionToolCall {
        arguments: "{}".into(),
        call_id: "c".into(),
        namespace: None,
        name: "f".into(),
        id: None,
        status: None,
    }));
    let call_result = InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam {
        call_id: "c".into(),
        output: FunctionCallOutput::Text("ok".into()),
        id: None,
        status: None,
    }));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![user_turn, call, call_result]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(messages.len(), 3);
    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected assistant message"),
    };
    assert!(
        assistant.content.is_none(),
        "pure tool-call turn must have content: null, got {:?}",
        assistant.content
    );
    assert!(assistant.tool_calls.is_some());
}
#[test]
fn test_reasoning_item_routed_into_reasoning_content() {
    use dynamo_protocols::types::responses::{ReasoningItem, SummaryPart, SummaryTextContent};

    // Regression: Codex and the Agents SDK send Item::Reasoning back mid-turn.
    // Its summary text has to land in `reasoning_content` of the coalesced
    // assistant message rather than being dropped silently.
    let user_turn = InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
        content: vec![InputContent::InputText(InputTextContent {
            text: "solve".into(),
        })],
        role: InputRole::User,
        status: None,
    })));
    let reasoning = InputItem::Item(Item::Reasoning(ReasoningItem {
        id: "rs_1".into(),
        summary: vec![SummaryPart::SummaryText(SummaryTextContent {
            text: "thinking step 1".into(),
        })],
        content: None,
        encrypted_content: None,
        status: None,
    }));
    let call = InputItem::Item(Item::FunctionCall(FunctionToolCall {
        arguments: "{}".into(),
        call_id: "c".into(),
        namespace: None,
        name: "f".into(),
        id: None,
        status: None,
    }));
    let call_result = InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam {
        call_id: "c".into(),
        output: FunctionCallOutput::Text("ok".into()),
        id: None,
        status: None,
    }));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![user_turn, reasoning, call, call_result]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(messages.len(), 3);
    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected assistant message with reasoning + tool_calls"),
    };
    let reasoning_content = assistant
        .reasoning_content
        .as_ref()
        .expect("reasoning must be preserved");
    match reasoning_content {
        ReasoningContent::Text(t) => assert_eq!(t, "thinking step 1"),
        _ => panic!("expected Text reasoning content"),
    }
    assert!(assistant.tool_calls.is_some());
}
#[test]
fn test_unsupported_item_variant_flushes_pending() {
    use dynamo_protocols::types::responses::{
        ComputerCallOutputItemParam, ComputerScreenshotImage, ComputerScreenshotImageType,
    };

    // Input order: function_call, an unsupported tool-output variant, another
    // function_call, then function_call_output. If the catch-all arm did not
    // flush, both FunctionCalls would end up in one assistant `tool_calls`
    // list even though they belong to different semantic turns.
    let function_call = |call_id: &str| {
        InputItem::Item(Item::FunctionCall(FunctionToolCall {
            arguments: "{}".into(),
            call_id: call_id.into(),
            namespace: None,
            name: "f".into(),
            id: None,
            status: None,
        }))
    };
    let user_turn = InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
        content: vec![InputContent::InputText(InputTextContent {
            text: "go".into(),
        })],
        role: InputRole::User,
        status: None,
    })));
    let unsupported = InputItem::Item(Item::ComputerCallOutput(ComputerCallOutputItemParam {
        call_id: "cc1".into(),
        output: ComputerScreenshotImage {
            r#type: ComputerScreenshotImageType::ComputerScreenshot,
            image_url: None,
            file_id: None,
        },
        acknowledged_safety_checks: None,
        id: None,
        status: None,
    }));
    let second_call_result = InputItem::Item(Item::FunctionCallOutput(
        FunctionCallOutputItemParam {
            call_id: "c2".into(),
            output: FunctionCallOutput::Text("ok".into()),
            id: None,
            status: None,
        },
    ));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![
                user_turn,
                function_call("c1"),
                unsupported,
                function_call("c2"),
                second_call_result,
            ]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    // Wanted: User, Assistant(tc=[c1]), Assistant(tc=[c2]), Tool(c2).
    // A missing catch-all flush would yield Assistant(tc=[c1,c2]) instead.
    assert!(messages.len() >= 4, "catch-all must flush pending");

    // Gather the tool_calls list of every assistant message that has one.
    let mut tool_call_turns = Vec::new();
    for message in messages.iter() {
        if let ChatCompletionRequestMessage::Assistant(a) = message {
            if let Some(calls) = a.tool_calls.as_ref() {
                tool_call_turns.push(calls);
            }
        }
    }

    assert_eq!(
        tool_call_turns.len(),
        2,
        "two tool-call turns must not coalesce across unsupported variant"
    );
    assert_eq!(tool_call_turns[0].len(), 1);
    assert_eq!(tool_call_turns[0][0].id, "c1");
    assert_eq!(tool_call_turns[1].len(), 1);
    assert_eq!(tool_call_turns[1][0].id, "c2");
}
#[test]
fn test_function_call_then_output_text_then_output_merges_to_one_turn() {
    // The canonical MiniMax reproduction, i.e. the Codex/Agents-SDK sequence
    // that first broke: user, function_call, assistant text,
    // function_call_output. The result must be exactly three chat messages —
    // user, assistant(content + tool_calls), tool — because any other shape
    // breaks the chat template's tool-call pairing.
    let user_turn = InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
        content: vec![InputContent::InputText(InputTextContent {
            text: "call say".into(),
        })],
        role: InputRole::User,
        status: None,
    })));
    let say_call = InputItem::Item(Item::FunctionCall(FunctionToolCall {
        arguments: r#"{"x":"hi"}"#.into(),
        call_id: "c".into(),
        namespace: None,
        name: "say".into(),
        id: None,
        status: None,
    }));
    let assistant_text = InputItem::Item(Item::Message(MessageItem::Output(
        InputOutputMessage {
            id: None,
            role: AssistantRole::Assistant,
            status: None,
            phase: None,
            content: vec![InputOutputMessageContent::OutputText(
                InputOutputTextContent {
                    text: "\n\n\n".into(),
                    annotations: vec![],
                    logprobs: None,
                },
            )],
        },
    )));
    let say_result = InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam {
        call_id: "c".into(),
        output: FunctionCallOutput::Text("hi".into()),
        id: None,
        status: None,
    }));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![user_turn, say_call, assistant_text, say_result]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(messages.len(), 3);
    assert!(matches!(messages[0], ChatCompletionRequestMessage::User(_)));

    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected merged assistant message"),
    };
    let tool_calls = assistant.tool_calls.as_ref().expect("tool_calls present");
    assert_eq!(tool_calls.len(), 1);
    assert_eq!(tool_calls[0].id, "c");
    let content = assistant.content.as_ref().expect("text content present");
    match content {
        ChatCompletionRequestAssistantMessageContent::Text(t) => {
            assert_eq!(t, "\n\n\n");
        }
        _ => panic!("expected text content"),
    }

    assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_)));
}
#[test]
fn test_output_text_then_function_call_then_output_merges_to_one_turn() {
    // Mirror image of the previous ordering: the assistant text comes before
    // the function_call. The accumulator does not care about order, so this
    // must collapse into the same single merged assistant message.
    let user_turn = InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
        content: vec![InputContent::InputText(InputTextContent {
            text: "call say".into(),
        })],
        role: InputRole::User,
        status: None,
    })));
    let assistant_text = InputItem::Item(Item::Message(MessageItem::Output(
        InputOutputMessage {
            id: None,
            role: AssistantRole::Assistant,
            status: None,
            phase: None,
            content: vec![InputOutputMessageContent::OutputText(
                InputOutputTextContent {
                    text: "let me call it".into(),
                    annotations: vec![],
                    logprobs: None,
                },
            )],
        },
    )));
    let say_call = InputItem::Item(Item::FunctionCall(FunctionToolCall {
        arguments: r#"{"x":"hi"}"#.into(),
        call_id: "c".into(),
        namespace: None,
        name: "say".into(),
        id: None,
        status: None,
    }));
    let say_result = InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam {
        call_id: "c".into(),
        output: FunctionCallOutput::Text("hi".into()),
        id: None,
        status: None,
    }));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![user_turn, assistant_text, say_call, say_result]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(messages.len(), 3);
    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected merged assistant message"),
    };
    assert_eq!(
        assistant
            .tool_calls
            .as_ref()
            .expect("tool_calls present")
            .len(),
        1
    );
    let content = assistant.content.as_ref().expect("content present");
    match content {
        ChatCompletionRequestAssistantMessageContent::Text(t) => {
            assert_eq!(t, "let me call it");
        }
        _ => panic!("expected text content"),
    }
}
#[test]
fn test_multiple_function_calls_merge_into_single_assistant_message() {
    // With `parallel_tool_calls: true` the input holds several adjacent
    // Item::FunctionCall entries. All of them belong in one assistant message
    // whose `tool_calls` lists every call.
    let function_call = |call_id: &str, name: &str| {
        InputItem::Item(Item::FunctionCall(FunctionToolCall {
            arguments: "{}".into(),
            call_id: call_id.into(),
            namespace: None,
            name: name.into(),
            id: None,
            status: None,
        }))
    };
    let call_output = |call_id: &str, result: &str| {
        InputItem::Item(Item::FunctionCallOutput(FunctionCallOutputItemParam {
            call_id: call_id.into(),
            output: FunctionCallOutput::Text(result.into()),
            id: None,
            status: None,
        }))
    };
    let user_turn = InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
        content: vec![InputContent::InputText(InputTextContent {
            text: "do two things".into(),
        })],
        role: InputRole::User,
        status: None,
    })));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![
                user_turn,
                function_call("c1", "f"),
                function_call("c2", "g"),
                call_output("c1", "r1"),
                call_output("c2", "r2"),
            ]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    // Layout: user, assistant(tc=[c1, c2]), tool(c1), tool(c2)
    assert_eq!(messages.len(), 4);
    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected single merged assistant message"),
    };
    let tool_calls = assistant.tool_calls.as_ref().expect("tool_calls present");
    assert_eq!(tool_calls.len(), 2, "parallel tool_calls must coalesce");
    assert_eq!(tool_calls[0].id, "c1");
    assert_eq!(tool_calls[1].id, "c2");
    assert!(
        assistant.content.is_none(),
        "pure-tool-call turn has null content"
    );

    assert!(matches!(messages[2], ChatCompletionRequestMessage::Tool(_)));
    assert!(matches!(messages[3], ChatCompletionRequestMessage::Tool(_)));
}
#[test]
fn test_refusal_content_folded_into_assistant_text() {
    use dynamo_protocols::types::responses::RefusalContent;

    // A Refusal part from an earlier assistant turn has to carry over into the
    // next request. The refusal text is folded into the assistant `content`
    // so templates render it like ordinary text; without that the model can
    // no longer see what it refused before.
    let user_message = |text: &str| {
        InputItem::Item(Item::Message(MessageItem::Input(InputMessage {
            content: vec![InputContent::InputText(InputTextContent {
                text: text.into(),
            })],
            role: InputRole::User,
            status: None,
        })))
    };
    let refusal_turn = InputItem::Item(Item::Message(MessageItem::Output(
        InputOutputMessage {
            id: None,
            role: AssistantRole::Assistant,
            status: None,
            phase: None,
            content: vec![InputOutputMessageContent::Refusal(RefusalContent {
                refusal: "I cannot help with that.".into(),
            })],
        },
    )));

    let request = NvCreateResponse {
        inner: CreateResponse {
            input: InputParam::Items(vec![
                user_message("try again"),
                refusal_turn,
                user_message("ok different question"),
            ]),
            model: Some("test-model".into()),
            ..Default::default()
        },
        nvext: None,
    };

    let converted: NvCreateChatCompletionRequest = request.try_into().unwrap();
    let messages = &converted.inner.messages;

    assert_eq!(messages.len(), 3);
    let assistant = match &messages[1] {
        ChatCompletionRequestMessage::Assistant(a) => a,
        _ => panic!("expected assistant message carrying folded refusal"),
    };
    let content = assistant
        .content
        .as_ref()
        .expect("refusal folded into content");
    match content {
        ChatCompletionRequestAssistantMessageContent::Text(t) => {
            assert_eq!(t, "I cannot help with that.");
        }
        _ => panic!("expected text content"),
    }
}
#[test]
fn test_tools_conversion() {
let req = NvCreateResponse {
......@@ -1548,23 +2360,60 @@ thinking
}
#[test]
fn test_bare_assistant_output_message_deserializes_via_owned_types() {
    // Regression: upstream async-openai's OutputMessage required `id` and
    // `status`. Dynamo-owned types make them optional so real-world client
    // shapes (no id/status, no annotations) round-trip successfully.
    let json = serde_json::json!({
        "role": "assistant",
        "content": [{"type": "output_text", "text": "Hello!"}],
        "type": "message"
    });

    // The bare shape must deserialize, and must land in the assistant
    // (`Output`) variant with the omitted fields surfaced as `None`.
    let item: InputItem =
        serde_json::from_value(json).expect("relaxed deserialize should succeed");
    match item {
        InputItem::Item(Item::Message(MessageItem::Output(msg))) => {
            assert_eq!(msg.role, AssistantRole::Assistant);
            assert!(msg.id.is_none());
            assert!(msg.status.is_none());
        }
        other => panic!("Expected Item::Message(Output), got {:?}", other),
    }
}
#[test]
fn test_nvcreate_response_accepts_bare_assistant_messages() {
    // End-to-end check: a Codex-style request body that contains an
    // interstitial assistant text item without id/status/annotations must
    // deserialize into NvCreateResponse through the derived impls on the
    // Dynamo-owned CreateResponse chain.
    let user_turn = serde_json::json!({
        "type": "message",
        "role": "user",
        "content": [{"type": "input_text", "text": "hi"}]
    });
    let tool_call = serde_json::json!({
        "type": "function_call", "call_id": "c", "name": "f", "arguments": "{}"
    });
    let bare_assistant_turn = serde_json::json!({
        "type": "message",
        "role": "assistant",
        "content": [{"type": "output_text", "text": "\n\n\n"}]
    });
    let tool_result = serde_json::json!({
        "type": "function_call_output", "call_id": "c", "output": "x"
    });
    let body = serde_json::json!({
        "model": "m",
        "input": [user_turn, tool_call, bare_assistant_turn, tool_result]
    });

    let req: NvCreateResponse =
        serde_json::from_value(body).expect("relaxed deserialize should succeed");

    let InputParam::Items(items) = &req.inner.input else {
        panic!("expected Items input");
    };
    assert_eq!(items.len(), 4);

    // The third item is the bare assistant message; it must route to the
    // relaxed `Output` variant.
    let InputItem::Item(Item::Message(MessageItem::Output(out))) = &items[2] else {
        panic!("expected MessageItem::Output, got {:?}", &items[2]);
    };
    assert_eq!(out.role, AssistantRole::Assistant);
}
#[test]
......@@ -1582,8 +2431,8 @@ thinking
let item: InputItem = serde_json::from_value(json).unwrap();
match item {
InputItem::Item(Item::Message(MessageItem::Output(msg))) => {
assert_eq!(msg.id, "msg_abc123");
assert_eq!(msg.status, OutputStatus::Completed);
assert_eq!(msg.id.as_deref(), Some("msg_abc123"));
assert_eq!(msg.status, Some(OutputStatus::Completed));
}
other => panic!("Expected Item::Message(Output), got {:?}", other),
}
......
CLAUDE.md
\ No newline at end of file
# lib/protocols
OpenAI-compatible request/response types for Dynamo's HTTP surface. Built on top of the `async-openai` crate, with selective Dynamo-owned overrides where we need behaviors upstream won't accept or hasn't merged yet.
If you're extending or debugging types here, read this whole file before editing. The central question every change hinges on is: **do we re-export this upstream, or do we own it?** This document exists so the answer is consistent.
## The core tension
`async-openai` is well-maintained but slow on input-laxity PRs. The maintainer generally wants changes to match the OpenAPI spec exactly, even when OpenAI's *hosted* API accepts more permissive shapes on input than the spec requires. See `64bit/async-openai#535` (optional `ReasoningItem.id`) and prior work on optional `OutputMessage.id`/`status` — both driven by real Agents-SDK / Codex traffic that the spec technically rejects.
We can't block Dynamo on upstream merges. We also can't fork the whole crate — it's enormous and updates often. The rule we settled on is: **re-export upstream by default; own the narrowest type subtree that lets us fix the behavior we need.**
## The ownership rubric
Default to upstream. Own a type only when at least one of these is true:
1. **Upstream rejects a shape real clients send.** This is the driving case. Example: `OutputMessage.id`/`status` and `OutputTextContent.annotations` are marked required upstream but are routinely omitted by Codex / Agents SDK on input.
2. **We need to extend the schema with a Dynamo-specific field** that doesn't belong upstream. Example: `CreateChatCompletionRequest.mm_processor_kwargs` (vLLM multimodal), `ChatCompletionRequestAssistantMessage.reasoning_content` (R1 / QwQ), `ChatCompletionStreamOptions.continuous_usage_stats`.
3. **Upstream's type forces a shape that breaks downstream backends.** Example: `FunctionCall.arguments` is `String` upstream; LangChain and similar send it as an object. We own `FunctionCall` to accept both via a custom deserializer and normalize to `String`.
4. **Upstream has a known bug.** Example: `ChatCompletionMessageToolCall.type` wasn't always serialized; we own it with `#[serde(default = "default_function_type")]` to preserve wire compat.
**Do not own a type just because an adjacent type is owned.** Keep the blast radius small. If owning `OutputMessage` would cascade into owning `Response`, `OutputItem`, streaming events, and half the crate — stop and find a narrower fix (see "Naming: avoiding dual-side collisions" below).
## Layout
- `src/types/chat.rs` — Chat Completions (request, response, stream, messages). Extensively owned: multimodal content, reasoning, continuous usage stats, flexible `arguments`.
- `src/types/responses/mod.rs` — Responses API (Codex, Agents SDK). Input chain owned; output chain fully upstream.
- `src/types/completion.rs` — Legacy completions. Mostly upstream.
- `src/types/anthropic.rs` — Anthropic Messages API. Fully owned (no upstream equivalent in `async-openai`).
- `src/types/embeddings`, `src/types/images` — full upstream re-export (no Dynamo extensions).
## Re-export conventions
Use **explicit re-exports** (`pub use foo::{A, B, C}`), not globs, when you need to selectively shadow. Globs (`pub use foo::*`) are allowed at the top of a module — Rust lets a local `pub struct Foo` shadow a glob-imported `Foo`, with the local definition taking precedence over the glob. But explicit lists make the ownership split obvious to readers and catch mistakes at compile time when upstream renames or removes a type.
`src/types/responses/mod.rs` uses a glob re-export because the surface is huge (200+ types). `src/types/chat.rs` uses explicit lists because the surface is manageable and Dynamo owns more of it. Either pattern is acceptable; pick based on how many types you'd have to enumerate to exclude the ones you own.
## Naming: avoiding dual-side collisions
**The trap.** Upstream sometimes reuses the same type on both request-input and response-output sides. `OutputMessage` is the canonical example: it appears inside `MessageItem::Output(...)` (input side — a prior assistant turn echoed back) AND inside `OutputItem::Message(...)` (output side — the assistant message we just produced).
If we relax `OutputMessage` (make `id`/`status` optional) and shadow upstream's name, every place that constructs an `OutputItem::Message(OutputMessage { ... })` on the output side breaks: `OutputItem::Message` variant holds upstream's type, not ours, and our relaxed struct doesn't match.
The naive fix is to also own `OutputItem`. But that cascades into owning `Response`, streaming events, and a long tail of their sub-types. The right fix is smaller:
**Rule.** If a type is reused by upstream on both input and output sides, give the Dynamo-owned input-side variant a *different name*. The output side keeps using upstream's name via the glob / explicit re-export.
Current naming in `responses/mod.rs`:
- `InputOutputMessage` — Dynamo-owned, relaxed; used in `MessageItem::Output(...)` on the input side.
- `OutputMessage` — upstream, unchanged; used in `OutputItem::Message(...)` on the output side.
- Same pattern for `InputOutputMessageContent` (input) vs upstream `OutputMessageContent` (output), and `InputOutputTextContent` (input) vs upstream `OutputTextContent` (output).
Input-only types can shadow upstream with the same name — no conflict. Current shadows: `MessageItem`, `Item`, `InputItem`, `InputParam`, `CreateResponse`.
## The Responses input chain, specifically
As of this writing, the owned input chain is:
```
CreateResponse
└── input: InputParam (shadow)
└── InputItem (shadow)
├── ItemReference (upstream)
├── EasyInputMessage (upstream)
└── Item (shadow, mirrors upstream variant-for-variant)
├── Message(MessageItem) (shadow)
│ ├── Input(InputMessage) (upstream)
│ └── Output(InputOutputMessage) (NEW NAME — relaxed)
│ └── content: Vec<InputOutputMessageContent> (NEW NAME)
│ └── OutputText(InputOutputTextContent) (NEW NAME — relaxed)
└── ... 24 other upstream variants (FunctionCall, Reasoning, etc.)
```
`Item` mirrors upstream variant-for-variant because it's a `#[serde(tag = "type")]` enum — we can't inherit variants. If upstream adds a new variant to their `Item`, we must add it here too, or payloads carrying that type will fail to deserialize. This is the one place where upstream drift bites us; accept it as the cost of owning the chain.
The output chain (`Response`, `OutputItem`, `OutputMessage`, streaming events, etc.) is fully upstream. We mint valid id/status on output, so there's no lenience needed and no reason to own it.
## When upstream finally merges a relaxation
If an upstream PR lands that makes a field optional (matching what we relaxed), the checklist is:
1. Bump `async-openai` in `Cargo.toml`.
2. Delete the owned override if it's now identical to upstream, or narrow it if upstream only partially relaxed.
3. Update consumer sites (convert `Option<T>` to `T` if upstream still has the field but non-optional, etc.).
4. Run the full test suite; the serialization-shape tests should catch any regressions.
Don't leave redundant Dynamo-owned types in place "just in case." Dead ownership is tech debt.
## When upstream renames or restructures a type we re-export
Glob re-exports will silently pick up the rename. Explicit re-exports will fail to compile — which is the point. Update the explicit list and any consumer code, confirm no semantic drift, run tests.
## Testing patterns
- Serialization-shape tests (`test_response_wire_format_shape` in `lib/llm`) validate that our serialized JSON matches the API spec. Lean on these when you change owned types.
- Deserialization tests for owned types should cover both the relaxed shape (the reason we own it) and the strict shape (to prove we didn't break spec-conformant clients).
- When you add a new Dynamo field to an owned type, add a test that omits it and asserts the default behavior.
## Things that are explicitly *not* this crate's job
- HTTP transport (request execution, retries, streaming frame parsing) — that's `lib/llm/src/http/`.
- Semantic conversion between API types (Responses → Chat, Anthropic → Chat, etc.) — that lives in `lib/llm/src/protocols/` and uses the types defined here.
- Model-specific tokenization or prompt templating.
Keep this crate declarative: types, serde derives, builders, conversions-by-`From`. Business logic belongs downstream.
## Common mistakes
- Owning a type because it's *nearby* a bug, not because of the bug itself. Narrow the fix.
- Shadowing a dual-side type without checking output-side construction sites. `grep` the workspace for constructor calls before renaming.
- Adding fields to upstream-re-exported types via `#[serde(default)]` on a local wrapper struct. Doesn't work — serde can't inject defaults into a foreign type unless you use `#[serde(remote)]`, which requires field-for-field mirroring and doesn't help with optional-vs-required mismatches.
- Forgetting to update `From` impls when adding variants. The compiler catches exhaustive matches but not variant count on `From<Ours> for Upstream` when the enum is non-exhaustive.
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Dynamo owns the Responses-API input-side type chain. Upstream async-openai
// is re-exported as the source for everything else (output-side types,
// streaming events, individual tool-call payloads, etc.), including the SDK
// convenience methods it provides directly (`output_text`, etc.).
//
// The input chain is owned because upstream marks fields as required that
// real-world clients (OpenAI Agents SDK, Codex, etc.) routinely omit when
// round-tripping a prior assistant turn as input:
// - `OutputMessage.id` / `.status` — omitted when echoing a previous output
// - `OutputTextContent.annotations` — omitted when the part carried none
// Upstream is slow to relax these (see e.g. 64bit/async-openai#535 for the
// sibling `ReasoningItem.id` fix, still open at time of writing); OpenAI's own
// hosted API accepts the relaxed shapes on input regardless.
//
// This mirrors the pattern in `crate::types::chat` where Dynamo owns the
// request types it needs to extend or relax while re-exporting the rest of
// upstream's type library verbatim.
//
// Naming: the relaxed assistant-input message is `InputOutputMessage` (and
// `InputOutputMessageContent` / `InputOutputTextContent` for its content
// parts) to avoid colliding with upstream's `OutputMessage`, which remains the
// canonical type for *output-side* response construction (`OutputItem`,
// `Response.output`). `MessageItem`, `Item`, `InputItem`, `InputParam`, and
// `CreateResponse` are input-only and shadow upstream's same-named types
// without conflict.
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
// Re-export all upstream response types (shared structures like
// ComparisonFilter, ResponseUsage, and InputTokenDetails, tool-call item
// types, streaming events, etc.). The types we own below shadow their upstream
// counterparts where no dual-side conflict exists.
pub use async_openai::types::responses::*;
// Re-export from parent module for backward compat.
pub use crate::types::ImageDetail;
pub use crate::types::ReasoningEffort;
pub use crate::types::ResponseFormatJsonSchema;
// Backward-compatible type aliases for Dynamo consumer code migration.
pub type Input = InputParam;
pub type PromptConfig = Prompt;
pub type TextConfig = ResponseTextParam;
pub type TextResponseFormat = TextResponseFormatConfiguration;

/// Stream of response events.
///
/// A pinned, boxed, `Send` stream whose items are either a
/// `ResponseStreamEvent` or an `OpenAIError`. Defined exactly once: a second
/// `pub type ResponseStream` in this module would be a duplicate-definition
/// compile error.
pub type ResponseStream = std::pin::Pin<
    Box<dyn futures::Stream<Item = Result<ResponseStreamEvent, crate::error::OpenAIError>> + Send>,
>;
// ---------------------------------------------------------------------------
// Input-side assistant message (relaxed vs upstream OutputMessage)
// ---------------------------------------------------------------------------
/// Serde `deserialize_with` helper that maps an explicit `null` to an empty
/// `Vec`, and otherwise deserializes the sequence as usual.
///
/// `#[serde(default)]` on its own only covers a field that is *absent*; a
/// present-but-`null` value would be rejected by `Vec::deserialize`. Some
/// clients (notably some Agents SDK variants) have been observed to send
/// `"annotations": null`, so pair this helper with `#[serde(default)]` to
/// treat omission and explicit null identically.
///
/// # Errors
///
/// Propagates the deserializer's error when the value is neither `null` nor a
/// valid sequence of `T`.
fn deserialize_null_as_empty_vec<'de, T, D>(deserializer: D) -> Result<Vec<T>, D::Error>
where
    T: Deserialize<'de>,
    D: serde::Deserializer<'de>,
{
    // Going through `Option` is what makes `null` acceptable: it becomes `None`.
    let maybe_items = Option::<Vec<T>>::deserialize(deserializer)?;
    Ok(maybe_items.unwrap_or_default())
}
/// Relaxed counterpart to upstream `OutputTextContent` for input-side content.
/// `annotations` tolerates both missing and explicit `null`; upstream requires
/// it to be a present non-null array.
///
/// When deserializing, `text` is the only field that must be present.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct InputOutputTextContent {
/// Annotations attached to this text part. A missing field or an explicit
/// `null` deserializes to an empty vec; the field is always written when
/// serializing, even when empty.
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
pub annotations: Vec<Annotation>,
/// Optional log-probability entries. Defaults to `None` when absent and is
/// left out of the serialized output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub logprobs: Option<Vec<LogProb>>,
/// The text of this content part (required).
pub text: String,
}
/// Content parts of a prior assistant message presented as input.
///
/// Internally tagged on the `type` field with snake_case variant names, so the
/// accepted tags are `"output_text"` and `"refusal"`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InputOutputMessageContent {
/// Text produced by the assistant (`"type": "output_text"`), using the
/// relaxed Dynamo-owned text part.
OutputText(InputOutputTextContent),
/// A refusal from the assistant (`"type": "refusal"`), using upstream's
/// `RefusalContent` unchanged.
Refusal(RefusalContent),
}
/// An assistant message echoed back as input for a subsequent turn. Relaxed
/// compared to upstream `OutputMessage`: `id`, `status`, `phase`, and `content`
/// are all optional on input; `role` is the only field that must be present.
/// Some clients send a bare assistant shell (`{"type":"message",
/// "role":"assistant"}`) with no `content` at all, usually on pure tool-call
/// turns; treat absent `content` as an empty vec, same way we treat a missing
/// `id`/`status`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct InputOutputMessage {
/// Content parts of the message. A missing field or an explicit `null`
/// deserializes to an empty vec; always written when serializing.
#[serde(default, deserialize_with = "deserialize_null_as_empty_vec")]
pub content: Vec<InputOutputMessageContent>,
/// Message id, if the client echoed one. Omitted from output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub id: Option<String>,
/// Must be the assistant role; this is what distinguishes the message from
/// a user/system/developer `InputMessage` inside `MessageItem`.
pub role: AssistantRole,
/// Optional message phase. Omitted from output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub phase: Option<MessagePhase>,
/// Optional status of the echoed message. Omitted from output when `None`.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub status: Option<OutputStatus>,
}
// ---------------------------------------------------------------------------
// Input-side Item / Message / InputItem / InputParam (shadow upstream)
// ---------------------------------------------------------------------------
/// Message item within `Item`. Untagged; disambiguated by the `role` field:
/// the `Output` variant requires `role: "assistant"` (via `AssistantRole`,
/// which is a single-variant enum) and `Input` requires `role` in
/// `"user" | "system" | "developer"` (via `InputRole`). A payload with an
/// unknown role (e.g. `"tool"`) or a missing `role` produces the generic
/// untagged-enum error — callers are expected to send a valid role. If you
/// see the "data did not match any variant of untagged enum" failure on this
/// type, it is almost always a role mismatch.
///
/// Because the enum is untagged, serde attempts the variants in declaration
/// order and keeps the first one that deserializes, so the order of the
/// variants below is part of the behavior.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum MessageItem {
/// Prior assistant output echoed back (role: assistant). Tried first — its
/// `role` constraint excludes user/system/developer inputs.
Output(InputOutputMessage),
/// User / system / developer input message.
Input(InputMessage),
}
/// Structured input/output item, discriminated by `type`. Mirrors upstream
/// `Item` variant-for-variant; only `Message` uses a Dynamo-owned type.
///
/// The `type` tag is the snake_case form of the variant name (for example
/// `FunctionCall` is `"function_call"` and `McpCall` is `"mcp_call"`). Since
/// the enum is internally tagged, a payload whose `type` is not one of the
/// variants listed here does not deserialize as an `Item`; keep this list in
/// step with upstream when new item types appear.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum Item {
// The only variant backed by a Dynamo-owned payload type.
Message(MessageItem),
// Every variant below wraps the upstream payload type unchanged.
FileSearchCall(FileSearchToolCall),
ComputerCall(ComputerToolCall),
ComputerCallOutput(ComputerCallOutputItemParam),
WebSearchCall(WebSearchToolCall),
FunctionCall(FunctionToolCall),
FunctionCallOutput(FunctionCallOutputItemParam),
ToolSearchCall(ToolSearchCallItemParam),
ToolSearchOutput(ToolSearchOutputItemParam),
Reasoning(ReasoningItem),
Compaction(CompactionSummaryItemParam),
ImageGenerationCall(ImageGenToolCall),
CodeInterpreterCall(CodeInterpreterToolCall),
LocalShellCall(LocalShellToolCall),
LocalShellCallOutput(LocalShellToolCallOutput),
ShellCall(FunctionShellCallItemParam),
ShellCallOutput(FunctionShellCallOutputItemParam),
ApplyPatchCall(ApplyPatchToolCallItemParam),
ApplyPatchCallOutput(ApplyPatchToolCallOutputItemParam),
McpListTools(MCPListTools),
McpApprovalRequest(MCPApprovalRequest),
McpApprovalResponse(MCPApprovalResponse),
McpCall(MCPToolCall),
CustomToolCallOutput(CustomToolCallOutput),
CustomToolCall(CustomToolCall),
}
/// Single input item. Untagged; order matters (most specific first).
///
/// Serde tries the variants top to bottom and keeps the first that
/// deserializes, so reordering them can change which variant a payload lands
/// in. A message object without a `type` field cannot match `Item` (which is
/// tagged on `type`) and therefore falls through to `EasyMessage`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum InputItem {
/// Reference to an existing item (upstream type).
ItemReference(ItemReference),
/// Fully structured item discriminated by `type` (Dynamo-owned `Item`).
Item(Item),
/// Shorthand message form (upstream `EasyInputMessage`); tried last.
EasyMessage(EasyInputMessage),
}
/// Input to a `POST /v1/responses` request.
///
/// Untagged: a JSON string deserializes as `Text`, and a JSON array of input
/// items deserializes as `Items`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)]
pub enum InputParam {
/// Plain text input.
Text(String),
/// A list of structured input items.
Items(Vec<InputItem>),
}
impl Default for InputParam {
fn default() -> Self {
Self::Text(String::new())
}
}
// ---------------------------------------------------------------------------
// CreateResponse (owned, uses Dynamo-owned InputParam)
// ---------------------------------------------------------------------------
/// Request body for `POST /v1/responses`. Mirrors upstream `CreateResponse`
/// field-for-field but uses Dynamo-owned `InputParam`, which transitively
/// accepts the relaxed input shapes described in this module's header. All
/// other fields reference upstream types verbatim.
///
/// `input` is the only non-optional field. Every other field is an `Option`
/// that is left out of the serialized output when it is `None`.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
pub struct CreateResponse {
#[serde(skip_serializing_if = "Option::is_none")]
pub background: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub conversation: Option<ConversationParam>,
#[serde(skip_serializing_if = "Option::is_none")]
pub include: Option<Vec<IncludeEnum>>,
/// The request input, using the Dynamo-owned `InputParam`. Always
/// serialized; `Default` yields empty text.
pub input: InputParam,
#[serde(skip_serializing_if = "Option::is_none")]
pub instructions: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_output_tokens: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub max_tool_calls: Option<u32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub metadata: Option<HashMap<String, String>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub model: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub parallel_tool_calls: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub previous_response_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt: Option<Prompt>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub prompt_cache_retention: Option<PromptCacheRetention>,
#[serde(skip_serializing_if = "Option::is_none")]
pub reasoning: Option<Reasoning>,
#[serde(skip_serializing_if = "Option::is_none")]
pub safety_identifier: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub service_tier: Option<ServiceTier>,
#[serde(skip_serializing_if = "Option::is_none")]
pub store: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub stream: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub stream_options: Option<ResponseStreamOptions>,
#[serde(skip_serializing_if = "Option::is_none")]
pub temperature: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<ResponseTextParam>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_choice: Option<ToolChoiceParam>,
#[serde(skip_serializing_if = "Option::is_none")]
pub tools: Option<Vec<Tool>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_logprobs: Option<u8>,
#[serde(skip_serializing_if = "Option::is_none")]
pub top_p: Option<f32>,
#[serde(skip_serializing_if = "Option::is_none")]
pub truncation: Option<Truncation>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes `json` as an [`InputItem`], panicking if it does not parse.
    #[track_caller]
    fn parse_item(json: serde_json::Value) -> InputItem {
        serde_json::from_value(json).unwrap()
    }

    /// Deserializes `json` and returns the assistant message it must contain,
    /// panicking if the item is anything other than `Item::Message(Output)`.
    #[track_caller]
    fn parse_assistant(json: serde_json::Value) -> InputOutputMessage {
        match parse_item(json) {
            InputItem::Item(Item::Message(MessageItem::Output(out))) => out,
            other => panic!("expected Item::Message(Output), got {other:?}"),
        }
    }

    /// Deserializes `json` as a content part and returns the output-text
    /// payload, panicking for any other content variant.
    #[track_caller]
    fn parse_output_text(json: serde_json::Value) -> InputOutputTextContent {
        match serde_json::from_value::<InputOutputMessageContent>(json).unwrap() {
            InputOutputMessageContent::OutputText(text_part) => text_part,
            _ => panic!("expected OutputText"),
        }
    }

    #[test]
    fn relaxed_assistant_message_without_id_or_status() {
        let out = parse_assistant(serde_json::json!({
            "type": "message",
            "role": "assistant",
            "content": [{"type": "output_text", "text": "hi"}]
        }));
        assert_eq!(out.role, AssistantRole::Assistant);
        assert!(out.id.is_none());
        assert!(out.status.is_none());
    }

    #[test]
    fn assistant_message_without_content_field_deserializes() {
        // A bare assistant shell with no `content` key at all, as seen in real
        // Codex/Agents-SDK traffic on pure tool-call turns. The
        // `#[serde(default)]` on `content` has to accept the omission and
        // produce an empty vec.
        let out = parse_assistant(serde_json::json!({
            "type": "message",
            "role": "assistant"
        }));
        assert_eq!(out.role, AssistantRole::Assistant);
        assert!(out.content.is_empty());
        assert!(out.id.is_none());
        assert!(out.status.is_none());
    }

    #[test]
    fn assistant_message_with_explicit_null_content_deserializes() {
        // Same situation as `annotations: null`: some serializers write JSON
        // null rather than dropping the key. `Vec::deserialize` refuses null,
        // which is why `content` goes through `deserialize_null_as_empty_vec`.
        let out = parse_assistant(serde_json::json!({
            "type": "message",
            "role": "assistant",
            "content": null
        }));
        assert!(out.content.is_empty());
    }

    #[test]
    fn mcp_call_item_deserializes() {
        // Protects against the owned `Item` drifting from upstream: the MCP
        // item types were added after the initial owned chain landed.
        let item = parse_item(serde_json::json!({
            "type": "mcp_call",
            "id": "mcp_1",
            "server_label": "srv",
            "name": "t",
            "arguments": "{}"
        }));
        assert!(matches!(item, InputItem::Item(Item::McpCall(_))));
    }

    #[test]
    fn strict_assistant_message_still_deserializes() {
        let out = parse_assistant(serde_json::json!({
            "type": "message",
            "role": "assistant",
            "id": "msg_1",
            "status": "completed",
            "content": [{"type": "output_text", "text": "hi", "annotations": []}]
        }));
        assert_eq!(out.id.as_deref(), Some("msg_1"));
        assert_eq!(out.status, Some(OutputStatus::Completed));
    }

    #[test]
    fn user_message_routes_to_input_variant() {
        let item = parse_item(serde_json::json!({
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "hi"}]
        }));
        assert!(matches!(
            item,
            InputItem::Item(Item::Message(MessageItem::Input(_)))
        ));
    }

    #[test]
    fn function_call_item_still_deserializes() {
        let item = parse_item(serde_json::json!({
            "type": "function_call",
            "call_id": "c",
            "name": "f",
            "arguments": "{}"
        }));
        assert!(matches!(item, InputItem::Item(Item::FunctionCall(_))));
    }

    #[test]
    fn easy_message_string_content_routes_to_easymessage() {
        let item = parse_item(serde_json::json!({"role": "assistant", "content": "x"}));
        assert!(matches!(item, InputItem::EasyMessage(_)));
    }

    #[test]
    fn output_text_without_annotations_defaults_empty() {
        let text_part = parse_output_text(serde_json::json!({"type": "output_text", "text": "hi"}));
        assert!(text_part.annotations.is_empty());
    }

    #[test]
    fn output_text_with_explicit_null_annotations_deserializes_as_empty() {
        // Certain clients emit JSON null for absent fields instead of leaving
        // them out. `Vec::deserialize` would reject that; the custom
        // deserializer handles explicit null exactly like a missing field.
        let text_part = parse_output_text(
            serde_json::json!({"type": "output_text", "text": "hi", "annotations": null}),
        );
        assert!(text_part.annotations.is_empty());
    }

    #[test]
    fn assistant_message_with_explicit_null_id_and_status_deserializes() {
        // `Option<T>` already maps null to `None`, so these explicit-null
        // fields need no custom deserializer. The test pins that behavior so
        // it cannot regress unnoticed (e.g. if a field stops being
        // `Option<_>`).
        let out = parse_assistant(serde_json::json!({
            "type": "message",
            "role": "assistant",
            "id": null,
            "status": null,
            "content": [{"type": "output_text", "text": "hi", "annotations": null}]
        }));
        assert!(out.id.is_none());
        assert!(out.status.is_none());
        assert_eq!(out.content.len(), 1);
    }

    #[test]
    fn create_response_roundtrip_with_relaxed_input() {
        let body = serde_json::json!({
            "model": "m",
            "input": [
                {"type": "message", "role": "user", "content": [
                    {"type": "input_text", "text": "hi"}
                ]},
                {"type": "function_call", "call_id": "c", "name": "f", "arguments": "{}"},
                {"type": "message", "role": "assistant", "content": [
                    {"type": "output_text", "text": "\n\n"}
                ]},
                {"type": "function_call_output", "call_id": "c", "output": "x"}
            ]
        });
        let req: CreateResponse = serde_json::from_value(body).unwrap();
        let InputParam::Items(items) = &req.input else {
            panic!("expected Items");
        };
        assert_eq!(items.len(), 4);
        assert!(matches!(
            items[2],
            InputItem::Item(Item::Message(MessageItem::Output(_)))
        ));
    }
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment