Unverified Commit 2a95ef63 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

fix(responses): align wire shape with OpenResponses spec + add compliance CI (#8283)


Signed-off-by: default avatarAnant Sharma <anants@nvidia.com>
Co-authored-by: default avatarAnant Sharma <anants@nvidia.com>
parent 4410a2c5
...@@ -1595,6 +1595,18 @@ async fn responses( ...@@ -1595,6 +1595,18 @@ async fn responses(
service_tier: request.inner.service_tier, service_tier: request.inner.service_tier,
include: request.inner.include.clone(), include: request.inner.include.clone(),
truncation: request.inner.truncation, truncation: request.inner.truncation,
// Upstream `CreateResponse` doesn't carry these yet; plumbed through so
// the response serializer can default to 0.0 without hardcoding at the
// build site. When upstream (or our shadow) adds the fields, sourcing
// from the request becomes a one-line change here.
presence_penalty: None,
frequency_penalty: None,
// Pass-through metadata — accepted on the request, echoed back on the
// response so the caller can confirm receipt. Dynamo doesn't act on
// these; see `validate_response_unsupported_fields` for rationale.
prompt_cache_key: request.inner.prompt_cache_key.clone(),
prompt_cache_retention: request.inner.prompt_cache_retention,
safety_identifier: request.inner.safety_identifier.clone(),
}; };
let request_id = request.id().to_string(); let request_id = request.id().to_string();
let (orig_request, context) = request.into_parts(); let (orig_request, context) = request.into_parts();
...@@ -1830,6 +1842,24 @@ pub fn validate_response_unsupported_fields( ...@@ -1830,6 +1842,24 @@ pub fn validate_response_unsupported_fields(
VALIDATION_PREFIX.to_string() + "`prompt` is not supported.", VALIDATION_PREFIX.to_string() + "`prompt` is not supported.",
)); ));
} }
// Reject directive fields that change semantics if silently dropped.
// `max_tool_calls` is a hard cap on tool invocations — accepting it
// without enforcement would let a caller send `max_tool_calls: 5` and
// see `max_tool_calls: null` in the response, assuming their limit was
// honored. Fail loud until real enforcement lands.
//
// Pass-through metadata fields (`prompt_cache_key`,
// `prompt_cache_retention`, `safety_identifier`) are deliberately
// accepted and echoed back on the response instead. They're hints for
// OpenAI's caching/moderation backends, not directives — Codex sends
// `prompt_cache_key` on every request — and the OpenResponses spec
// includes them on the response body, so echoing the caller's value
// makes receipt observable without needing a real backend.
if inner.max_tool_calls.is_some() {
return Some(ErrorMessage::not_implemented_error(
VALIDATION_PREFIX.to_string() + "`max_tool_calls` is not supported.",
));
}
None None
} }
...@@ -2714,6 +2744,7 @@ mod tests { ...@@ -2714,6 +2744,7 @@ mod tests {
}) })
}), }),
), ),
("max_tool_calls", Box::new(|r| r.max_tool_calls = Some(5))),
]; ];
for (field, set_field) in unsupported_cases { for (field, set_field) in unsupported_cases {
...@@ -2724,6 +2755,43 @@ mod tests { ...@@ -2724,6 +2755,43 @@ mod tests {
} }
} }
/// Pass-through metadata fields (`prompt_cache_key`,
/// `prompt_cache_retention`, `safety_identifier`) are accepted at the
/// validation layer; the response serializer echoes them back so the
/// caller can confirm receipt. Codex sends `prompt_cache_key` on every
/// request — rejecting it broke `codex exec` end-to-end.
#[test]
fn test_validate_unsupported_fields_accepts_passthrough_metadata() {
#[allow(clippy::type_complexity)]
let passthrough_cases: Vec<(&str, Box<dyn FnOnce(&mut CreateResponse)>)> = vec![
(
"prompt_cache_key",
Box::new(|r| r.prompt_cache_key = Some("ck-1".into())),
),
(
"prompt_cache_retention",
Box::new(|r| {
r.prompt_cache_retention =
Some(dynamo_protocols::types::responses::PromptCacheRetention::InMemory)
}),
),
(
"safety_identifier",
Box::new(|r| r.safety_identifier = Some("user-hash".into())),
),
];
for (field, set_field) in passthrough_cases {
let mut req = make_base_request();
(set_field)(&mut req.inner);
let result = validate_response_unsupported_fields(&req);
assert!(
result.is_none(),
"Expected `{field}` to be accepted as pass-through metadata"
);
}
}
#[test] #[test]
fn test_validate_chat_completion_required_fields_empty_messages() { fn test_validate_chat_completion_required_fields_empty_messages() {
let request = NvCreateChatCompletionRequest { let request = NvCreateChatCompletionRequest {
......
...@@ -9,9 +9,10 @@ use dynamo_protocols::types::responses::{ ...@@ -9,9 +9,10 @@ use dynamo_protocols::types::responses::{
AssistantRole, FunctionCallOutput, FunctionToolCall, IncludeEnum, InputContent, InputItem, AssistantRole, FunctionCallOutput, FunctionToolCall, IncludeEnum, InputContent, InputItem,
InputOutputMessageContent, InputParam, InputRole, InputTokenDetails, Instructions, Item, InputOutputMessageContent, InputParam, InputRole, InputTokenDetails, Instructions, Item,
MessageItem, OutputItem, OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent, MessageItem, OutputItem, OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent,
OutputTokenDetails, Reasoning, ReasoningItem, Response, ResponseTextParam, ResponseUsage, OutputTokenDetails, PromptCacheRetention, Reasoning, ReasoningItem, Response,
Role as ResponseRole, ServiceTier, Status, SummaryPart, SummaryTextContent, ResponseTextParam, ResponseUsage, Role as ResponseRole, ServiceTier, Status, SummaryPart,
TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam, Truncation, SummaryTextContent, TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam,
Truncation,
}; };
use dynamo_protocols::types::{ use dynamo_protocols::types::{
ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice, ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice,
...@@ -63,7 +64,7 @@ pub struct NvCreateResponse { ...@@ -63,7 +64,7 @@ pub struct NvCreateResponse {
pub nvext: Option<NvExt>, pub nvext: Option<NvExt>,
} }
#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Deserialize, Validate, Debug, Clone)]
pub struct NvResponse { pub struct NvResponse {
/// Flattened Response fields (includes upstream + extended spec fields). /// Flattened Response fields (includes upstream + extended spec fields).
#[serde(flatten)] #[serde(flatten)]
...@@ -73,6 +74,78 @@ pub struct NvResponse { ...@@ -73,6 +74,78 @@ pub struct NvResponse {
/// NVIDIA extension field for response metadata (worker IDs, etc.) /// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<serde_json::Value>, pub nvext: Option<serde_json::Value>,
/// OpenResponses spec requires these as non-null scalars on every response,
/// but async-openai's `Response` doesn't model them. Populated from the
/// originating request. Surfaced during serialization (see `Serialize`
/// impl below); not persisted as top-level fields on the inner struct.
#[serde(default)]
pub presence_penalty: f32,
#[serde(default)]
pub frequency_penalty: f32,
#[serde(default)]
pub store: bool,
}
/// Patch an already-serialized `Response` JSON object to match the
/// OpenResponses spec. Applied both to one-shot `NvResponse` serialization
/// and to every `Response` embedded inside a streaming event payload.
///
/// Reconciles two spec gaps between upstream async-openai's `Response` and
/// the OpenResponses spec:
///
/// 1. Fields the spec requires as `T | null` that upstream marks
/// `Option<T>` with `skip_serializing_if = Option::is_none`. These are
/// silently dropped when None; the spec wants them present as null.
/// 2. Fields the spec requires (`presence_penalty`, `frequency_penalty`,
/// `store`) that are absent from upstream `Response` entirely.
///
/// Rather than fork the upstream output chain (which would cascade into
/// `OutputItem`, streaming events, and a long tail of sub-types, per
/// `lib/protocols/CLAUDE.md`), we patch the serialized JSON. Adds a
/// single `serde_json::to_value` round-trip per response, which is
/// negligible next to tokenization/inference cost.
pub(crate) fn patch_response_for_spec(
obj: &mut serde_json::Map<String, serde_json::Value>,
presence_penalty: f32,
frequency_penalty: f32,
store: bool,
) {
for key in dynamo_protocols::types::responses::SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS {
obj.entry(*key).or_insert(serde_json::Value::Null);
}
obj.insert(
"presence_penalty".into(),
serde_json::json!(presence_penalty),
);
obj.insert(
"frequency_penalty".into(),
serde_json::json!(frequency_penalty),
);
obj.insert("store".into(), serde_json::json!(store));
}
impl Serialize for NvResponse {
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut value = serde_json::to_value(&self.inner).map_err(serde::ser::Error::custom)?;
let serde_json::Value::Object(obj) = &mut value else {
return value.serialize(serializer);
};
patch_response_for_spec(
obj,
self.presence_penalty,
self.frequency_penalty,
self.store,
);
if let Some(nvext) = &self.nvext {
obj.insert("nvext".into(), nvext.clone());
}
value.serialize(serializer)
}
} }
/// Implements `NvExtProvider` for `NvCreateResponse`, /// Implements `NvExtProvider` for `NvCreateResponse`,
...@@ -244,6 +317,24 @@ fn convert_input_content_to_text(content: &[InputContent]) -> String { ...@@ -244,6 +317,24 @@ fn convert_input_content_to_text(content: &[InputContent]) -> String {
.join("") .join("")
} }
/// Counterpart to `convert_input_content_to_text` for upstream's
/// `InputContent`. Upstream's enum appears inside `FunctionCallOutput::Content`
/// and `EasyInputContent::ContentList`, neither of which is Dynamo-owned, so
/// payloads deserialized through those paths land as upstream variants.
fn convert_upstream_input_content_to_text(
content: &[dynamo_protocols::types::responses::UpstreamInputContent],
) -> String {
use dynamo_protocols::types::responses::UpstreamInputContent;
content
.iter()
.filter_map(|p| match p {
UpstreamInputContent::InputText(t) => Some(t.text.as_str()),
_ => None,
})
.collect::<Vec<_>>()
.join("")
}
/// Accumulator for consecutive assistant-side items (OutputMessage, FunctionCall, /// Accumulator for consecutive assistant-side items (OutputMessage, FunctionCall,
/// Reasoning, assistant EasyMessage). Chat Completions represents an assistant /// Reasoning, assistant EasyMessage). Chat Completions represents an assistant
/// turn as a single message carrying `content`, `tool_calls`, and /// turn as a single message carrying `content`, `tool_calls`, and
...@@ -406,7 +497,9 @@ fn convert_input_items_to_messages( ...@@ -406,7 +497,9 @@ fn convert_input_items_to_messages(
std::mem::take(&mut pending).flush_into(&mut messages); std::mem::take(&mut pending).flush_into(&mut messages);
let output_text = match &fco.output { let output_text = match &fco.output {
FunctionCallOutput::Text(text) => text.clone(), FunctionCallOutput::Text(text) => text.clone(),
FunctionCallOutput::Content(parts) => convert_input_content_to_text(parts), FunctionCallOutput::Content(parts) => {
convert_upstream_input_content_to_text(parts)
}
}; };
messages.push(ChatCompletionRequestMessage::Tool( messages.push(ChatCompletionRequestMessage::Tool(
ChatCompletionRequestToolMessage { ChatCompletionRequestToolMessage {
...@@ -444,7 +537,7 @@ fn convert_input_items_to_messages( ...@@ -444,7 +537,7 @@ fn convert_input_items_to_messages(
text.clone() text.clone()
} }
dynamo_protocols::types::responses::EasyInputContent::ContentList(parts) => { dynamo_protocols::types::responses::EasyInputContent::ContentList(parts) => {
convert_input_content_to_text(parts) convert_upstream_input_content_to_text(parts)
} }
}; };
match easy.role { match easy.role {
...@@ -740,6 +833,22 @@ pub struct ResponseParams { ...@@ -740,6 +833,22 @@ pub struct ResponseParams {
pub service_tier: Option<ServiceTier>, pub service_tier: Option<ServiceTier>,
pub include: Option<Vec<IncludeEnum>>, pub include: Option<Vec<IncludeEnum>>,
pub truncation: Option<Truncation>, pub truncation: Option<Truncation>,
/// OpenResponses spec requires these fields on the response body. Upstream
/// `CreateResponse` doesn't model them on the request yet, so for now they
/// pass through as `None`; the response serializer defaults to 0.0 (the
/// effective sglang default). Wired through `ResponseParams` anyway so
/// that when upstream relaxes or we shadow `CreateResponse`, threading a
/// real value becomes a one-line change at the request-extraction site.
pub presence_penalty: Option<f32>,
pub frequency_penalty: Option<f32>,
/// Pass-through metadata fields. Codex and other clients send these as
/// hints for OpenAI's caching/moderation backends; Dynamo doesn't act on
/// them, but the spec includes them on the response body so we echo back
/// what the caller sent rather than silently dropping. Echoing makes
/// receipt observable to the client without needing a real backend.
pub prompt_cache_key: Option<String>,
pub prompt_cache_retention: Option<PromptCacheRetention>,
pub safety_identifier: Option<String>,
} }
/// Normalize tools so that `FunctionTool.strict` is always set. /// Normalize tools so that `FunctionTool.strict` is always set.
...@@ -880,13 +989,13 @@ pub fn chat_completion_to_response( ...@@ -880,13 +989,13 @@ pub fn chat_completion_to_response(
.include .include
.as_ref() .as_ref()
.is_some_and(|inc| inc.contains(&IncludeEnum::MessageOutputTextLogprobs)); .is_some_and(|inc| inc.contains(&IncludeEnum::MessageOutputTextLogprobs));
if !keep_logprobs { for item in &mut output {
for item in &mut output { if let OutputItem::Message(msg) = item {
if let OutputItem::Message(msg) = item { for content in &mut msg.content {
for content in &mut msg.content { if let OutputMessageContent::OutputText(text) = content
if let OutputMessageContent::OutputText(text) = content { && (!keep_logprobs || text.logprobs.is_none())
text.logprobs = None; {
} text.logprobs = Some(Vec::new());
} }
} }
} }
...@@ -936,10 +1045,10 @@ pub fn chat_completion_to_response( ...@@ -936,10 +1045,10 @@ pub fn chat_completion_to_response(
max_output_tokens: params.max_output_tokens, max_output_tokens: params.max_output_tokens,
previous_response_id: api_context.and_then(|ctx| ctx.previous_response_id.clone()), previous_response_id: api_context.and_then(|ctx| ctx.previous_response_id.clone()),
prompt: None, prompt: None,
prompt_cache_key: None, prompt_cache_key: params.prompt_cache_key.clone(),
prompt_cache_retention: None, prompt_cache_retention: params.prompt_cache_retention,
reasoning: params.reasoning.clone(), reasoning: params.reasoning.clone(),
safety_identifier: None, safety_identifier: params.safety_identifier.clone(),
service_tier: Some(params.service_tier.unwrap_or(ServiceTier::Auto)), service_tier: Some(params.service_tier.unwrap_or(ServiceTier::Auto)),
top_logprobs: Some(0), top_logprobs: Some(0),
usage: chat_resp.usage.map(|u| ResponseUsage { usage: chat_resp.usage.map(|u| ResponseUsage {
...@@ -964,6 +1073,9 @@ pub fn chat_completion_to_response( ...@@ -964,6 +1073,9 @@ pub fn chat_completion_to_response(
Ok(NvResponse { Ok(NvResponse {
inner: response, inner: response,
nvext, nvext,
presence_penalty: params.presence_penalty.unwrap_or(0.0),
frequency_penalty: params.frequency_penalty.unwrap_or(0.0),
store: params.store.unwrap_or(false),
}) })
} }
...@@ -2475,7 +2587,10 @@ thinking ...@@ -2475,7 +2587,10 @@ thinking
} }
#[test] #[test]
fn test_include_logprobs_stripped_by_default() { fn test_include_logprobs_empty_by_default() {
// OpenResponses schema requires `logprobs` to be an array. When the
// caller did not request them via `include`, emit an empty array
// rather than null.
let chat_resp = make_chat_resp_with_text("hello"); let chat_resp = make_chat_resp_with_text("hello");
let params = ResponseParams::default(); let params = ResponseParams::default();
let resp = chat_completion_to_response(chat_resp, &params, None).unwrap(); let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
...@@ -2484,9 +2599,10 @@ thinking ...@@ -2484,9 +2599,10 @@ thinking
if let OutputItem::Message(msg) = item { if let OutputItem::Message(msg) = item {
for content in &msg.content { for content in &msg.content {
if let OutputMessageContent::OutputText(t) = content { if let OutputMessageContent::OutputText(t) = content {
assert!( assert_eq!(
t.logprobs.is_none(), t.logprobs.as_deref(),
"logprobs should be stripped by default" Some(&[][..]),
"logprobs should be an empty array by default"
); );
} }
} }
...@@ -2543,11 +2659,35 @@ thinking ...@@ -2543,11 +2659,35 @@ thinking
assert_eq!(resp.inner.truncation, Some(Truncation::Disabled)); assert_eq!(resp.inner.truncation, Some(Truncation::Disabled));
} }
/// Validate the JSON wire shape of NvResponse. /// Pass-through metadata fields the OpenResponses spec includes on the
/// /// response body. Codex sends `prompt_cache_key` on every request; we
/// The migration to upstream async-openai v0.34 removed fields that were /// echo it back so the caller can confirm receipt without enforcing any
/// incorrectly present on our old local Response type (they belong on the /// caching semantics. Same pattern for `prompt_cache_retention` and
/// request, not the response, per the OpenAI Responses API spec). /// `safety_identifier`.
#[test]
fn test_response_echoes_passthrough_metadata() {
let chat_resp = make_chat_resp_with_text("hello");
let params = ResponseParams {
prompt_cache_key: Some("cache-key-codex-1".into()),
prompt_cache_retention: Some(PromptCacheRetention::InMemory),
safety_identifier: Some("user-abc".into()),
..Default::default()
};
let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
assert_eq!(
resp.inner.prompt_cache_key.as_deref(),
Some("cache-key-codex-1")
);
assert_eq!(
resp.inner.prompt_cache_retention,
Some(PromptCacheRetention::InMemory)
);
assert_eq!(resp.inner.safety_identifier.as_deref(), Some("user-abc"));
}
/// Validate the JSON wire shape of NvResponse matches the OpenResponses
/// spec: required scalars always present, nullable-required fields
/// emitted as `null` when None.
#[test] #[test]
fn test_response_wire_format_shape() { fn test_response_wire_format_shape() {
let chat_resp = make_chat_resp_with_text("hello"); let chat_resp = make_chat_resp_with_text("hello");
...@@ -2555,14 +2695,14 @@ thinking ...@@ -2555,14 +2695,14 @@ thinking
let resp = chat_completion_to_response(chat_resp, &params, None).unwrap(); let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
let json = serde_json::to_value(&resp).unwrap(); let json = serde_json::to_value(&resp).unwrap();
// Fields that were on our old local type but are NOT in the OpenAI // Required scalars the spec mandates on every response. Upstream
// Responses API spec -- they are request-level, not response-level. // async-openai's Response struct doesn't model these; NvResponse's
assert!(json.get("frequency_penalty").is_none()); // custom serializer injects them.
assert!(json.get("presence_penalty").is_none()); assert_eq!(json["frequency_penalty"], 0.0);
assert!(json.get("store").is_none()); assert_eq!(json["presence_penalty"], 0.0);
assert!(json.get("max_tool_calls").is_none()); assert_eq!(json["store"], false);
// Fields that should be present with expected values // Other required fields with expected values
assert_eq!(json["object"], "response"); assert_eq!(json["object"], "response");
assert_eq!(json["status"], "completed"); assert_eq!(json["status"], "completed");
assert_eq!(json["metadata"], serde_json::json!({})); assert_eq!(json["metadata"], serde_json::json!({}));
...@@ -2570,12 +2710,25 @@ thinking ...@@ -2570,12 +2710,25 @@ thinking
assert!(json["output"][0].get("id").is_some()); assert!(json["output"][0].get("id").is_some());
assert!(json["output"][0].get("status").is_some()); assert!(json["output"][0].get("status").is_some());
// Optional fields with None should be omitted (upstream uses skip_serializing_if) // Nullable-required fields must be present as null (not missing).
assert!(json.get("error").is_none()); for key in [
assert!(json.get("incomplete_details").is_none()); "error",
assert!(json.get("billing").is_none()); "incomplete_details",
assert!(json.get("conversation").is_none()); "billing",
assert!(json.get("safety_identifier").is_none()); "conversation",
"safety_identifier",
"max_tool_calls",
"instructions",
"previous_response_id",
"prompt_cache_key",
"reasoning",
] {
assert_eq!(
json.get(key),
Some(&serde_json::Value::Null),
"expected {key} to be present as null"
);
}
// nvext should be omitted when None // nvext should be omitted when None
assert!(json.get("nvext").is_none()); assert!(json.get("nvext").is_none());
......
...@@ -155,10 +155,10 @@ impl ResponseStreamConverter { ...@@ -155,10 +155,10 @@ impl ResponseStreamConverter {
.as_ref() .as_ref()
.and_then(|ctx| ctx.previous_response_id.clone()), .and_then(|ctx| ctx.previous_response_id.clone()),
prompt: None, prompt: None,
prompt_cache_key: None, prompt_cache_key: self.params.prompt_cache_key.clone(),
prompt_cache_retention: None, prompt_cache_retention: self.params.prompt_cache_retention,
reasoning: self.params.reasoning.clone(), reasoning: self.params.reasoning.clone(),
safety_identifier: None, safety_identifier: self.params.safety_identifier.clone(),
service_tier: Some(self.params.service_tier.unwrap_or(ServiceTier::Auto)), service_tier: Some(self.params.service_tier.unwrap_or(ServiceTier::Auto)),
top_logprobs: Some(0), top_logprobs: Some(0),
usage: self.usage.clone(), usage: self.usage.clone(),
...@@ -173,13 +173,13 @@ impl ResponseStreamConverter { ...@@ -173,13 +173,13 @@ impl ResponseStreamConverter {
sequence_number: self.next_seq(), sequence_number: self.next_seq(),
response: self.make_response(Status::InProgress, vec![]), response: self.make_response(Status::InProgress, vec![]),
}); });
events.push(make_sse_event(&created)); events.push(self.make_sse_event(&created));
let in_progress = ResponseStreamEvent::ResponseInProgress(ResponseInProgressEvent { let in_progress = ResponseStreamEvent::ResponseInProgress(ResponseInProgressEvent {
sequence_number: self.next_seq(), sequence_number: self.next_seq(),
response: self.make_response(Status::InProgress, vec![]), response: self.make_response(Status::InProgress, vec![]),
}); });
events.push(make_sse_event(&in_progress)); events.push(self.make_sse_event(&in_progress));
events events
} }
...@@ -249,7 +249,7 @@ impl ResponseStreamConverter { ...@@ -249,7 +249,7 @@ impl ResponseStreamConverter {
}), }),
}, },
); );
events.push(make_sse_event(&item_added)); events.push(self.make_sse_event(&item_added));
let part_added = ResponseStreamEvent::ResponseContentPartAdded( let part_added = ResponseStreamEvent::ResponseContentPartAdded(
ResponseContentPartAddedEvent { ResponseContentPartAddedEvent {
...@@ -264,7 +264,7 @@ impl ResponseStreamConverter { ...@@ -264,7 +264,7 @@ impl ResponseStreamConverter {
}), }),
}, },
); );
events.push(make_sse_event(&part_added)); events.push(self.make_sse_event(&part_added));
} }
// Emit text delta // Emit text delta
...@@ -278,7 +278,7 @@ impl ResponseStreamConverter { ...@@ -278,7 +278,7 @@ impl ResponseStreamConverter {
delta: content.to_string(), delta: content.to_string(),
logprobs: Some(vec![]), logprobs: Some(vec![]),
}); });
events.push(make_sse_event(&text_delta)); events.push(self.make_sse_event(&text_delta));
} }
// Handle tool call deltas // Handle tool call deltas
...@@ -332,7 +332,7 @@ impl ResponseStreamConverter { ...@@ -332,7 +332,7 @@ impl ResponseStreamConverter {
}), }),
}, },
); );
events.push(make_sse_event(&item_added)); events.push(self.make_sse_event(&item_added));
} }
self.function_call_items[tc_index] self.function_call_items[tc_index]
...@@ -355,7 +355,7 @@ impl ResponseStreamConverter { ...@@ -355,7 +355,7 @@ impl ResponseStreamConverter {
delta: args.clone(), delta: args.clone(),
}, },
); );
events.push(make_sse_event(&args_delta)); events.push(self.make_sse_event(&args_delta));
// Emit done + output_item.done immediately if the tool call // Emit done + output_item.done immediately if the tool call
// arrived complete in a single chunk (id + name + args all present). // arrived complete in a single chunk (id + name + args all present).
...@@ -382,7 +382,7 @@ impl ResponseStreamConverter { ...@@ -382,7 +382,7 @@ impl ResponseStreamConverter {
name: Some(fc_name.clone()), name: Some(fc_name.clone()),
}, },
); );
events.push(make_sse_event(&args_done)); events.push(self.make_sse_event(&args_done));
let item_done = ResponseStreamEvent::ResponseOutputItemDone( let item_done = ResponseStreamEvent::ResponseOutputItemDone(
ResponseOutputItemDoneEvent { ResponseOutputItemDoneEvent {
...@@ -398,7 +398,7 @@ impl ResponseStreamConverter { ...@@ -398,7 +398,7 @@ impl ResponseStreamConverter {
}), }),
}, },
); );
events.push(make_sse_event(&item_done)); events.push(self.make_sse_event(&item_done));
} }
} }
} }
...@@ -423,7 +423,7 @@ impl ResponseStreamConverter { ...@@ -423,7 +423,7 @@ impl ResponseStreamConverter {
text: self.accumulated_text.clone(), text: self.accumulated_text.clone(),
logprobs: Some(vec![]), logprobs: Some(vec![]),
}); });
events.push(make_sse_event(&text_done)); events.push(self.make_sse_event(&text_done));
let part_done = let part_done =
ResponseStreamEvent::ResponseContentPartDone(ResponseContentPartDoneEvent { ResponseStreamEvent::ResponseContentPartDone(ResponseContentPartDoneEvent {
...@@ -437,7 +437,7 @@ impl ResponseStreamConverter { ...@@ -437,7 +437,7 @@ impl ResponseStreamConverter {
logprobs: Some(vec![]), logprobs: Some(vec![]),
}), }),
}); });
events.push(make_sse_event(&part_done)); events.push(self.make_sse_event(&part_done));
let item_done = let item_done =
ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent { ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent {
...@@ -455,7 +455,7 @@ impl ResponseStreamConverter { ...@@ -455,7 +455,7 @@ impl ResponseStreamConverter {
status: OutputStatus::Completed, status: OutputStatus::Completed,
}), }),
}); });
events.push(make_sse_event(&item_done)); events.push(self.make_sse_event(&item_done));
} }
// Close any function call items not already done inline // Close any function call items not already done inline
...@@ -483,7 +483,7 @@ impl ResponseStreamConverter { ...@@ -483,7 +483,7 @@ impl ResponseStreamConverter {
name: Some(fc_name.clone()), name: Some(fc_name.clone()),
}, },
); );
events.push(make_sse_event(&args_done)); events.push(self.make_sse_event(&args_done));
let item_done = let item_done =
ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent { ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent {
...@@ -498,7 +498,7 @@ impl ResponseStreamConverter { ...@@ -498,7 +498,7 @@ impl ResponseStreamConverter {
status: Some(OutputStatus::Completed), status: Some(OutputStatus::Completed),
}), }),
}); });
events.push(make_sse_event(&item_done)); events.push(self.make_sse_event(&item_done));
} }
// Build the final output vector from accumulated state // Build the final output vector from accumulated state
...@@ -534,7 +534,7 @@ impl ResponseStreamConverter { ...@@ -534,7 +534,7 @@ impl ResponseStreamConverter {
sequence_number: self.next_seq(), sequence_number: self.next_seq(),
response: self.make_response(Status::Completed, output), response: self.make_response(Status::Completed, output),
}); });
events.push(make_sse_event(&completed)); events.push(self.make_sse_event(&completed));
events events
} }
...@@ -547,16 +547,33 @@ impl ResponseStreamConverter { ...@@ -547,16 +547,33 @@ impl ResponseStreamConverter {
sequence_number: self.next_seq(), sequence_number: self.next_seq(),
response: self.make_response(Status::Failed, vec![]), response: self.make_response(Status::Failed, vec![]),
}); });
events.push(make_sse_event(&failed)); events.push(self.make_sse_event(&failed));
events events
} }
} }
fn make_sse_event(event: &ResponseStreamEvent) -> Result<Event, anyhow::Error> { impl ResponseStreamConverter {
let event_type = get_event_type(event); /// Serialize a stream event, patching any embedded `response` object to
let data = serde_json::to_string(event)?; /// satisfy the OpenResponses schema. Takes `&self` so spec-required
Ok(Event::default().event(event_type).data(data)) /// sampling params can be sourced from the originating request via
/// `self.params` rather than hardcoded at each emit site.
fn make_sse_event(&self, event: &ResponseStreamEvent) -> Result<Event, anyhow::Error> {
let event_type = get_event_type(event);
let mut value = serde_json::to_value(event)?;
if let serde_json::Value::Object(ref mut obj) = value
&& let Some(serde_json::Value::Object(inner)) = obj.get_mut("response")
{
super::patch_response_for_spec(
inner,
self.params.presence_penalty.unwrap_or(0.0),
self.params.frequency_penalty.unwrap_or(0.0),
self.params.store.unwrap_or(false),
);
}
let data = serde_json::to_string(&value)?;
Ok(Event::default().event(event_type).data(data))
}
} }
fn get_event_type(event: &ResponseStreamEvent) -> &'static str { fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
...@@ -677,22 +694,7 @@ mod tests { ...@@ -677,22 +694,7 @@ mod tests {
}; };
fn default_params() -> ResponseParams { fn default_params() -> ResponseParams {
ResponseParams { ResponseParams::default()
model: None,
temperature: None,
top_p: None,
max_output_tokens: None,
parallel_tool_calls: None,
store: None,
tools: None,
tool_choice: None,
instructions: None,
reasoning: None,
text: None,
service_tier: None,
include: None,
truncation: None,
}
} }
fn tool_call_chunk( fn tool_call_chunk(
......
...@@ -35,6 +35,13 @@ use serde::{Deserialize, Serialize}; ...@@ -35,6 +35,13 @@ use serde::{Deserialize, Serialize};
// shadow their upstream counterparts where no dual-side conflict exists. // shadow their upstream counterparts where no dual-side conflict exists.
pub use async_openai::types::responses::*; pub use async_openai::types::responses::*;
// Re-export upstream's pre-shadow `InputContent` under an explicit alias.
// Needed because `FunctionCallOutput::Content` and `EasyInputContent::ContentList`
// are non-owned upstream types that carry upstream's original `InputContent`
// inline, so downstream consumers occasionally need to name it alongside the
// Dynamo-owned shadow defined further down this module.
pub use async_openai::types::responses::InputContent as UpstreamInputContent;
// Re-export from parent module for backward compat. // Re-export from parent module for backward compat.
pub use crate::types::ImageDetail; pub use crate::types::ImageDetail;
pub use crate::types::ReasoningEffort; pub use crate::types::ReasoningEffort;
...@@ -51,6 +58,40 @@ pub type ResponseStream = std::pin::Pin< ...@@ -51,6 +58,40 @@ pub type ResponseStream = std::pin::Pin<
Box<dyn futures::Stream<Item = Result<ResponseStreamEvent, crate::error::OpenAIError>> + Send>, Box<dyn futures::Stream<Item = Result<ResponseStreamEvent, crate::error::OpenAIError>> + Send>,
>; >;
/// Fields on upstream `Response` that the OpenResponses spec requires as
/// `T | null` but async-openai declares as `Option<T>` with
/// `skip_serializing_if = Option::is_none` — meaning `None` disappears from
/// the wire shape, where the spec wants an explicit `null`.
///
/// Colocated here (next to the upstream `Response` re-export) rather than in
/// `lib/llm/src/protocols/openai/responses/mod.rs` so that when upstream's
/// `Response` gains a new nullable-required field, the reviewer editing this
/// module is looking directly at the authoritative list. Keep sorted
/// alphabetically; entries must match serde field names on `Response` exactly.
///
/// Any field we unconditionally populate ourselves during response
/// construction (e.g. `metadata`, `parallel_tool_calls`, `temperature`,
/// `text`, `tool_choice`, `tools`, `top_p`, `top_logprobs`, `truncation`,
/// `service_tier`, `background`) is deliberately absent — it's always
/// present on the wire, so listing it here would be noise.
pub const SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS: &[&str] = &[
"billing",
"completed_at",
"conversation",
"error",
"incomplete_details",
"instructions",
"max_output_tokens",
"max_tool_calls",
"previous_response_id",
"prompt",
"prompt_cache_key",
"prompt_cache_retention",
"reasoning",
"safety_identifier",
"usage",
];
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Input-side assistant message (relaxed vs upstream OutputMessage) // Input-side assistant message (relaxed vs upstream OutputMessage)
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
...@@ -68,6 +109,19 @@ where ...@@ -68,6 +109,19 @@ where
Option::<Vec<T>>::deserialize(deserializer).map(Option::unwrap_or_default) Option::<Vec<T>>::deserialize(deserializer).map(Option::unwrap_or_default)
} }
/// Deserialize `null` or a missing field as `T::default()`. Scalar counterpart
/// to `deserialize_null_as_empty_vec` — plain `#[serde(default)]` rejects
/// explicit `null` because serde tries to deserialize the null into `T` and
/// fails. Real clients emit `null` for unset enum-ish fields (e.g. OpenAI
/// Agents SDK sending `"detail": null` on `input_image` parts).
fn deserialize_null_as_default<'de, T, D>(deserializer: D) -> Result<T, D::Error>
where
T: Deserialize<'de> + Default,
D: serde::Deserializer<'de>,
{
Option::<T>::deserialize(deserializer).map(Option::unwrap_or_default)
}
/// Relaxed counterpart to upstream `OutputTextContent` for input-side content. /// Relaxed counterpart to upstream `OutputTextContent` for input-side content.
/// `annotations` tolerates both missing and explicit `null`; upstream requires /// `annotations` tolerates both missing and explicit `null`; upstream requires
/// it to be a present non-null array. /// it to be a present non-null array.
...@@ -107,6 +161,45 @@ pub struct InputOutputMessage { ...@@ -107,6 +161,45 @@ pub struct InputOutputMessage {
pub status: Option<OutputStatus>, pub status: Option<OutputStatus>,
} }
// ---------------------------------------------------------------------------
// Input-side image / content / message (shadow upstream, relaxed shapes)
// ---------------------------------------------------------------------------
/// Relaxed counterpart to upstream `InputImageContent`. `detail` defaults to
/// `ImageDetail::Auto` when the client omits it — OpenAI's hosted API and the
/// OpenResponses spec both accept this shape, but upstream's struct marks
/// `detail` as required.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct InputImageContent {
#[serde(default, deserialize_with = "deserialize_null_as_default")]
pub detail: ImageDetail,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub file_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub image_url: Option<String>,
}
/// Parts of an input message: text, image, or file. Mirrors upstream
/// `InputContent` but routes `InputImage` through the Dynamo-owned relaxed
/// `InputImageContent` above.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InputContent {
InputText(InputTextContent),
InputImage(InputImageContent),
InputFile(InputFileContent),
}
/// User / system / developer input message. Shadows upstream `InputMessage`
/// so we can route through the Dynamo-owned `InputContent` chain.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
pub struct InputMessage {
pub content: Vec<InputContent>,
pub role: InputRole,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub status: Option<OutputStatus>,
}
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
// Input-side Item / Message / InputItem / InputParam (shadow upstream) // Input-side Item / Message / InputItem / InputParam (shadow upstream)
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
...@@ -271,6 +364,33 @@ mod tests { ...@@ -271,6 +364,33 @@ mod tests {
} }
} }
#[test]
fn input_image_without_detail_defaults_to_auto() {
let json = serde_json::json!({
"type": "input_image",
"image_url": "https://example.com/cat.jpg"
});
let content: InputContent = serde_json::from_value(json).unwrap();
match content {
InputContent::InputImage(img) => assert_eq!(img.detail, ImageDetail::Auto),
other => panic!("expected InputImage, got {other:?}"),
}
}
#[test]
fn input_image_with_explicit_null_detail_defaults_to_auto() {
let json = serde_json::json!({
"type": "input_image",
"image_url": "https://example.com/cat.jpg",
"detail": null
});
let content: InputContent = serde_json::from_value(json).unwrap();
match content {
InputContent::InputImage(img) => assert_eq!(img.detail, ImageDetail::Auto),
other => panic!("expected InputImage, got {other:?}"),
}
}
#[test] #[test]
fn assistant_message_without_content_field_deserializes() { fn assistant_message_without_content_field_deserializes() {
// Bare assistant shell — no `content` field at all. Seen in real // Bare assistant shell — no `content` field at all. Seen in real
......
...@@ -232,6 +232,7 @@ markers = [ ...@@ -232,6 +232,7 @@ markers = [
"post_merge: marks tests to run after merge", "post_merge: marks tests to run after merge",
"parallel: marks tests that can run in parallel with pytest-xdist", "parallel: marks tests that can run in parallel with pytest-xdist",
"nightly: marks tests to run nightly", "nightly: marks tests to run nightly",
"frontend_api_surface_compliance: marks tests that validate Dynamo's HTTP API surface (Responses/Anthropic wire shape, tool-call routing) against upstream compliance harnesses",
"weekly: marks tests to run weekly", "weekly: marks tests to run weekly",
"release: marks tests to run on release pipelines", "release: marks tests to run on release pipelines",
"gpu_0: marks tests that don't require GPU", "gpu_0: marks tests that don't require GPU",
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment