Unverified Commit 2a95ef63 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

fix(responses): align wire shape with OpenResponses spec + add compliance CI (#8283)


Signed-off-by: default avatarAnant Sharma <anants@nvidia.com>
Co-authored-by: default avatarAnant Sharma <anants@nvidia.com>
parent 4410a2c5
......@@ -1595,6 +1595,18 @@ async fn responses(
service_tier: request.inner.service_tier,
include: request.inner.include.clone(),
truncation: request.inner.truncation,
// Upstream `CreateResponse` doesn't carry these yet; plumbed through so
// the response serializer can default to 0.0 without hardcoding at the
// build site. When upstream (or our shadow) adds the fields, sourcing
// from the request becomes a one-line change here.
presence_penalty: None,
frequency_penalty: None,
// Pass-through metadata — accepted on the request, echoed back on the
// response so the caller can confirm receipt. Dynamo doesn't act on
// these; see `validate_response_unsupported_fields` for rationale.
prompt_cache_key: request.inner.prompt_cache_key.clone(),
prompt_cache_retention: request.inner.prompt_cache_retention,
safety_identifier: request.inner.safety_identifier.clone(),
};
let request_id = request.id().to_string();
let (orig_request, context) = request.into_parts();
......@@ -1830,6 +1842,24 @@ pub fn validate_response_unsupported_fields(
VALIDATION_PREFIX.to_string() + "`prompt` is not supported.",
));
}
// Reject directive fields that change semantics if silently dropped.
// `max_tool_calls` is a hard cap on tool invocations — accepting it
// without enforcement would let a caller send `max_tool_calls: 5` and
// see `max_tool_calls: null` in the response, assuming their limit was
// honored. Fail loud until real enforcement lands.
//
// Pass-through metadata fields (`prompt_cache_key`,
// `prompt_cache_retention`, `safety_identifier`) are deliberately
// accepted and echoed back on the response instead. They're hints for
// OpenAI's caching/moderation backends, not directives — Codex sends
// `prompt_cache_key` on every request — and the OpenResponses spec
// includes them on the response body, so echoing the caller's value
// makes receipt observable without needing a real backend.
if inner.max_tool_calls.is_some() {
return Some(ErrorMessage::not_implemented_error(
VALIDATION_PREFIX.to_string() + "`max_tool_calls` is not supported.",
));
}
None
}
......@@ -2714,6 +2744,7 @@ mod tests {
})
}),
),
("max_tool_calls", Box::new(|r| r.max_tool_calls = Some(5))),
];
for (field, set_field) in unsupported_cases {
......@@ -2724,6 +2755,43 @@ mod tests {
}
}
/// Pass-through metadata fields (`prompt_cache_key`,
/// `prompt_cache_retention`, `safety_identifier`) are accepted at the
/// validation layer; the response serializer echoes them back so the
/// caller can confirm receipt. Codex sends `prompt_cache_key` on every
/// request — rejecting it broke `codex exec` end-to-end.
#[test]
fn test_validate_unsupported_fields_accepts_passthrough_metadata() {
#[allow(clippy::type_complexity)]
let passthrough_cases: Vec<(&str, Box<dyn FnOnce(&mut CreateResponse)>)> = vec![
(
"prompt_cache_key",
Box::new(|r| r.prompt_cache_key = Some("ck-1".into())),
),
(
"prompt_cache_retention",
Box::new(|r| {
r.prompt_cache_retention =
Some(dynamo_protocols::types::responses::PromptCacheRetention::InMemory)
}),
),
(
"safety_identifier",
Box::new(|r| r.safety_identifier = Some("user-hash".into())),
),
];
for (field, set_field) in passthrough_cases {
let mut req = make_base_request();
(set_field)(&mut req.inner);
let result = validate_response_unsupported_fields(&req);
assert!(
result.is_none(),
"Expected `{field}` to be accepted as pass-through metadata"
);
}
}
#[test]
fn test_validate_chat_completion_required_fields_empty_messages() {
let request = NvCreateChatCompletionRequest {
......
......@@ -9,9 +9,10 @@ use dynamo_protocols::types::responses::{
AssistantRole, FunctionCallOutput, FunctionToolCall, IncludeEnum, InputContent, InputItem,
InputOutputMessageContent, InputParam, InputRole, InputTokenDetails, Instructions, Item,
MessageItem, OutputItem, OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent,
OutputTokenDetails, Reasoning, ReasoningItem, Response, ResponseTextParam, ResponseUsage,
Role as ResponseRole, ServiceTier, Status, SummaryPart, SummaryTextContent,
TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam, Truncation,
OutputTokenDetails, PromptCacheRetention, Reasoning, ReasoningItem, Response,
ResponseTextParam, ResponseUsage, Role as ResponseRole, ServiceTier, Status, SummaryPart,
SummaryTextContent, TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam,
Truncation,
};
use dynamo_protocols::types::{
ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice,
......@@ -63,7 +64,7 @@ pub struct NvCreateResponse {
pub nvext: Option<NvExt>,
}
#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
#[derive(ToSchema, Deserialize, Validate, Debug, Clone)]
pub struct NvResponse {
/// Flattened Response fields (includes upstream + extended spec fields).
#[serde(flatten)]
......@@ -73,6 +74,78 @@ pub struct NvResponse {
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<serde_json::Value>,
/// OpenResponses spec requires these as non-null scalars on every response,
/// but async-openai's `Response` doesn't model them. Populated from the
/// originating request. Surfaced during serialization (see `Serialize`
/// impl below); not persisted as top-level fields on the inner struct.
#[serde(default)]
pub presence_penalty: f32,
#[serde(default)]
pub frequency_penalty: f32,
#[serde(default)]
pub store: bool,
}
/// Patch an already-serialized `Response` JSON object to match the
/// OpenResponses spec. Applied both to one-shot `NvResponse` serialization
/// and to every `Response` embedded inside a streaming event payload.
///
/// Reconciles two spec gaps between upstream async-openai's `Response` and
/// the OpenResponses spec:
///
/// 1. Fields the spec requires as `T | null` that upstream marks
/// `Option<T>` with `skip_serializing_if = Option::is_none`. These are
/// silently dropped when None; the spec wants them present as null.
/// 2. Fields the spec requires (`presence_penalty`, `frequency_penalty`,
/// `store`) that are absent from upstream `Response` entirely.
///
/// Rather than fork the upstream output chain (which would cascade into
/// `OutputItem`, streaming events, and a long tail of sub-types, per
/// `lib/protocols/CLAUDE.md`), we patch the serialized JSON. Adds a
/// single `serde_json::to_value` round-trip per response, which is
/// negligible next to tokenization/inference cost.
pub(crate) fn patch_response_for_spec(
obj: &mut serde_json::Map<String, serde_json::Value>,
presence_penalty: f32,
frequency_penalty: f32,
store: bool,
) {
for key in dynamo_protocols::types::responses::SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS {
obj.entry(*key).or_insert(serde_json::Value::Null);
}
obj.insert(
"presence_penalty".into(),
serde_json::json!(presence_penalty),
);
obj.insert(
"frequency_penalty".into(),
serde_json::json!(frequency_penalty),
);
obj.insert("store".into(), serde_json::json!(store));
}
impl Serialize for NvResponse {
fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
let mut value = serde_json::to_value(&self.inner).map_err(serde::ser::Error::custom)?;
let serde_json::Value::Object(obj) = &mut value else {
return value.serialize(serializer);
};
patch_response_for_spec(
obj,
self.presence_penalty,
self.frequency_penalty,
self.store,
);
if let Some(nvext) = &self.nvext {
obj.insert("nvext".into(), nvext.clone());
}
value.serialize(serializer)
}
}
/// Implements `NvExtProvider` for `NvCreateResponse`,
......@@ -244,6 +317,24 @@ fn convert_input_content_to_text(content: &[InputContent]) -> String {
.join("")
}
/// Counterpart to `convert_input_content_to_text` for upstream's
/// `InputContent`. Upstream's enum appears inside `FunctionCallOutput::Content`
/// and `EasyInputContent::ContentList`, neither of which is Dynamo-owned, so
/// payloads deserialized through those paths land as upstream variants.
fn convert_upstream_input_content_to_text(
content: &[dynamo_protocols::types::responses::UpstreamInputContent],
) -> String {
use dynamo_protocols::types::responses::UpstreamInputContent;
content
.iter()
.filter_map(|p| match p {
UpstreamInputContent::InputText(t) => Some(t.text.as_str()),
_ => None,
})
.collect::<Vec<_>>()
.join("")
}
/// Accumulator for consecutive assistant-side items (OutputMessage, FunctionCall,
/// Reasoning, assistant EasyMessage). Chat Completions represents an assistant
/// turn as a single message carrying `content`, `tool_calls`, and
......@@ -406,7 +497,9 @@ fn convert_input_items_to_messages(
std::mem::take(&mut pending).flush_into(&mut messages);
let output_text = match &fco.output {
FunctionCallOutput::Text(text) => text.clone(),
FunctionCallOutput::Content(parts) => convert_input_content_to_text(parts),
FunctionCallOutput::Content(parts) => {
convert_upstream_input_content_to_text(parts)
}
};
messages.push(ChatCompletionRequestMessage::Tool(
ChatCompletionRequestToolMessage {
......@@ -444,7 +537,7 @@ fn convert_input_items_to_messages(
text.clone()
}
dynamo_protocols::types::responses::EasyInputContent::ContentList(parts) => {
convert_input_content_to_text(parts)
convert_upstream_input_content_to_text(parts)
}
};
match easy.role {
......@@ -740,6 +833,22 @@ pub struct ResponseParams {
pub service_tier: Option<ServiceTier>,
pub include: Option<Vec<IncludeEnum>>,
pub truncation: Option<Truncation>,
/// OpenResponses spec requires these fields on the response body. Upstream
/// `CreateResponse` doesn't model them on the request yet, so for now they
/// pass through as `None`; the response serializer defaults to 0.0 (the
/// effective sglang default). Wired through `ResponseParams` anyway so
/// that when upstream relaxes or we shadow `CreateResponse`, threading a
/// real value becomes a one-line change at the request-extraction site.
pub presence_penalty: Option<f32>,
pub frequency_penalty: Option<f32>,
/// Pass-through metadata fields. Codex and other clients send these as
/// hints for OpenAI's caching/moderation backends; Dynamo doesn't act on
/// them, but the spec includes them on the response body so we echo back
/// what the caller sent rather than silently dropping. Echoing makes
/// receipt observable to the client without needing a real backend.
pub prompt_cache_key: Option<String>,
pub prompt_cache_retention: Option<PromptCacheRetention>,
pub safety_identifier: Option<String>,
}
/// Normalize tools so that `FunctionTool.strict` is always set.
......@@ -880,13 +989,13 @@ pub fn chat_completion_to_response(
.include
.as_ref()
.is_some_and(|inc| inc.contains(&IncludeEnum::MessageOutputTextLogprobs));
if !keep_logprobs {
for item in &mut output {
if let OutputItem::Message(msg) = item {
for content in &mut msg.content {
if let OutputMessageContent::OutputText(text) = content {
text.logprobs = None;
}
if let OutputMessageContent::OutputText(text) = content
&& (!keep_logprobs || text.logprobs.is_none())
{
text.logprobs = Some(Vec::new());
}
}
}
......@@ -936,10 +1045,10 @@ pub fn chat_completion_to_response(
max_output_tokens: params.max_output_tokens,
previous_response_id: api_context.and_then(|ctx| ctx.previous_response_id.clone()),
prompt: None,
prompt_cache_key: None,
prompt_cache_retention: None,
prompt_cache_key: params.prompt_cache_key.clone(),
prompt_cache_retention: params.prompt_cache_retention,
reasoning: params.reasoning.clone(),
safety_identifier: None,
safety_identifier: params.safety_identifier.clone(),
service_tier: Some(params.service_tier.unwrap_or(ServiceTier::Auto)),
top_logprobs: Some(0),
usage: chat_resp.usage.map(|u| ResponseUsage {
......@@ -964,6 +1073,9 @@ pub fn chat_completion_to_response(
Ok(NvResponse {
inner: response,
nvext,
presence_penalty: params.presence_penalty.unwrap_or(0.0),
frequency_penalty: params.frequency_penalty.unwrap_or(0.0),
store: params.store.unwrap_or(false),
})
}
......@@ -2475,7 +2587,10 @@ thinking
}
#[test]
fn test_include_logprobs_stripped_by_default() {
fn test_include_logprobs_empty_by_default() {
// OpenResponses schema requires `logprobs` to be an array. When the
// caller did not request them via `include`, emit an empty array
// rather than null.
let chat_resp = make_chat_resp_with_text("hello");
let params = ResponseParams::default();
let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
......@@ -2484,9 +2599,10 @@ thinking
if let OutputItem::Message(msg) = item {
for content in &msg.content {
if let OutputMessageContent::OutputText(t) = content {
assert!(
t.logprobs.is_none(),
"logprobs should be stripped by default"
assert_eq!(
t.logprobs.as_deref(),
Some(&[][..]),
"logprobs should be an empty array by default"
);
}
}
......@@ -2543,11 +2659,35 @@ thinking
assert_eq!(resp.inner.truncation, Some(Truncation::Disabled));
}
/// Validate the JSON wire shape of NvResponse.
///
/// The migration to upstream async-openai v0.34 removed fields that were
/// incorrectly present on our old local Response type (they belong on the
/// request, not the response, per the OpenAI Responses API spec).
/// Pass-through metadata fields the OpenResponses spec includes on the
/// response body. Codex sends `prompt_cache_key` on every request; we
/// echo it back so the caller can confirm receipt without enforcing any
/// caching semantics. Same pattern for `prompt_cache_retention` and
/// `safety_identifier`.
#[test]
fn test_response_echoes_passthrough_metadata() {
let chat_resp = make_chat_resp_with_text("hello");
let params = ResponseParams {
prompt_cache_key: Some("cache-key-codex-1".into()),
prompt_cache_retention: Some(PromptCacheRetention::InMemory),
safety_identifier: Some("user-abc".into()),
..Default::default()
};
let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
assert_eq!(
resp.inner.prompt_cache_key.as_deref(),
Some("cache-key-codex-1")
);
assert_eq!(
resp.inner.prompt_cache_retention,
Some(PromptCacheRetention::InMemory)
);
assert_eq!(resp.inner.safety_identifier.as_deref(), Some("user-abc"));
}
/// Validate the JSON wire shape of NvResponse matches the OpenResponses
/// spec: required scalars always present, nullable-required fields
/// emitted as `null` when None.
#[test]
fn test_response_wire_format_shape() {
let chat_resp = make_chat_resp_with_text("hello");
......@@ -2555,14 +2695,14 @@ thinking
let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
let json = serde_json::to_value(&resp).unwrap();
// Fields that were on our old local type but are NOT in the OpenAI
// Responses API spec -- they are request-level, not response-level.
assert!(json.get("frequency_penalty").is_none());
assert!(json.get("presence_penalty").is_none());
assert!(json.get("store").is_none());
assert!(json.get("max_tool_calls").is_none());
// Required scalars the spec mandates on every response. Upstream
// async-openai's Response struct doesn't model these; NvResponse's
// custom serializer injects them.
assert_eq!(json["frequency_penalty"], 0.0);
assert_eq!(json["presence_penalty"], 0.0);
assert_eq!(json["store"], false);
// Fields that should be present with expected values
// Other required fields with expected values
assert_eq!(json["object"], "response");
assert_eq!(json["status"], "completed");
assert_eq!(json["metadata"], serde_json::json!({}));
......@@ -2570,12 +2710,25 @@ thinking
assert!(json["output"][0].get("id").is_some());
assert!(json["output"][0].get("status").is_some());
// Optional fields with None should be omitted (upstream uses skip_serializing_if)
assert!(json.get("error").is_none());
assert!(json.get("incomplete_details").is_none());
assert!(json.get("billing").is_none());
assert!(json.get("conversation").is_none());
assert!(json.get("safety_identifier").is_none());
// Nullable-required fields must be present as null (not missing).
for key in [
"error",
"incomplete_details",
"billing",
"conversation",
"safety_identifier",
"max_tool_calls",
"instructions",
"previous_response_id",
"prompt_cache_key",
"reasoning",
] {
assert_eq!(
json.get(key),
Some(&serde_json::Value::Null),
"expected {key} to be present as null"
);
}
// nvext should be omitted when None
assert!(json.get("nvext").is_none());
......
......@@ -155,10 +155,10 @@ impl ResponseStreamConverter {
.as_ref()
.and_then(|ctx| ctx.previous_response_id.clone()),
prompt: None,
prompt_cache_key: None,
prompt_cache_retention: None,
prompt_cache_key: self.params.prompt_cache_key.clone(),
prompt_cache_retention: self.params.prompt_cache_retention,
reasoning: self.params.reasoning.clone(),
safety_identifier: None,
safety_identifier: self.params.safety_identifier.clone(),
service_tier: Some(self.params.service_tier.unwrap_or(ServiceTier::Auto)),
top_logprobs: Some(0),
usage: self.usage.clone(),
......@@ -173,13 +173,13 @@ impl ResponseStreamConverter {
sequence_number: self.next_seq(),
response: self.make_response(Status::InProgress, vec![]),
});
events.push(make_sse_event(&created));
events.push(self.make_sse_event(&created));
let in_progress = ResponseStreamEvent::ResponseInProgress(ResponseInProgressEvent {
sequence_number: self.next_seq(),
response: self.make_response(Status::InProgress, vec![]),
});
events.push(make_sse_event(&in_progress));
events.push(self.make_sse_event(&in_progress));
events
}
......@@ -249,7 +249,7 @@ impl ResponseStreamConverter {
}),
},
);
events.push(make_sse_event(&item_added));
events.push(self.make_sse_event(&item_added));
let part_added = ResponseStreamEvent::ResponseContentPartAdded(
ResponseContentPartAddedEvent {
......@@ -264,7 +264,7 @@ impl ResponseStreamConverter {
}),
},
);
events.push(make_sse_event(&part_added));
events.push(self.make_sse_event(&part_added));
}
// Emit text delta
......@@ -278,7 +278,7 @@ impl ResponseStreamConverter {
delta: content.to_string(),
logprobs: Some(vec![]),
});
events.push(make_sse_event(&text_delta));
events.push(self.make_sse_event(&text_delta));
}
// Handle tool call deltas
......@@ -332,7 +332,7 @@ impl ResponseStreamConverter {
}),
},
);
events.push(make_sse_event(&item_added));
events.push(self.make_sse_event(&item_added));
}
self.function_call_items[tc_index]
......@@ -355,7 +355,7 @@ impl ResponseStreamConverter {
delta: args.clone(),
},
);
events.push(make_sse_event(&args_delta));
events.push(self.make_sse_event(&args_delta));
// Emit done + output_item.done immediately if the tool call
// arrived complete in a single chunk (id + name + args all present).
......@@ -382,7 +382,7 @@ impl ResponseStreamConverter {
name: Some(fc_name.clone()),
},
);
events.push(make_sse_event(&args_done));
events.push(self.make_sse_event(&args_done));
let item_done = ResponseStreamEvent::ResponseOutputItemDone(
ResponseOutputItemDoneEvent {
......@@ -398,7 +398,7 @@ impl ResponseStreamConverter {
}),
},
);
events.push(make_sse_event(&item_done));
events.push(self.make_sse_event(&item_done));
}
}
}
......@@ -423,7 +423,7 @@ impl ResponseStreamConverter {
text: self.accumulated_text.clone(),
logprobs: Some(vec![]),
});
events.push(make_sse_event(&text_done));
events.push(self.make_sse_event(&text_done));
let part_done =
ResponseStreamEvent::ResponseContentPartDone(ResponseContentPartDoneEvent {
......@@ -437,7 +437,7 @@ impl ResponseStreamConverter {
logprobs: Some(vec![]),
}),
});
events.push(make_sse_event(&part_done));
events.push(self.make_sse_event(&part_done));
let item_done =
ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent {
......@@ -455,7 +455,7 @@ impl ResponseStreamConverter {
status: OutputStatus::Completed,
}),
});
events.push(make_sse_event(&item_done));
events.push(self.make_sse_event(&item_done));
}
// Close any function call items not already done inline
......@@ -483,7 +483,7 @@ impl ResponseStreamConverter {
name: Some(fc_name.clone()),
},
);
events.push(make_sse_event(&args_done));
events.push(self.make_sse_event(&args_done));
let item_done =
ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent {
......@@ -498,7 +498,7 @@ impl ResponseStreamConverter {
status: Some(OutputStatus::Completed),
}),
});
events.push(make_sse_event(&item_done));
events.push(self.make_sse_event(&item_done));
}
// Build the final output vector from accumulated state
......@@ -534,7 +534,7 @@ impl ResponseStreamConverter {
sequence_number: self.next_seq(),
response: self.make_response(Status::Completed, output),
});
events.push(make_sse_event(&completed));
events.push(self.make_sse_event(&completed));
events
}
......@@ -547,16 +547,33 @@ impl ResponseStreamConverter {
sequence_number: self.next_seq(),
response: self.make_response(Status::Failed, vec![]),
});
events.push(make_sse_event(&failed));
events.push(self.make_sse_event(&failed));
events
}
}
fn make_sse_event(event: &ResponseStreamEvent) -> Result<Event, anyhow::Error> {
impl ResponseStreamConverter {
/// Serialize a stream event, patching any embedded `response` object to
/// satisfy the OpenResponses schema. Takes `&self` so spec-required
/// sampling params can be sourced from the originating request via
/// `self.params` rather than hardcoded at each emit site.
fn make_sse_event(&self, event: &ResponseStreamEvent) -> Result<Event, anyhow::Error> {
let event_type = get_event_type(event);
let data = serde_json::to_string(event)?;
let mut value = serde_json::to_value(event)?;
if let serde_json::Value::Object(ref mut obj) = value
&& let Some(serde_json::Value::Object(inner)) = obj.get_mut("response")
{
super::patch_response_for_spec(
inner,
self.params.presence_penalty.unwrap_or(0.0),
self.params.frequency_penalty.unwrap_or(0.0),
self.params.store.unwrap_or(false),
);
}
let data = serde_json::to_string(&value)?;
Ok(Event::default().event(event_type).data(data))
}
}
fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
......@@ -677,22 +694,7 @@ mod tests {
};
fn default_params() -> ResponseParams {
ResponseParams {
model: None,
temperature: None,
top_p: None,
max_output_tokens: None,
parallel_tool_calls: None,
store: None,
tools: None,
tool_choice: None,
instructions: None,
reasoning: None,
text: None,
service_tier: None,
include: None,
truncation: None,
}
ResponseParams::default()
}
fn tool_call_chunk(
......
......@@ -35,6 +35,13 @@ use serde::{Deserialize, Serialize};
// shadow their upstream counterparts where no dual-side conflict exists.
pub use async_openai::types::responses::*;
// Re-export upstream's pre-shadow `InputContent` under an explicit alias.
// Needed because `FunctionCallOutput::Content` and `EasyInputContent::ContentList`
// are non-owned upstream types that carry upstream's original `InputContent`
// inline, so downstream consumers occasionally need to name it alongside the
// Dynamo-owned shadow defined further down this module.
pub use async_openai::types::responses::InputContent as UpstreamInputContent;
// Re-export from parent module for backward compat.
pub use crate::types::ImageDetail;
pub use crate::types::ReasoningEffort;
......@@ -51,6 +58,40 @@ pub type ResponseStream = std::pin::Pin<
Box<dyn futures::Stream<Item = Result<ResponseStreamEvent, crate::error::OpenAIError>> + Send>,
>;
/// Fields on upstream `Response` that the OpenResponses spec requires as
/// `T | null` but async-openai declares as `Option<T>` with
/// `skip_serializing_if = Option::is_none` — meaning `None` disappears from
/// the wire shape, where the spec wants an explicit `null`.
///
/// Colocated here (next to the upstream `Response` re-export) rather than in
/// `lib/llm/src/protocols/openai/responses/mod.rs` so that when upstream's
/// `Response` gains a new nullable-required field, the reviewer editing this
/// module is looking directly at the authoritative list. Keep sorted
/// alphabetically; entries must match serde field names on `Response` exactly.
///
/// Any field we unconditionally populate ourselves during response
/// construction (e.g. `metadata`, `parallel_tool_calls`, `temperature`,
/// `text`, `tool_choice`, `tools`, `top_p`, `top_logprobs`, `truncation`,
/// `service_tier`, `background`) is deliberately absent — it's always
/// present on the wire, so listing it here would be noise.
pub const SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS: &[&str] = &[
"billing",
"completed_at",
"conversation",
"error",
"incomplete_details",
"instructions",
"max_output_tokens",
"max_tool_calls",
"previous_response_id",
"prompt",
"prompt_cache_key",
"prompt_cache_retention",
"reasoning",
"safety_identifier",
"usage",
];
// ---------------------------------------------------------------------------
// Input-side assistant message (relaxed vs upstream OutputMessage)
// ---------------------------------------------------------------------------
......@@ -68,6 +109,19 @@ where
Option::<Vec<T>>::deserialize(deserializer).map(Option::unwrap_or_default)
}
/// Deserialize `null` or a missing field as `T::default()`. Scalar counterpart
/// to `deserialize_null_as_empty_vec` — plain `#[serde(default)]` rejects
/// explicit `null` because serde tries to deserialize the null into `T` and
/// fails. Real clients emit `null` for unset enum-ish fields (e.g. OpenAI
/// Agents SDK sending `"detail": null` on `input_image` parts).
fn deserialize_null_as_default<'de, T, D>(deserializer: D) -> Result<T, D::Error>
where
T: Deserialize<'de> + Default,
D: serde::Deserializer<'de>,
{
Option::<T>::deserialize(deserializer).map(Option::unwrap_or_default)
}
/// Relaxed counterpart to upstream `OutputTextContent` for input-side content.
/// `annotations` tolerates both missing and explicit `null`; upstream requires
/// it to be a present non-null array.
......@@ -107,6 +161,45 @@ pub struct InputOutputMessage {
pub status: Option<OutputStatus>,
}
// ---------------------------------------------------------------------------
// Input-side image / content / message (shadow upstream, relaxed shapes)
// ---------------------------------------------------------------------------
/// Relaxed counterpart to upstream `InputImageContent`. `detail` defaults to
/// `ImageDetail::Auto` when the client omits it — OpenAI's hosted API and the
/// OpenResponses spec both accept this shape, but upstream's struct marks
/// `detail` as required.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct InputImageContent {
#[serde(default, deserialize_with = "deserialize_null_as_default")]
pub detail: ImageDetail,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub file_id: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub image_url: Option<String>,
}
/// Parts of an input message: text, image, or file. Mirrors upstream
/// `InputContent` but routes `InputImage` through the Dynamo-owned relaxed
/// `InputImageContent` above.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum InputContent {
InputText(InputTextContent),
InputImage(InputImageContent),
InputFile(InputFileContent),
}
/// User / system / developer input message. Shadows upstream `InputMessage`
/// so we can route through the Dynamo-owned `InputContent` chain.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
pub struct InputMessage {
pub content: Vec<InputContent>,
pub role: InputRole,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub status: Option<OutputStatus>,
}
// ---------------------------------------------------------------------------
// Input-side Item / Message / InputItem / InputParam (shadow upstream)
// ---------------------------------------------------------------------------
......@@ -271,6 +364,33 @@ mod tests {
}
}
#[test]
fn input_image_without_detail_defaults_to_auto() {
let json = serde_json::json!({
"type": "input_image",
"image_url": "https://example.com/cat.jpg"
});
let content: InputContent = serde_json::from_value(json).unwrap();
match content {
InputContent::InputImage(img) => assert_eq!(img.detail, ImageDetail::Auto),
other => panic!("expected InputImage, got {other:?}"),
}
}
#[test]
fn input_image_with_explicit_null_detail_defaults_to_auto() {
let json = serde_json::json!({
"type": "input_image",
"image_url": "https://example.com/cat.jpg",
"detail": null
});
let content: InputContent = serde_json::from_value(json).unwrap();
match content {
InputContent::InputImage(img) => assert_eq!(img.detail, ImageDetail::Auto),
other => panic!("expected InputImage, got {other:?}"),
}
}
#[test]
fn assistant_message_without_content_field_deserializes() {
// Bare assistant shell — no `content` field at all. Seen in real
......
......@@ -232,6 +232,7 @@ markers = [
"post_merge: marks tests to run after merge",
"parallel: marks tests that can run in parallel with pytest-xdist",
"nightly: marks tests to run nightly",
"frontend_api_surface_compliance: marks tests that validate Dynamo's HTTP API surface (Responses/Anthropic wire shape, tool-call routing) against upstream compliance harnesses",
"weekly: marks tests to run weekly",
"release: marks tests to run on release pipelines",
"gpu_0: marks tests that don't require GPU",
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment