fix(responses): align wire shape with OpenResponses spec + add compliance CI (#8283)

Signed-off-by: Anant Sharma <anants@nvidia.com>
Co-authored-by: Anant Sharma <anants@nvidia.com>

fix(responses): align wire shape with OpenResponses spec + add compliance CI (#8283)
Signed-off-by: Anant Sharma <anants@nvidia.com>
Co-authored-by: Anant Sharma <anants@nvidia.com>
2a95ef63 · ishandhanani · GitHub · 4410a2c5 · 2a95ef63 · 2a95ef63
Unverified Commit 2a95ef63 authored Apr 22, 2026 by ishandhanani Committed by GitHub Apr 22, 2026
6 changed files
--- a/lib/llm/src/http/service/openai.rs
+++ b/lib/llm/src/http/service/openai.rs
@@ -1595,6 +1595,18 @@ async fn responses(
        service_tier: request.inner.service_tier,
        include: request.inner.include.clone(),
        truncation: request.inner.truncation,
+        // Upstream `CreateResponse` doesn't carry these yet; plumbed through so
+        // the response serializer can default to 0.0 without hardcoding at the
+        // build site. When upstream (or our shadow) adds the fields, sourcing
+        // from the request becomes a one-line change here.
+        presence_penalty: None,
+        frequency_penalty: None,
+        // Pass-through metadata — accepted on the request, echoed back on the
+        // response so the caller can confirm receipt. Dynamo doesn't act on
+        // these; see `validate_response_unsupported_fields` for rationale.
+        prompt_cache_key: request.inner.prompt_cache_key.clone(),
+        prompt_cache_retention: request.inner.prompt_cache_retention,
+        safety_identifier: request.inner.safety_identifier.clone(),
    };
    let request_id = request.id().to_string();
    let (orig_request, context) = request.into_parts();
@@ -1830,6 +1842,24 @@ pub fn validate_response_unsupported_fields(
            VALIDATION_PREFIX.to_string() + "`prompt` is not supported.",
        ));
    }
+    // Reject directive fields that change semantics if silently dropped.
+    // `max_tool_calls` is a hard cap on tool invocations — accepting it
+    // without enforcement would let a caller send `max_tool_calls: 5` and
+    // see `max_tool_calls: null` in the response, assuming their limit was
+    // honored. Fail loud until real enforcement lands.
+    //
+    // Pass-through metadata fields (`prompt_cache_key`,
+    // `prompt_cache_retention`, `safety_identifier`) are deliberately
+    // accepted and echoed back on the response instead. They're hints for
+    // OpenAI's caching/moderation backends, not directives — Codex sends
+    // `prompt_cache_key` on every request — and the OpenResponses spec
+    // includes them on the response body, so echoing the caller's value
+    // makes receipt observable without needing a real backend.
+    if inner.max_tool_calls.is_some() {
+        return Some(ErrorMessage::not_implemented_error(
+            VALIDATION_PREFIX.to_string() + "`max_tool_calls` is not supported.",
+        ));
+    }
    None
 }

@@ -2714,6 +2744,7 @@ mod tests {
                    })
                }),
            ),
+            ("max_tool_calls", Box::new(|r| r.max_tool_calls = Some(5))),
        ];

        for (field, set_field) in unsupported_cases {
@@ -2724,6 +2755,43 @@ mod tests {
        }
    }

+    /// The validation layer must let pass-through metadata through:
+    /// `prompt_cache_key`, `prompt_cache_retention`, and `safety_identifier`
+    /// are accepted here and echoed back by the response path so the caller
+    /// can confirm receipt. Codex sends `prompt_cache_key` on every request —
+    /// rejecting it broke `codex exec` end-to-end.
+    #[test]
+    fn test_validate_unsupported_fields_accepts_passthrough_metadata() {
+        use dynamo_protocols::types::responses::PromptCacheRetention;
+
+        // Each case pairs the field name (for the failure message) with a
+        // mutation that sets that field on an otherwise valid request.
+        #[allow(clippy::type_complexity)]
+        let passthrough_cases: Vec<(&str, Box<dyn FnOnce(&mut CreateResponse)>)> = vec![
+            (
+                "prompt_cache_key",
+                Box::new(|req| req.prompt_cache_key = Some("ck-1".into())),
+            ),
+            (
+                "prompt_cache_retention",
+                Box::new(|req| req.prompt_cache_retention = Some(PromptCacheRetention::InMemory)),
+            ),
+            (
+                "safety_identifier",
+                Box::new(|req| req.safety_identifier = Some("user-hash".into())),
+            ),
+        ];
+
+        for (field, apply) in passthrough_cases {
+            let mut request = make_base_request();
+            apply(&mut request.inner);
+            assert!(
+                validate_response_unsupported_fields(&request).is_none(),
+                "Expected `{field}` to be accepted as pass-through metadata"
+            );
+        }
+    }
+
    #[test]
    fn test_validate_chat_completion_required_fields_empty_messages() {
        let request = NvCreateChatCompletionRequest {

--- a/lib/llm/src/protocols/openai/responses/mod.rs
+++ b/lib/llm/src/protocols/openai/responses/mod.rs
@@ -9,9 +9,10 @@ use dynamo_protocols::types::responses::{
    AssistantRole, FunctionCallOutput, FunctionToolCall, IncludeEnum, InputContent, InputItem,
    InputOutputMessageContent, InputParam, InputRole, InputTokenDetails, Instructions, Item,
    MessageItem, OutputItem, OutputMessage, OutputMessageContent, OutputStatus, OutputTextContent,
-    OutputTokenDetails, Reasoning, ReasoningItem, Response, ResponseTextParam, ResponseUsage,
-    Role as ResponseRole, ServiceTier, Status, SummaryPart, SummaryTextContent,
-    TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam, Truncation,
+    OutputTokenDetails, PromptCacheRetention, Reasoning, ReasoningItem, Response,
+    ResponseTextParam, ResponseUsage, Role as ResponseRole, ServiceTier, Status, SummaryPart,
+    SummaryTextContent, TextResponseFormatConfiguration, Tool, ToolChoiceOptions, ToolChoiceParam,
+    Truncation,
 };
 use dynamo_protocols::types::{
    ChatCompletionMessageToolCall, ChatCompletionNamedToolChoice,
@@ -63,7 +64,7 @@ pub struct NvCreateResponse {
    pub nvext: Option<NvExt>,
 }

-#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
+#[derive(ToSchema, Deserialize, Validate, Debug, Clone)]
 pub struct NvResponse {
    /// Flattened Response fields (includes upstream + extended spec fields).
    #[serde(flatten)]
@@ -73,6 +74,78 @@ pub struct NvResponse {
    /// NVIDIA extension field for response metadata (worker IDs, etc.)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<serde_json::Value>,
+
+    /// OpenResponses spec requires these as non-null scalars on every response,
+    /// but async-openai's `Response` doesn't model them. Populated from the
+    /// originating request. Surfaced during serialization (see `Serialize`
+    /// impl below); not persisted as top-level fields on the inner struct.
+    #[serde(default)]
+    pub presence_penalty: f32,
+    #[serde(default)]
+    pub frequency_penalty: f32,
+    #[serde(default)]
+    pub store: bool,
+}
+
+/// Bring a serialized `Response` JSON object in line with the OpenResponses
+/// spec. Shared by one-shot `NvResponse` serialization and by the streaming
+/// path, which applies it to the `response` object embedded in an event.
+///
+/// Two gaps between upstream async-openai's `Response` and the spec are
+/// closed:
+///
+///  1. Nullable-required fields. Upstream declares them `Option<T>` with
+///     `skip_serializing_if = Option::is_none`, so `None` vanishes from the
+///     wire. Every key in `SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS` that is
+///     absent is added as `null`; keys already present are left untouched.
+///  2. Required scalars missing from upstream `Response` altogether
+///     (`presence_penalty`, `frequency_penalty`, `store`). These are always
+///     written, replacing any existing entry with the same key.
+///
+/// Patching the serialized JSON avoids forking the upstream output chain
+/// (which would cascade into `OutputItem`, streaming events, and a long tail
+/// of sub-types, per `lib/protocols/CLAUDE.md`). The price is a
+/// `serde_json::to_value` round-trip paid by the caller: once per one-shot
+/// response and once per emitted streaming event.
+pub(crate) fn patch_response_for_spec(
+    obj: &mut serde_json::Map<String, serde_json::Value>,
+    presence_penalty: f32,
+    frequency_penalty: f32,
+    store: bool,
+) {
+    let nullable_required =
+        dynamo_protocols::types::responses::SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS;
+    for &key in nullable_required {
+        obj.entry(key).or_insert(serde_json::Value::Null);
+    }
+
+    // Written in a fixed order: presence_penalty, frequency_penalty, store.
+    let required_scalars = [
+        ("presence_penalty", serde_json::json!(presence_penalty)),
+        ("frequency_penalty", serde_json::json!(frequency_penalty)),
+        ("store", serde_json::json!(store)),
+    ];
+    for (name, scalar) in required_scalars {
+        obj.insert(name.into(), scalar);
+    }
+}
+
+impl Serialize for NvResponse {
+    /// Serialize `inner` to JSON, patch the resulting object with the
+    /// spec-required fields and (when set) `nvext`, then hand the patched
+    /// value to `serializer`.
+    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        let mut value = serde_json::to_value(&self.inner).map_err(serde::ser::Error::custom)?;
+
+        // A non-object value is forwarded unchanged.
+        if let serde_json::Value::Object(obj) = &mut value {
+            patch_response_for_spec(
+                obj,
+                self.presence_penalty,
+                self.frequency_penalty,
+                self.store,
+            );
+
+            // `nvext` stays off the wire entirely when it is `None`.
+            if let Some(nvext) = self.nvext.as_ref() {
+                obj.insert("nvext".into(), nvext.clone());
+            }
+        }
+
+        value.serialize(serializer)
+    }
 }

 /// Implements `NvExtProvider` for `NvCreateResponse`,
@@ -244,6 +317,24 @@ fn convert_input_content_to_text(content: &[InputContent]) -> String {
        .join("")
 }

+/// Concatenate the text of every `InputText` part in a slice of upstream
+/// `InputContent`; all other part kinds contribute nothing.
+///
+/// Counterpart to `convert_input_content_to_text`. Upstream's enum appears
+/// inside `FunctionCallOutput::Content` and `EasyInputContent::ContentList`,
+/// neither of which is Dynamo-owned, so payloads deserialized through those
+/// paths land as upstream variants.
+fn convert_upstream_input_content_to_text(
+    content: &[dynamo_protocols::types::responses::UpstreamInputContent],
+) -> String {
+    use dynamo_protocols::types::responses::UpstreamInputContent;
+
+    let mut text = String::new();
+    for part in content {
+        if let UpstreamInputContent::InputText(t) = part {
+            text.push_str(&t.text);
+        }
+    }
+    text
+}
+
 /// Accumulator for consecutive assistant-side items (OutputMessage, FunctionCall,
 /// Reasoning, assistant EasyMessage). Chat Completions represents an assistant
 /// turn as a single message carrying `content`, `tool_calls`, and
@@ -406,7 +497,9 @@ fn convert_input_items_to_messages(
                    std::mem::take(&mut pending).flush_into(&mut messages);
                    let output_text = match &fco.output {
                        FunctionCallOutput::Text(text) => text.clone(),
-                        FunctionCallOutput::Content(parts) => convert_input_content_to_text(parts),
+                        FunctionCallOutput::Content(parts) => {
+                            convert_upstream_input_content_to_text(parts)
+                        }
                    };
                    messages.push(ChatCompletionRequestMessage::Tool(
                        ChatCompletionRequestToolMessage {
@@ -444,7 +537,7 @@ fn convert_input_items_to_messages(
                        text.clone()
                    }
                    dynamo_protocols::types::responses::EasyInputContent::ContentList(parts) => {
-                        convert_input_content_to_text(parts)
+                        convert_upstream_input_content_to_text(parts)
                    }
                };
                match easy.role {
@@ -740,6 +833,22 @@ pub struct ResponseParams {
    pub service_tier: Option<ServiceTier>,
    pub include: Option<Vec<IncludeEnum>>,
    pub truncation: Option<Truncation>,
+    /// OpenResponses spec requires these fields on the response body. Upstream
+    /// `CreateResponse` doesn't model them on the request yet, so for now they
+    /// pass through as `None` and the response builders fall back to 0.0 (the
+    /// effective sglang default). Wired through `ResponseParams` anyway so
+    /// that when upstream relaxes or we shadow `CreateResponse`, threading a
+    /// real value becomes a one-line change at the request-extraction site.
+    pub presence_penalty: Option<f32>,
+    pub frequency_penalty: Option<f32>,
+    /// Pass-through metadata fields. Codex and other clients send these as
+    /// hints for OpenAI's caching/moderation backends; Dynamo doesn't act on
+    /// them, but the spec includes them on the response body so we echo back
+    /// what the caller sent rather than silently dropping. Echoing makes
+    /// receipt observable to the client without needing a real backend.
+    pub prompt_cache_key: Option<String>,
+    pub prompt_cache_retention: Option<PromptCacheRetention>,
+    pub safety_identifier: Option<String>,
 }

 /// Normalize tools so that `FunctionTool.strict` is always set.
@@ -880,13 +989,13 @@ pub fn chat_completion_to_response(
        .include
        .as_ref()
        .is_some_and(|inc| inc.contains(&IncludeEnum::MessageOutputTextLogprobs));
-    if !keep_logprobs {
    for item in &mut output {
        if let OutputItem::Message(msg) = item {
            for content in &mut msg.content {
-                    if let OutputMessageContent::OutputText(text) = content {
-                        text.logprobs = None;
-                    }
+                if let OutputMessageContent::OutputText(text) = content
+                    && (!keep_logprobs || text.logprobs.is_none())
+                {
+                    text.logprobs = Some(Vec::new());
                }
            }
        }
@@ -936,10 +1045,10 @@ pub fn chat_completion_to_response(
        max_output_tokens: params.max_output_tokens,
        previous_response_id: api_context.and_then(|ctx| ctx.previous_response_id.clone()),
        prompt: None,
-        prompt_cache_key: None,
-        prompt_cache_retention: None,
+        prompt_cache_key: params.prompt_cache_key.clone(),
+        prompt_cache_retention: params.prompt_cache_retention,
        reasoning: params.reasoning.clone(),
-        safety_identifier: None,
+        safety_identifier: params.safety_identifier.clone(),
        service_tier: Some(params.service_tier.unwrap_or(ServiceTier::Auto)),
        top_logprobs: Some(0),
        usage: chat_resp.usage.map(|u| ResponseUsage {
@@ -964,6 +1073,9 @@ pub fn chat_completion_to_response(
    Ok(NvResponse {
        inner: response,
        nvext,
+        presence_penalty: params.presence_penalty.unwrap_or(0.0),
+        frequency_penalty: params.frequency_penalty.unwrap_or(0.0),
+        store: params.store.unwrap_or(false),
    })
 }

@@ -2475,7 +2587,10 @@ thinking
    }

    #[test]
-    fn test_include_logprobs_stripped_by_default() {
+    fn test_include_logprobs_empty_by_default() {
+        // OpenResponses schema requires `logprobs` to be an array. When the
+        // caller did not request them via `include`, emit an empty array
+        // rather than null.
        let chat_resp = make_chat_resp_with_text("hello");
        let params = ResponseParams::default();
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
@@ -2484,9 +2599,10 @@ thinking
            if let OutputItem::Message(msg) = item {
                for content in &msg.content {
                    if let OutputMessageContent::OutputText(t) = content {
-                        assert!(
-                            t.logprobs.is_none(),
-                            "logprobs should be stripped by default"
+                        assert_eq!(
+                            t.logprobs.as_deref(),
+                            Some(&[][..]),
+                            "logprobs should be an empty array by default"
                        );
                    }
                }
@@ -2543,11 +2659,35 @@ thinking
        assert_eq!(resp.inner.truncation, Some(Truncation::Disabled));
    }

-    /// Validate the JSON wire shape of NvResponse.
-    ///
-    /// The migration to upstream async-openai v0.34 removed fields that were
-    /// incorrectly present on our old local Response type (they belong on the
-    /// request, not the response, per the OpenAI Responses API spec).
+    /// `prompt_cache_key`, `prompt_cache_retention`, and `safety_identifier`
+    /// are pass-through metadata that the OpenResponses spec places on the
+    /// response body. Codex sends `prompt_cache_key` on every request; the
+    /// value is echoed back so the caller can confirm receipt, without any
+    /// caching semantics being enforced.
+    #[test]
+    fn test_response_echoes_passthrough_metadata() {
+        let params = ResponseParams {
+            prompt_cache_key: Some("cache-key-codex-1".into()),
+            prompt_cache_retention: Some(PromptCacheRetention::InMemory),
+            safety_identifier: Some("user-abc".into()),
+            ..Default::default()
+        };
+        let resp =
+            chat_completion_to_response(make_chat_resp_with_text("hello"), &params, None).unwrap();
+        let echoed = &resp.inner;
+
+        assert_eq!(echoed.prompt_cache_key.as_deref(), Some("cache-key-codex-1"));
+        assert_eq!(
+            echoed.prompt_cache_retention,
+            Some(PromptCacheRetention::InMemory)
+        );
+        assert_eq!(echoed.safety_identifier.as_deref(), Some("user-abc"));
+    }
+
+    /// Validate the JSON wire shape of NvResponse matches the OpenResponses
+    /// spec: required scalars always present, nullable-required fields
+    /// emitted as `null` when None.
    #[test]
    fn test_response_wire_format_shape() {
        let chat_resp = make_chat_resp_with_text("hello");
@@ -2555,14 +2695,14 @@ thinking
        let resp = chat_completion_to_response(chat_resp, &params, None).unwrap();
        let json = serde_json::to_value(&resp).unwrap();

-        // Fields that were on our old local type but are NOT in the OpenAI
-        // Responses API spec -- they are request-level, not response-level.
-        assert!(json.get("frequency_penalty").is_none());
-        assert!(json.get("presence_penalty").is_none());
-        assert!(json.get("store").is_none());
-        assert!(json.get("max_tool_calls").is_none());
+        // Required scalars the spec mandates on every response. Upstream
+        // async-openai's Response struct doesn't model these; NvResponse's
+        // custom serializer injects them.
+        assert_eq!(json["frequency_penalty"], 0.0);
+        assert_eq!(json["presence_penalty"], 0.0);
+        assert_eq!(json["store"], false);

-        // Fields that should be present with expected values
+        // Other required fields with expected values
        assert_eq!(json["object"], "response");
        assert_eq!(json["status"], "completed");
        assert_eq!(json["metadata"], serde_json::json!({}));
@@ -2570,12 +2710,25 @@ thinking
        assert!(json["output"][0].get("id").is_some());
        assert!(json["output"][0].get("status").is_some());

-        // Optional fields with None should be omitted (upstream uses skip_serializing_if)
-        assert!(json.get("error").is_none());
-        assert!(json.get("incomplete_details").is_none());
-        assert!(json.get("billing").is_none());
-        assert!(json.get("conversation").is_none());
-        assert!(json.get("safety_identifier").is_none());
+        // Nullable-required fields must be present as null (not missing).
+        for key in [
+            "error",
+            "incomplete_details",
+            "billing",
+            "conversation",
+            "safety_identifier",
+            "max_tool_calls",
+            "instructions",
+            "previous_response_id",
+            "prompt_cache_key",
+            "reasoning",
+        ] {
+            assert_eq!(
+                json.get(key),
+                Some(&serde_json::Value::Null),
+                "expected {key} to be present as null"
+            );
+        }

        // nvext should be omitted when None
        assert!(json.get("nvext").is_none());

--- a/lib/llm/src/protocols/openai/responses/stream_converter.rs
+++ b/lib/llm/src/protocols/openai/responses/stream_converter.rs
@@ -155,10 +155,10 @@ impl ResponseStreamConverter {
                .as_ref()
                .and_then(|ctx| ctx.previous_response_id.clone()),
            prompt: None,
-            prompt_cache_key: None,
-            prompt_cache_retention: None,
+            prompt_cache_key: self.params.prompt_cache_key.clone(),
+            prompt_cache_retention: self.params.prompt_cache_retention,
            reasoning: self.params.reasoning.clone(),
-            safety_identifier: None,
+            safety_identifier: self.params.safety_identifier.clone(),
            service_tier: Some(self.params.service_tier.unwrap_or(ServiceTier::Auto)),
            top_logprobs: Some(0),
            usage: self.usage.clone(),
@@ -173,13 +173,13 @@ impl ResponseStreamConverter {
            sequence_number: self.next_seq(),
            response: self.make_response(Status::InProgress, vec![]),
        });
-        events.push(make_sse_event(&created));
+        events.push(self.make_sse_event(&created));

        let in_progress = ResponseStreamEvent::ResponseInProgress(ResponseInProgressEvent {
            sequence_number: self.next_seq(),
            response: self.make_response(Status::InProgress, vec![]),
        });
-        events.push(make_sse_event(&in_progress));
+        events.push(self.make_sse_event(&in_progress));

        events
    }
@@ -249,7 +249,7 @@ impl ResponseStreamConverter {
                            }),
                        },
                    );
-                    events.push(make_sse_event(&item_added));
+                    events.push(self.make_sse_event(&item_added));

                    let part_added = ResponseStreamEvent::ResponseContentPartAdded(
                        ResponseContentPartAddedEvent {
@@ -264,7 +264,7 @@ impl ResponseStreamConverter {
                            }),
                        },
                    );
-                    events.push(make_sse_event(&part_added));
+                    events.push(self.make_sse_event(&part_added));
                }

                // Emit text delta
@@ -278,7 +278,7 @@ impl ResponseStreamConverter {
                        delta: content.to_string(),
                        logprobs: Some(vec![]),
                    });
-                events.push(make_sse_event(&text_delta));
+                events.push(self.make_sse_event(&text_delta));
            }

            // Handle tool call deltas
@@ -332,7 +332,7 @@ impl ResponseStreamConverter {
                                        }),
                                    },
                                );
-                                events.push(make_sse_event(&item_added));
+                                events.push(self.make_sse_event(&item_added));
                            }

                            self.function_call_items[tc_index]
@@ -355,7 +355,7 @@ impl ResponseStreamConverter {
                                        delta: args.clone(),
                                    },
                                );
-                            events.push(make_sse_event(&args_delta));
+                            events.push(self.make_sse_event(&args_delta));

                            // Emit done + output_item.done immediately if the tool call
                            // arrived complete in a single chunk (id + name + args all present).
@@ -382,7 +382,7 @@ impl ResponseStreamConverter {
                                            name: Some(fc_name.clone()),
                                        },
                                    );
-                                events.push(make_sse_event(&args_done));
+                                events.push(self.make_sse_event(&args_done));

                                let item_done = ResponseStreamEvent::ResponseOutputItemDone(
                                    ResponseOutputItemDoneEvent {
@@ -398,7 +398,7 @@ impl ResponseStreamConverter {
                                        }),
                                    },
                                );
-                                events.push(make_sse_event(&item_done));
+                                events.push(self.make_sse_event(&item_done));
                            }
                        }
                    }
@@ -423,7 +423,7 @@ impl ResponseStreamConverter {
                text: self.accumulated_text.clone(),
                logprobs: Some(vec![]),
            });
-            events.push(make_sse_event(&text_done));
+            events.push(self.make_sse_event(&text_done));

            let part_done =
                ResponseStreamEvent::ResponseContentPartDone(ResponseContentPartDoneEvent {
@@ -437,7 +437,7 @@ impl ResponseStreamConverter {
                        logprobs: Some(vec![]),
                    }),
                });
-            events.push(make_sse_event(&part_done));
+            events.push(self.make_sse_event(&part_done));

            let item_done =
                ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent {
@@ -455,7 +455,7 @@ impl ResponseStreamConverter {
                        status: OutputStatus::Completed,
                    }),
                });
-            events.push(make_sse_event(&item_done));
+            events.push(self.make_sse_event(&item_done));
        }

        // Close any function call items not already done inline
@@ -483,7 +483,7 @@ impl ResponseStreamConverter {
                    name: Some(fc_name.clone()),
                },
            );
-            events.push(make_sse_event(&args_done));
+            events.push(self.make_sse_event(&args_done));

            let item_done =
                ResponseStreamEvent::ResponseOutputItemDone(ResponseOutputItemDoneEvent {
@@ -498,7 +498,7 @@ impl ResponseStreamConverter {
                        status: Some(OutputStatus::Completed),
                    }),
                });
-            events.push(make_sse_event(&item_done));
+            events.push(self.make_sse_event(&item_done));
        }

        // Build the final output vector from accumulated state
@@ -534,7 +534,7 @@ impl ResponseStreamConverter {
            sequence_number: self.next_seq(),
            response: self.make_response(Status::Completed, output),
        });
-        events.push(make_sse_event(&completed));
+        events.push(self.make_sse_event(&completed));

        events
    }
@@ -547,16 +547,33 @@ impl ResponseStreamConverter {
            sequence_number: self.next_seq(),
            response: self.make_response(Status::Failed, vec![]),
        });
-        events.push(make_sse_event(&failed));
+        events.push(self.make_sse_event(&failed));

        events
    }
 }

-fn make_sse_event(event: &ResponseStreamEvent) -> Result<Event, anyhow::Error> {
+impl ResponseStreamConverter {
+    /// Serialize a stream event for SSE, patching an embedded `response` object
+    /// (when the event has one) to satisfy the OpenResponses schema. Takes
+    /// `&self` so the spec-required `presence_penalty`, `frequency_penalty`, and
+    /// `store` values come from `self.params` rather than being hardcoded per site.
+    fn make_sse_event(&self, event: &ResponseStreamEvent) -> Result<Event, anyhow::Error> {
        let event_type = get_event_type(event);
-    let data = serde_json::to_string(event)?;
+        let mut value = serde_json::to_value(event)?;
+        if let serde_json::Value::Object(ref mut obj) = value
+            && let Some(serde_json::Value::Object(inner)) = obj.get_mut("response")
+        {
+            super::patch_response_for_spec(
+                inner,
+                self.params.presence_penalty.unwrap_or(0.0),
+                self.params.frequency_penalty.unwrap_or(0.0),
+                self.params.store.unwrap_or(false),
+            );
+        }
+        let data = serde_json::to_string(&value)?;
        Ok(Event::default().event(event_type).data(data))
+    }
 }

 fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
@@ -677,22 +694,7 @@ mod tests {
    };

    fn default_params() -> ResponseParams {
-        ResponseParams {
-            model: None,
-            temperature: None,
-            top_p: None,
-            max_output_tokens: None,
-            parallel_tool_calls: None,
-            store: None,
-            tools: None,
-            tool_choice: None,
-            instructions: None,
-            reasoning: None,
-            text: None,
-            service_tier: None,
-            include: None,
-            truncation: None,
-        }
+        ResponseParams::default()
    }

    fn tool_call_chunk(

--- a/lib/protocols/src/types/responses/mod.rs
+++ b/lib/protocols/src/types/responses/mod.rs
@@ -35,6 +35,13 @@ use serde::{Deserialize, Serialize};
 // shadow their upstream counterparts where no dual-side conflict exists.
 pub use async_openai::types::responses::*;

+// Re-export upstream's pre-shadow `InputContent` under an explicit alias.
+// Needed because `FunctionCallOutput::Content` and `EasyInputContent::ContentList`
+// are non-owned upstream types that carry upstream's original `InputContent`
+// inline, so downstream consumers occasionally need to name it alongside the
+// Dynamo-owned shadow defined further down this module.
+pub use async_openai::types::responses::InputContent as UpstreamInputContent;
+
 // Re-export from parent module for backward compat.
 pub use crate::types::ImageDetail;
 pub use crate::types::ReasoningEffort;
@@ -51,6 +58,40 @@ pub type ResponseStream = std::pin::Pin<
    Box<dyn futures::Stream<Item = Result<ResponseStreamEvent, crate::error::OpenAIError>> + Send>,
 >;

+/// Serde field names on upstream `Response` that the OpenResponses spec
+/// requires as `T | null` but async-openai declares as `Option<T>` with
+/// `skip_serializing_if = Option::is_none`, so `None` disappears from the
+/// wire where the spec wants an explicit `null`. `patch_response_for_spec`
+/// (in `lib/llm`) inserts `null` for each listed key that is missing.
+///
+/// Colocated here (next to the upstream `Response` re-export) rather than in
+/// `lib/llm/src/protocols/openai/responses/mod.rs` so that when upstream's
+/// `Response` gains a new nullable-required field, the reviewer editing this
+/// module is looking directly at the authoritative list. Keep sorted
+/// alphabetically; entries must match serde field names on `Response` exactly.
+///
+/// Fields we always populate during response construction (e.g. `metadata`,
+/// `parallel_tool_calls`, `temperature`, `text`, `tool_choice`, `tools`,
+/// `top_p`, `top_logprobs`, `truncation`, `service_tier`, `background`) are
+/// deliberately absent: they are always on the wire, so listing them is noise.
+pub const SPEC_NULLABLE_REQUIRED_RESPONSE_FIELDS: &[&str] = &[
+    "billing",
+    "completed_at",
+    "conversation",
+    "error",
+    "incomplete_details",
+    "instructions",
+    "max_output_tokens",
+    "max_tool_calls",
+    "previous_response_id",
+    "prompt",
+    "prompt_cache_key",
+    "prompt_cache_retention",
+    "reasoning",
+    "safety_identifier",
+    "usage",
+];
+
 // ---------------------------------------------------------------------------
 // Input-side assistant message (relaxed vs upstream OutputMessage)
 // ---------------------------------------------------------------------------
@@ -68,6 +109,19 @@ where
    Option::<Vec<T>>::deserialize(deserializer).map(Option::unwrap_or_default)
 }

+/// Map an explicit JSON `null` onto `T::default()`; any other value is parsed
+/// as `T`. Scalar sibling of `deserialize_null_as_empty_vec`. Pair it with
+/// `#[serde(default)]` so an absent key is covered too — `default` on its own
+/// still fails on `null`, since serde then tries to read the null as `T`. Real
+/// clients do send it (e.g. OpenAI Agents SDK: `"detail": null` on `input_image`).
+fn deserialize_null_as_default<'de, T, D>(deserializer: D) -> Result<T, D::Error>
+where
+    D: serde::Deserializer<'de>,
+    T: Default + Deserialize<'de>,
+{
+    Ok(Option::<T>::deserialize(deserializer)?.unwrap_or_default())
+}
+
 /// Relaxed counterpart to upstream `OutputTextContent` for input-side content.
 /// `annotations` tolerates both missing and explicit `null`; upstream requires
 /// it to be a present non-null array.
@@ -107,6 +161,45 @@ pub struct InputOutputMessage {
    pub status: Option<OutputStatus>,
 }

+// ---------------------------------------------------------------------------
+// Input-side image / content / message (shadow upstream, relaxed shapes)
+// ---------------------------------------------------------------------------
+
+/// Relaxed counterpart to upstream `InputImageContent`: `detail` is optional on
+/// input and becomes `ImageDetail::Auto` when omitted or explicitly `null`.
+/// OpenAI's hosted API and the OpenResponses spec both accept the omitted
+/// form, but upstream's struct marks `detail` as required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct InputImageContent {
+    #[serde(default, deserialize_with = "deserialize_null_as_default")]
+    pub detail: ImageDetail,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub image_url: Option<String>,
+}
+
+/// One part of an input message, selected by the `type` tag: `input_text`,
+/// `input_image`, or `input_file`. Mirrors upstream `InputContent`, except that
+/// `InputImage` carries the Dynamo-owned relaxed `InputImageContent`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum InputContent {
+    InputText(InputTextContent),
+    InputImage(InputImageContent),
+    InputFile(InputFileContent),
+}
+
+/// User / system / developer input message. Shadows upstream `InputMessage` so
+/// `content` uses the Dynamo-owned `InputContent`; unset `status` is not emitted.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
+pub struct InputMessage {
+    pub content: Vec<InputContent>,
+    pub role: InputRole,
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>,
+}
+
 // ---------------------------------------------------------------------------
 // Input-side Item / Message / InputItem / InputParam (shadow upstream)
 // ---------------------------------------------------------------------------
@@ -271,6 +364,33 @@ mod tests {
        }
    }

+    #[test]
+    fn input_image_without_detail_defaults_to_auto() {
+        let payload = serde_json::json!({
+            "type": "input_image",
+            "image_url": "https://example.com/cat.jpg"
+        });
+        let parsed: InputContent = serde_json::from_value(payload).unwrap();
+        let InputContent::InputImage(image) = &parsed else {
+            panic!("expected InputImage, got {parsed:?}");
+        };
+        assert_eq!(image.detail, ImageDetail::Auto);
+    }
+
+    #[test]
+    fn input_image_with_explicit_null_detail_defaults_to_auto() {
+        let payload = serde_json::json!({
+            "type": "input_image",
+            "image_url": "https://example.com/cat.jpg",
+            "detail": null
+        });
+        let parsed: InputContent = serde_json::from_value(payload).unwrap();
+        let InputContent::InputImage(image) = &parsed else {
+            panic!("expected InputImage, got {parsed:?}");
+        };
+        assert_eq!(image.detail, ImageDetail::Auto);
+    }
+
    #[test]
    fn assistant_message_without_content_field_deserializes() {
        // Bare assistant shell — no `content` field at all. Seen in real

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -232,6 +232,7 @@ markers = [
    "post_merge: marks tests to run after merge",
    "parallel: marks tests that can run in parallel with pytest-xdist",
    "nightly: marks tests to run nightly",
+    "frontend_api_surface_compliance: marks tests that validate Dynamo's HTTP API surface (Responses/Anthropic wire shape, tool-call routing) against upstream compliance harnesses",
    "weekly: marks tests to run weekly",
    "release: marks tests to run on release pipelines",
    "gpu_0: marks tests that don't require GPU",

--- a/tests/frontend/test_frontend_api_surface_compliance.py
+++ b/tests/frontend/test_frontend_api_surface_compliance.py