feat: responses API compliance with upstream type alignment (#6089)

Signed-off-by: Matej Kosec <mkosec@nvidia.com> Co-authored-by: Ishan Dhanani <ishandhanani@gmail.com>

feat: responses API compliance with upstream type alignment (#6089)
Signed-off-by: Matej Kosec <mkosec@nvidia.com> Co-authored-by: Ishan Dhanani <ishandhanani@gmail.com>
8cb47d04 · MatejKosec · GitHub · f8d0a9f9 · 8cb47d04 · 8cb47d04
Unverified Commit 8cb47d04 authored Feb 11, 2026 by MatejKosec Committed by GitHub Feb 12, 2026
20 changed files
--- a/examples/backends/sglang/launch/agg_vision.sh
+++ b/examples/backends/sglang/launch/agg_vision.sh
+#!/bin/bash
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+# Setup cleanup trap
+cleanup() {
+    echo "Cleaning up background processes..."
+    kill $DYNAMO_PID 2>/dev/null || true
+    wait $DYNAMO_PID 2>/dev/null || true
+    echo "Cleanup complete."
+}
+trap cleanup EXIT INT TERM
+
+# Default values
+MODEL="Qwen/Qwen3-VL-8B-Instruct"
+CHAT_TEMPLATE=""
+ENABLE_OTEL=false
+
+# Parse command line arguments
+EXTRA_ARGS=()
+# Flags that take a value are checked before `shift 2`: when the value is
+# missing, `shift 2` fails without shifting anything and this loop would
+# spin forever on the same argument.
+while [[ $# -gt 0 ]]; do
+    case $1 in
+        --model-path)
+            if [[ $# -lt 2 ]]; then
+                echo "Error: --model-path requires a value" >&2
+                exit 1
+            fi
+            MODEL="$2"
+            shift 2
+            ;;
+        --chat-template)
+            if [[ $# -lt 2 ]]; then
+                echo "Error: --chat-template requires a value" >&2
+                exit 1
+            fi
+            CHAT_TEMPLATE="$2"
+            shift 2
+            ;;
+        --enable-otel)
+            ENABLE_OTEL=true
+            shift
+            ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS]"
+            echo "Options:"
+            echo "  --model-path <name>      Specify model (default: $MODEL)"
+            # CHAT_TEMPLATE is empty unless overridden; show a readable default.
+            echo "  --chat-template <name>   Specify SGLang chat template (default: ${CHAT_TEMPLATE:-auto-detected})"
+            echo "  --enable-otel            Enable OpenTelemetry tracing"
+            echo "  -h, --help               Show this help message"
+            echo ""
+            echo "Additional SGLang/Dynamo flags can be passed and will be forwarded"
+            echo "Note: System metrics are enabled by default on port 8081 (worker)"
+            exit 0
+            ;;
+        *)
+            # Anything unrecognized is forwarded to the worker untouched.
+            EXTRA_ARGS+=("$1")
+            shift
+            ;;
+    esac
+done
+
+# Enable tracing if requested
+TRACE_ARGS=()
+if [ "$ENABLE_OTEL" = true ]; then
+    export DYN_LOGGING_JSONL=true
+    export OTEL_EXPORT_ENABLED=1
+    export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-http://localhost:4317}
+    TRACE_ARGS+=(--enable-trace --otlp-traces-endpoint localhost:4317)
+fi
+
+# run ingress
+# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
+OTEL_SERVICE_NAME=dynamo-frontend \
+python3 -m dynamo.frontend &
+DYNAMO_PID=$!
+
+# Build chat template args (only if explicitly set)
+TEMPLATE_ARGS=()
+if [ -n "$CHAT_TEMPLATE" ]; then
+    TEMPLATE_ARGS+=(--chat-template "$CHAT_TEMPLATE")
+fi
+
+# run worker with vision model (SGLang auto-detects chat template from HF tokenizer)
+OTEL_SERVICE_NAME=dynamo-worker DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
+python3 -m dynamo.sglang \
+  --model-path "$MODEL" \
+  --served-model-name "$MODEL" \
+  "${TEMPLATE_ARGS[@]}" \
+  --page-size 16 \
+  --tp 1 \
+  --trust-remote-code \
+  --skip-tokenizer-init \
+  --enable-metrics \
+  "${TRACE_ARGS[@]}" \
+  "${EXTRA_ARGS[@]}"
--- a/lib/async-openai/src/types/impls.rs
+++ b/lib/async-openai/src/types/impls.rs
@@ -24,23 +24,58 @@ use crate::{
 use bytes::Bytes;

 use super::{
-    AddUploadPartRequest, AudioInput, AudioResponseFormat, AudioUrl, ChatCompletionFunctionCall,
-    ChatCompletionFunctions, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage,
-    ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestDeveloperMessage,
-    ChatCompletionRequestDeveloperMessageContent, ChatCompletionRequestFunctionMessage,
-    ChatCompletionRequestMessage, ChatCompletionRequestMessageContentPartAudio,
-    ChatCompletionRequestMessageContentPartAudioUrl, ChatCompletionRequestMessageContentPartImage,
-    ChatCompletionRequestMessageContentPartText, ChatCompletionRequestMessageContentPartVideo,
-    ChatCompletionRequestSystemMessage, ChatCompletionRequestSystemMessageContent,
-    ChatCompletionRequestToolMessage, ChatCompletionRequestToolMessageContent,
-    ChatCompletionRequestUserMessage, ChatCompletionRequestUserMessageContent,
-    ChatCompletionRequestUserMessageContentPart, ChatCompletionToolChoiceOption, CreateFileRequest,
-    CreateImageEditRequest, CreateImageVariationRequest, CreateMessageRequestContent,
-    CreateSpeechResponse, CreateTranscriptionRequest, CreateTranslationRequest, DallE2ImageSize,
-    EmbeddingInput, FileInput, FilePurpose, FunctionName, Image, ImageInput, ImageModel,
-    ImageResponseFormat, ImageSize, ImageUrl, ImagesResponse, ModerationInput, Prompt, Role, Stop,
-    TimestampGranularity, VideoUrl,
-    responses::{CodeInterpreterContainer, Input, InputContent, Role as ResponsesRole},
+    AddUploadPartRequest,
+    AudioInput,
+    AudioResponseFormat,
+    AudioUrl,
+    ChatCompletionFunctionCall,
+    ChatCompletionFunctions,
+    ChatCompletionNamedToolChoice,
+    ChatCompletionRequestAssistantMessage,
+    ChatCompletionRequestAssistantMessageContent,
+    ChatCompletionRequestDeveloperMessage,
+    ChatCompletionRequestDeveloperMessageContent,
+    ChatCompletionRequestFunctionMessage,
+    ChatCompletionRequestMessage,
+    ChatCompletionRequestMessageContentPartAudio,
+    ChatCompletionRequestMessageContentPartAudioUrl,
+    ChatCompletionRequestMessageContentPartImage,
+    ChatCompletionRequestMessageContentPartText,
+    ChatCompletionRequestMessageContentPartVideo,
+    ChatCompletionRequestSystemMessage,
+    ChatCompletionRequestSystemMessageContent,
+    ChatCompletionRequestToolMessage,
+    ChatCompletionRequestToolMessageContent,
+    ChatCompletionRequestUserMessage,
+    ChatCompletionRequestUserMessageContent,
+    ChatCompletionRequestUserMessageContentPart,
+    ChatCompletionToolChoiceOption,
+    CreateFileRequest,
+    CreateImageEditRequest,
+    CreateImageVariationRequest,
+    CreateMessageRequestContent,
+    CreateSpeechResponse,
+    CreateTranscriptionRequest,
+    CreateTranslationRequest,
+    DallE2ImageSize,
+    EmbeddingInput,
+    FileInput,
+    FilePurpose,
+    FunctionName,
+    Image,
+    ImageInput,
+    ImageModel,
+    ImageResponseFormat,
+    ImageSize,
+    ImageUrl,
+    ImagesResponse,
+    ModerationInput,
+    Prompt,
+    Role,
+    Stop,
+    TimestampGranularity,
+    VideoUrl,
+    // responses types now have their own impls in responses/impls.rs
 };

 /// for `impl_from!(T, Enum)`, implements
@@ -1056,50 +1091,4 @@ impl AsyncTryFrom<AddUploadPartRequest> for reqwest::multipart::Form {

 // end: types to multipart form

-impl Default for Input {
-    fn default() -> Self {
-        Self::Text("".to_string())
-    }
-}
-
-impl Default for InputContent {
-    fn default() -> Self {
-        Self::TextInput("".to_string())
-    }
-}
-
-impl From<String> for Input {
-    fn from(value: String) -> Self {
-        Input::Text(value)
-    }
-}
-
-impl From<&str> for Input {
-    fn from(value: &str) -> Self {
-        Input::Text(value.to_owned())
-    }
-}
-
-impl Default for ResponsesRole {
-    fn default() -> Self {
-        Self::User
-    }
-}
-
-impl From<String> for InputContent {
-    fn from(value: String) -> Self {
-        Self::TextInput(value)
-    }
-}
-
-impl From<&str> for InputContent {
-    fn from(value: &str) -> Self {
-        Self::TextInput(value.to_owned())
-    }
-}
-
-impl Default for CodeInterpreterContainer {
-    fn default() -> Self {
-        CodeInterpreterContainer::Id("".to_string())
-    }
-}
+// Responses API impls are now in responses/impls.rs
--- a/lib/async-openai/src/types/mcp/impls.rs
+++ b/lib/async-openai/src/types/mcp/impls.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::types::mcp::{
+    MCPToolAllowedTools, MCPToolApprovalFilter, MCPToolApprovalSetting, MCPToolFilter,
+    MCPToolRequireApproval,
+};
+
+// MCPToolRequireApproval ergonomics
+
+/// Wraps a single approval setting in the `ApprovalSetting` variant.
+impl From<MCPToolApprovalSetting> for MCPToolRequireApproval {
+    fn from(value: MCPToolApprovalSetting) -> Self {
+        Self::ApprovalSetting(value)
+    }
+}
+
+/// Wraps an approval filter in the `Filter` variant.
+impl From<MCPToolApprovalFilter> for MCPToolRequireApproval {
+    fn from(value: MCPToolApprovalFilter) -> Self {
+        Self::Filter(value)
+    }
+}
+
+// MCPToolAllowedTools ergonomics
+
+/// Wraps a tool filter in the `Filter` variant.
+impl From<MCPToolFilter> for MCPToolAllowedTools {
+    fn from(value: MCPToolFilter) -> Self {
+        Self::Filter(value)
+    }
+}
+
+/// Uses an owned list of tool names directly as the `List` variant.
+impl From<Vec<String>> for MCPToolAllowedTools {
+    fn from(names: Vec<String>) -> Self {
+        Self::List(names)
+    }
+}
+
+/// Builds the `List` variant by converting each borrowed name to an owned `String`.
+impl From<Vec<&str>> for MCPToolAllowedTools {
+    fn from(names: Vec<&str>) -> Self {
+        let owned: Vec<String> = names.into_iter().map(String::from).collect();
+        Self::List(owned)
+    }
+}
+
+/// Builds the `List` variant from a slice of borrowed names, copying each into a `String`.
+impl From<&[&str]> for MCPToolAllowedTools {
+    fn from(names: &[&str]) -> Self {
+        let owned: Vec<String> = names.iter().copied().map(String::from).collect();
+        Self::List(owned)
+    }
+}
+
+/// Builds the `List` variant from a fixed-size array of borrowed names.
+impl<const N: usize> From<[&str; N]> for MCPToolAllowedTools {
+    fn from(names: [&str; N]) -> Self {
+        let owned: Vec<String> = names.iter().copied().map(String::from).collect();
+        Self::List(owned)
+    }
+}
+
+/// Builds the `List` variant from a borrowed vector by cloning its contents.
+impl From<&Vec<String>> for MCPToolAllowedTools {
+    fn from(names: &Vec<String>) -> Self {
+        Self::List(names.to_vec())
+    }
+}
+
+/// Builds the `List` variant from a borrowed vector of borrowed names.
+impl From<&Vec<&str>> for MCPToolAllowedTools {
+    fn from(names: &Vec<&str>) -> Self {
+        let owned: Vec<String> = names.iter().copied().map(String::from).collect();
+        Self::List(owned)
+    }
+}
+
+/// Treats a single borrowed name as a one-element `List`.
+impl From<&str> for MCPToolAllowedTools {
+    fn from(name: &str) -> Self {
+        Self::List(vec![String::from(name)])
+    }
+}
+
+/// Treats a single owned name as a one-element `List`.
+impl From<String> for MCPToolAllowedTools {
+    fn from(name: String) -> Self {
+        Self::List(vec![name])
+    }
+}
--- a/lib/async-openai/src/types/mcp/mcp_.rs
+++ b/lib/async-openai/src/types/mcp/mcp_.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+use crate::error::OpenAIError;
+
+/// Identifier of a service connector, as carried by `MCPTool::connector_id`.
+///
+/// Variants are (de)serialized in snake_case, e.g. `ConnectorDropbox` as
+/// `connector_dropbox`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum McpToolConnectorId {
+    ConnectorDropbox,
+    ConnectorGmail,
+    ConnectorGooglecalendar,
+    ConnectorGoogledrive,
+    ConnectorMicrosoftteams,
+    ConnectorOutlookcalendar,
+    ConnectorOutlookemail,
+    ConnectorSharepoint,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq, Default, ToSchema)]
+#[builder(
+    name = "MCPToolArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct MCPTool {
+    /// A label for this MCP server, used to identify it in tool calls.
+    pub server_label: String,
+
+    /// List of allowed tool names or a filter object.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub allowed_tools: Option<MCPToolAllowedTools>,
+
+    /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP
+    /// server URL or a service connector. Your application must handle the OAuth authorization
+    /// flow and provide the token here.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub authorization: Option<String>,
+
+    /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or
+    /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
+    ///
+    /// Currently supported `connector_id` values are:
+    /// - Dropbox: `connector_dropbox`
+    /// - Gmail: `connector_gmail`
+    /// - Google Calendar: `connector_googlecalendar`
+    /// - Google Drive: `connector_googledrive`
+    /// - Microsoft Teams: `connector_microsoftteams`
+    /// - Outlook Calendar: `connector_outlookcalendar`
+    /// - Outlook Email: `connector_outlookemail`
+    /// - SharePoint: `connector_sharepoint`
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub connector_id: Option<McpToolConnectorId>,
+
+    /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub headers: Option<serde_json::Value>,
+
+    /// Specify which of the MCP server's tools require approval.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub require_approval: Option<MCPToolRequireApproval>,
+
+    /// Optional description of the MCP server, used to provide more context.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub server_description: Option<String>,
+
+    /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub server_url: Option<String>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum MCPToolAllowedTools {
+    /// A string array of allowed tool names
+    List(Vec<String>),
+    /// A filter object to specify which tools are allowed.
+    Filter(MCPToolFilter),
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct MCPToolFilter {
+    /// Indicates whether or not a tool modifies data or is read-only.
+    /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
+    /// it will match this filter.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub read_only: Option<bool>,
+    /// List of allowed tool names.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_names: Option<Vec<String>>,
+}
+
+/// Approval policy or filter for MCP tools.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum MCPToolRequireApproval {
+    /// Specify which of the MCP server's tools require approval. Can be
+    /// `always`, `never`, or a filter object associated with tools
+    /// that require approval.
+    Filter(MCPToolApprovalFilter),
+    /// Specify a single approval policy for all tools. One of `always` or
+    /// `never`. When set to `always`, all tools will require approval. When
+    /// set to `never`, all tools will not require approval.
+    ApprovalSetting(MCPToolApprovalSetting),
+}
+
+/// A single approval policy, (de)serialized as lowercase `always` or `never`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum MCPToolApprovalSetting {
+    Always,
+    Never,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct MCPToolApprovalFilter {
+    /// A list of tools that always require approval.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub always: Option<MCPToolFilter>,
+    /// A list of tools that never require approval.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub never: Option<MCPToolFilter>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct MCPListToolsTool {
+    /// The JSON schema describing the tool's input.
+    pub input_schema: serde_json::Value,
+    /// The name of the tool.
+    pub name: String,
+    /// Additional annotations about the tool.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub annotations: Option<serde_json::Value>,
+    /// The description of the tool.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+}
--- a/lib/async-openai/src/types/mcp/mod.rs
+++ b/lib/async-openai/src/types/mcp/mod.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+mod impls;
+mod mcp_;
+pub use mcp_::*;
--- a/lib/async-openai/src/types/mod.rs
+++ b/lib/async-openai/src/types/mod.rs
@@ -24,6 +24,7 @@ mod file;
 mod fine_tuning;
 mod image;
 mod invites;
+pub mod mcp;
 mod message;
 mod model;
 mod moderation;
@@ -36,6 +37,7 @@ mod projects;
 pub mod realtime;
 pub mod responses;
 mod run;
+pub mod shared;
 mod step;
 mod thread;
 mod upload;

--- a/lib/async-openai/src/types/responses.rs
+++ b/lib/async-openai/src/types/responses.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use crate::error::OpenAIError;
-pub use crate::types::{
-    CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort,
-    ResponseFormatJsonSchema,
-};
-use derive_builder::Builder;
-use futures::Stream;
-use serde::{Deserialize, Serialize};
-use serde_json::Value;
-use std::collections::HashMap;
-use std::pin::Pin;
-use utoipa::ToSchema;
-
-/// Role of messages in the API.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum Role {
-    User,
-    Assistant,
-    System,
-    Developer,
-}
-
-/// Status of input/output items.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum OutputStatus {
-    InProgress,
-    Completed,
-    Incomplete,
-}
-
-/// Input payload: raw text or structured context items.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum Input {
-    /// A text input to the model, equivalent to a text input with the user role.
-    Text(String),
-    /// A list of one or many input items to the model, containing different content types.
-    Items(Vec<InputItem>),
-}
-
-/// A context item: currently only messages.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged, rename_all = "snake_case")]
-pub enum InputItem {
-    Message(InputMessage),
-    Custom(serde_json::Value),
-}
-
-/// A message to prime the model.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
-#[builder(
-    name = "InputMessageArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct InputMessage {
-    #[serde(default, rename = "type")]
-    pub kind: InputMessageType,
-    /// The role of the message input.
-    pub role: Role,
-    /// Text, image, or audio input to the model, used to generate a response. Can also contain
-    /// previous assistant responses.
-    pub content: InputContent,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
-#[serde(rename_all = "snake_case")]
-pub enum InputMessageType {
-    #[default]
-    Message,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum InputContent {
-    /// A text input to the model.
-    TextInput(String),
-    /// A list of one or many input items to the model, containing different content types.
-    InputItemContentList(Vec<ContentType>),
-}
-
-/// Parts of a message: text, image, video, file, or audio.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum ContentType {
-    /// A text input to the model.
-    InputText(InputText),
-    /// An image input to the model.
-    InputImage(InputImage),
-    /// A video input to the model.
-    InputVideo(InputVideo),
-    /// An audio input to the model.
-    InputAudio(InputAudio),
-    /// A file input to the model.
-    InputFile(InputFile),
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct InputText {
-    text: String,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
-#[builder(
-    name = "InputImageArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct InputImage {
-    /// The detail level of the image to be sent to the model.
-    detail: ImageDetail,
-    /// The ID of the file to be sent to the model.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    file_id: Option<String>,
-    /// The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image
-    /// in a data URL.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    image_url: Option<String>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
-#[builder(
-    name = "InputVideoArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct InputVideo {
-    /// The detail level of the video to be sent to the model.
-    detail: ImageDetail,
-    /// The ID of the file to be sent to the model.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    file_id: Option<String>,
-    /// The URL of the video to be sent to the model. A fully qualified URL or base64 encoded video
-    /// in a data URL.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    video_url: Option<String>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
-#[builder(
-    name = "InputAudioArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct InputAudio {
-    /// The ID of the file to be sent to the model.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    file_id: Option<String>,
-    /// The URL of the audio to be sent to the model. A fully qualified URL or base64 encoded audio
-    /// in a data URL.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    audio_url: Option<String>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
-#[builder(
-    name = "InputFileArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct InputFile {
-    /// The content of the file to be sent to the model.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    file_data: Option<String>,
-    /// The ID of the file to be sent to the model.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    file_id: Option<String>,
-    /// The name of the file to be sent to the model.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    filename: Option<String>,
-}
-
-/// Builder for a Responses API request.
-#[derive(ToSchema, Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)]
-#[builder(
-    name = "CreateResponseArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateResponse {
-    /// Text, image, or file inputs to the model, used to generate a response.
-    /// Using value_type to prevent deep schema recursion from Input's nested content types.
-    #[schema(value_type = Object)]
-    pub input: Input,
-
-    /// Model ID used to generate the response, like `gpt-4o`.
-    /// OpenAI offers a wide range of models with different capabilities,
-    /// performance characteristics, and price points.
-    pub model: String,
-
-    /// Whether to run the model response in the background.
-    /// boolean or null.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub background: Option<bool>,
-
-    /// Specify additional output data to include in the model response.
-    ///
-    /// Supported values:
-    /// - `file_search_call.results`
-    ///   Include the search results of the file search tool call.
-    /// - `message.input_image.image_url`
-    ///   Include image URLs from the input message.
-    /// - `computer_call_output.output.image_url`
-    ///   Include image URLs from the computer call output.
-    /// - `reasoning.encrypted_content`
-    ///   Include an encrypted version of reasoning tokens in reasoning item outputs.
-    ///   This enables reasoning items to be used in multi-turn conversations when
-    ///   using the Responses API statelessly (for example, when the `store` parameter
-    ///   is set to `false`, or when an organization is enrolled in the zero-data-
-    ///   retention program).
-    ///
-    /// If `None`, no additional data is returned.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub include: Option<Vec<String>>,
-
-    /// Inserts a system (or developer) message as the first item in the model's context.
-    ///
-    /// When using along with previous_response_id, the instructions from a previous response will
-    /// not be carried over to the next response. This makes it simple to swap out system
-    /// (or developer) messages in new responses.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub instructions: Option<String>,
-
-    /// An upper bound for the number of tokens that can be generated for a
-    /// response, including visible output tokens and reasoning tokens.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_output_tokens: Option<u32>,
-
-    /// The maximum number of total calls to built-in tools that can be processed in a response.
-    /// This maximum number applies across all built-in tool calls, not per individual tool.
-    /// Any further attempts to call a tool by the model will be ignored.
-    pub max_tool_calls: Option<u32>,
-
-    /// Arbitrary JSON metadata used as a passthrough parameter
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<serde_json::Value>,
-
-    /// Whether to allow the model to run tool calls in parallel.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
-
-    /// The unique ID of the previous response to the model. Use this to create
-    /// multi-turn conversations.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub previous_response_id: Option<String>,
-
-    /// Reference to a prompt template and its variables.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub prompt: Option<PromptConfig>,
-
-    /// **o-series models only**: Configuration options for reasoning models.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub reasoning: Option<ReasoningConfig>,
-
-    /// Specifies the latency tier to use for processing the request.
-    ///
-    /// This parameter is relevant for customers subscribed to the Scale tier service.
-    ///
-    /// Supported values:
-    /// - `auto`
-    ///   - If the Project is Scale tier enabled, the system will utilize Scale tier credits until
-    ///     they are exhausted.
-    ///   - If the Project is not Scale tier enabled, the request will be processed using the
-    ///     default service tier with a lower uptime SLA and no latency guarantee.
-    /// - `default`
-    ///   The request will be processed using the default service tier with a lower uptime SLA and
-    ///   no latency guarantee.
-    /// - `flex`
-    ///   The request will be processed with the Flex Processing service tier. Learn more.
-    ///
-    /// When not set, the default behavior is `auto`.
-    ///
-    /// When this parameter is set, the response body will include the `service_tier` utilized.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub service_tier: Option<ServiceTier>,
-
-    /// Whether to store the generated model response for later retrieval via API.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub store: Option<bool>,
-
-    /// If set to true, the model response data will be streamed to the client as it is
-    /// generated using server-sent events.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub stream: Option<bool>,
-
-    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8
-    /// will make the output more random, while lower values like 0.2 will make it
-    /// more focused and deterministic. We generally recommend altering this or
-    /// `top_p` but not both.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
-
-    /// Configuration options for a text response from the model. Can be plain text
-    /// or structured JSON data.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub text: Option<TextConfig>,
-
-    /// How the model should select which tool (or tools) to use when generating
-    /// a response.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[schema(value_type = Object)]
-    pub tool_choice: Option<ToolChoice>,
-
-    /// An array of tools the model may call while generating a response.
-    /// Can include built-in tools (file_search, web_search_preview,
-    /// computer_use_preview) or custom function definitions.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    #[schema(value_type = Vec<Object>)]
-    pub tools: Option<Vec<ToolDefinition>>,
-
-    /// An integer between 0 and 20 specifying the number of most likely tokens to return
-    /// at each token position, each with an associated log probability.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_logprobs: Option<u32>, // TODO add validation of range
-
-    /// An alternative to sampling with temperature, called nucleus sampling,
-    /// where the model considers the results of the tokens with top_p probability
-    /// mass. So 0.1 means only the tokens comprising the top 10% probability mass
-    /// are considered. We generally recommend altering this or `temperature` but
-    /// not both.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
-
-    /// The truncation strategy to use for the model response:
-    /// - `auto`: drop items in the middle to fit context window.
-    /// - `disabled`: error if exceeding context window.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub truncation: Option<Truncation>,
-
-    /// A unique identifier representing your end-user, which can help OpenAI to
-    /// monitor and detect abuse.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-}
-
-/// Service tier request options.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct PromptConfig {
-    /// The unique identifier of the prompt template to use.
-    pub id: String,
-
-    /// Optional version of the prompt template.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub version: Option<String>,
-
-    /// Optional map of values to substitute in for variables in your prompt. The substitution
-    /// values can either be strings, or other Response input types like images or files.
-    /// For now only supporting Strings.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub variables: Option<HashMap<String, String>>,
-}
-
-/// Service tier request options.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ServiceTier {
-    Auto,
-    Default,
-    Flex,
-}
-
-/// Truncation strategies.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum Truncation {
-    Auto,
-    Disabled,
-}
-
-/// Reasoning settings for o-series models.
-///
-/// Both fields are optional and are left out of the serialized form when unset.
-/// The `Builder` derive generates a companion `ReasoningConfigArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(
-    name = "ReasoningConfigArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default,
-    build_fn(error = "OpenAIError")
-)]
-pub struct ReasoningConfig {
-    /// Constraint on how much effort is spent on reasoning.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub effort: Option<ReasoningEffort>,
-    /// Which reasoning summary mode to use.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub summary: Option<ReasoningSummary>,
-}
-
-/// Summary mode for reasoning, as used by `ReasoningConfig::summary`.
-///
-/// Serialized as the lowercase variant name: `auto`, `concise`, or `detailed`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ReasoningSummary {
-    Auto,
-    Concise,
-    Detailed,
-}
-
-/// Configuration for text response format.
-///
-/// Wraps a single [`TextResponseFormat`] under the `format` key.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct TextConfig {
-    /// Defines the format: plain text, JSON object, or JSON schema.
-    pub format: TextResponseFormat,
-}
-
-/// The response format requested for text output.
-///
-/// Internally tagged by `type`, using the snake_case variant name as the tag value
-/// (`text`, `json_object`, `json_schema`).
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum TextResponseFormat {
-    /// The type of response format being defined: `text`
-    Text,
-    /// The type of response format being defined: `json_object`
-    JsonObject,
-    /// The type of response format being defined: `json_schema`
-    JsonSchema(ResponseFormatJsonSchema),
-}
-
-/// Definitions for model-callable tools.
-///
-/// Internally tagged by `type`, using the snake_case variant name as the tag value
-/// (for example `file_search`, `web_search_preview`, `local_shell`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum ToolDefinition {
-    /// File search tool.
-    FileSearch(FileSearch),
-    /// Custom function call.
-    Function(Function),
-    /// Web search preview tool.
-    WebSearchPreview(WebSearchPreview),
-    /// Virtual computer control tool.
-    ComputerUsePreview(ComputerUsePreview),
-    /// Remote Model Context Protocol server.
-    Mcp(Mcp),
-    /// Python code interpreter tool.
-    CodeInterpreter(CodeInterpreter),
-    /// Image generation tool.
-    ImageGeneration(ImageGeneration),
-    /// Local shell command execution tool. Carries no payload beyond the `type` tag.
-    LocalShell,
-}
-
-/// File search tool definition.
-///
-/// Optional fields are left out of the serialized form when unset.
-/// The `Builder` derive generates a companion `FileSearchArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(
-    name = "FileSearchArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default,
-    build_fn(error = "OpenAIError")
-)]
-pub struct FileSearch {
-    /// IDs of the vector stores to search.
-    pub vector_store_ids: Vec<String>,
-    /// Maximum number of results to return; should be between 1 and 50 inclusive.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_num_results: Option<u32>,
-    /// Filter to apply to the search.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub filters: Option<Filter>,
-    /// Ranking options for the search.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub ranking_options: Option<RankingOptions>,
-}
-
-/// Custom function tool definition.
-///
-/// The `Builder` derive generates a companion `FunctionArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(name = "FunctionArgs", pattern = "mutable", setter(into, strip_option), default)]
-pub struct Function {
-    /// Name of the function to call.
-    pub name: String,
-    /// JSON schema object describing the function's parameters.
-    pub parameters: serde_json::Value,
-    /// Whether strict parameter validation is enforced.
-    pub strict: bool,
-    /// Description of the function; the model uses it to decide whether or not to call the
-    /// function. Left out of the serialized form when unset.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-}
-
-/// Web search preview tool definition.
-///
-/// Both fields are optional and are left out of the serialized form when unset.
-/// The `Builder` derive generates a companion `WebSearchPreviewArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(
-    name = "WebSearchPreviewArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-pub struct WebSearchPreview {
-    /// Location of the user.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user_location: Option<Location>,
-    /// High-level guidance for how much context window space the search may use.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub search_context_size: Option<WebSearchContextSize>,
-}
-
-/// Amount of context window space to use for web search, as used by
-/// `WebSearchPreview::search_context_size`.
-///
-/// Serialized as the lowercase variant name: `low`, `medium`, or `high`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
-#[serde(rename_all = "lowercase")]
-pub enum WebSearchContextSize {
-    Low,
-    Medium,
-    High,
-}
-
-/// Computer use preview tool definition.
-///
-/// The `Builder` derive generates a companion `ComputerUsePreviewArgs` builder.
-/// The fields are public, like those of the other tool definitions, so a value can be
-/// inspected or built with a struct literal outside this module.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
-#[builder(
-    name = "ComputerUsePreviewArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-pub struct ComputerUsePreview {
-    /// The type of computer environment to control.
-    pub environment: String,
-    /// The width of the computer display.
-    pub display_width: u32,
-    /// The height of the computer display.
-    pub display_height: u32,
-}
-
-/// Options for search result ranking.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct RankingOptions {
-    /// The ranker to use for the file search.
-    pub ranker: String,
-    /// The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will
-    /// attempt to return only the most relevant results, but may return fewer results.
-    /// Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub score_threshold: Option<f32>,
-}
-
-/// Filters for file search.
-///
-/// Untagged: no variant name appears on the wire, and on deserialization serde tries the
-/// variants in declaration order (`Comparison` first, then `Compound`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum Filter {
-    /// A filter used to compare a specified attribute key to a given value using a defined
-    /// comparison operation.
-    Comparison(ComparisonFilter),
-    /// Combine multiple filters using `and` or `or`.
-    Compound(CompoundFilter),
-}
-
-/// Single comparison filter.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ComparisonFilter {
-    /// Specifies the comparison operator. Serialized under the key `type`.
-    #[serde(rename = "type")]
-    pub op: ComparisonType,
-    /// The key to compare against the value.
-    pub key: String,
-    /// The value to compare against the attribute key; supports string, number, or boolean types.
-    pub value: serde_json::Value,
-}
-
-/// Comparison operators for a [`ComparisonFilter`].
-///
-/// Each variant is serialized under the short name given by its `rename` attribute
-/// (`eq`, `ne`, `gt`, `gte`, `lt`, `lte`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-pub enum ComparisonType {
-    /// Serialized as `eq`.
-    #[serde(rename = "eq")]
-    Equals,
-    /// Serialized as `ne`.
-    #[serde(rename = "ne")]
-    NotEquals,
-    /// Serialized as `gt`.
-    #[serde(rename = "gt")]
-    GreaterThan,
-    /// Serialized as `gte`.
-    #[serde(rename = "gte")]
-    GreaterThanOrEqualTo,
-    /// Serialized as `lt`.
-    #[serde(rename = "lt")]
-    LessThan,
-    /// Serialized as `lte`.
-    #[serde(rename = "lte")]
-    LessThanOrEqualTo,
-}
-
-/// Combine multiple filters with a logical operator.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct CompoundFilter {
-    /// Logical operator used to combine `filters`. Serialized under the key `type`.
-    ///
-    /// This is a [`CompoundType`] (`and` / `or`). A comparison operator such as `eq` cannot
-    /// express how several filters are combined, and typing this field as a comparison
-    /// operator would make `{"type": "and", ...}` impossible to deserialize.
-    #[serde(rename = "type")]
-    pub op: CompoundType,
-    /// Array of filters to combine. Items can be ComparisonFilter or CompoundFilter.
-    pub filters: Vec<Filter>,
-}
-
-/// Logical operator for combining filters.
-///
-/// Serialized as the lowercase variant name: `and` or `or`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum CompoundType {
-    And,
-    Or,
-}
-
-/// Approximate location of the user, for web search.
-///
-/// Optional fields are left out of the serialized form when unset.
-/// The `Builder` derive generates a companion `LocationArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(
-    name = "LocationArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default,
-    build_fn(error = "OpenAIError")
-)]
-pub struct Location {
-    /// Type of location approximation. Always approximate. Serialized under the key `type`.
-    #[serde(rename = "type")]
-    pub kind: String,
-    /// Free-text city of the user, e.g. San Francisco.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub city: Option<String>,
-    /// Two-letter ISO country code of the user, e.g. US.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub country: Option<String>,
-    /// Free-text region of the user, e.g. California.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub region: Option<String>,
-    /// IANA timezone of the user, e.g. America/Los_Angeles.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub timezone: Option<String>,
-}
-
-/// Tool configuration for a Model Context Protocol (MCP) server.
-///
-/// Optional fields are left out of the serialized form when unset.
-/// The `Builder` derive generates a companion `McpArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(
-    name = "McpArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default,
-    build_fn(error = "OpenAIError")
-)]
-pub struct Mcp {
-    /// Label identifying this MCP server.
-    pub server_label: String,
-    /// URL of the MCP server.
-    pub server_url: String,
-    /// Allowed tools, given either as a list of names or as a filter object.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub allowed_tools: Option<AllowedTools>,
-    /// Optional HTTP headers for the MCP server.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub headers: Option<Value>,
-    /// Approval policy, or a filter describing which tools need approval.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub require_approval: Option<RequireApproval>,
-}
-
-/// Allowed tools configuration for MCP.
-///
-/// Untagged: serialized as either a bare list of names or a filter object, with no
-/// variant name on the wire.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum AllowedTools {
-    /// A flat list of allowed tool names.
-    List(Vec<String>),
-    /// A filter object specifying allowed tools.
-    Filter(McpAllowedToolsFilter),
-}
-
-/// Filter object for MCP allowed tools.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct McpAllowedToolsFilter {
-    /// Names of tools in the filter. Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_names: Option<Vec<String>>,
-}
-
-/// Approval policy or filter for MCP tools.
-///
-/// Untagged: serialized as either a bare policy string or a filter object, with no
-/// variant name on the wire.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum RequireApproval {
-    /// A blanket policy: "always" or "never".
-    Policy(RequireApprovalPolicy),
-    /// A filter object specifying which tools require approval.
-    Filter(McpApprovalFilter),
-}
-
-/// Blanket approval policy for MCP tools.
-///
-/// Serialized as the lowercase variant name: `always` or `never`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum RequireApprovalPolicy {
-    Always,
-    Never,
-}
-
-/// Filter object for MCP tool approval.
-///
-/// Both fields are omitted from serialized output when `None`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct McpApprovalFilter {
-    /// A list of tools that always require approval.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub always: Option<McpAllowedToolsFilter>,
-    /// A list of tools that never require approval.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub never: Option<McpAllowedToolsFilter>,
-}
-
-/// Container configuration for a code interpreter.
-///
-/// Untagged: serialized as either a bare string ID or a container object, with no
-/// variant name on the wire.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum CodeInterpreterContainer {
-    /// A simple container ID.
-    Id(String),
-    /// Auto-configured container with optional files.
-    Container(CodeInterpreterContainerKind),
-}
-
-/// Auto configuration for code interpreter container.
-///
-/// Internally tagged by `type`; the only variant is tagged `auto`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum CodeInterpreterContainerKind {
-    Auto {
-        /// Optional list of uploaded file IDs. Omitted from serialized output when `None`.
-        #[serde(skip_serializing_if = "Option::is_none")]
-        file_ids: Option<Vec<String>>,
-    },
-}
-
-/// Definition of the code interpreter tool.
-///
-/// The `Builder` derive generates a companion `CodeInterpreterArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(
-    name = "CodeInterpreterArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default,
-    build_fn(error = "OpenAIError")
-)]
-pub struct CodeInterpreter {
-    /// Configuration of the container in which code runs.
-    pub container: CodeInterpreterContainer,
-}
-
-/// Mask image input for image generation.
-///
-/// Both fields are omitted from serialized output when `None`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct InputImageMask {
-    /// Base64-encoded mask image.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub image_url: Option<String>,
-    /// File ID for the mask image.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub file_id: Option<String>,
-}
-
-/// Definition of the image generation tool.
-///
-/// Every field is optional and is left out of the serialized form when unset.
-/// The `Builder` derive generates a companion `ImageGenerationArgs` builder.
-#[derive(Debug, Clone, PartialEq, Default, Serialize, Deserialize, ToSchema, Builder)]
-#[builder(
-    name = "ImageGenerationArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default,
-    build_fn(error = "OpenAIError")
-)]
-pub struct ImageGeneration {
-    /// Background type: transparent, opaque, or auto.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub background: Option<ImageGenerationBackground>,
-    /// Optional mask used for inpainting.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub input_image_mask: Option<InputImageMask>,
-    /// Model to use (default: gpt-image-1).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub model: Option<String>,
-    /// Moderation level (default: auto).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub moderation: Option<String>,
-    /// Compression level (0-100).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub output_compression: Option<u8>,
-    /// Output format: png, webp, or jpeg.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub output_format: Option<ImageGenerationOutputFormat>,
-    /// Number of partial images (0-3).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub partial_images: Option<u8>,
-    /// Quality: low, medium, high, or auto.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub quality: Option<ImageGenerationQuality>,
-    /// Size: e.g. "1024x1024" or auto.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub size: Option<ImageGenerationSize>,
-}
-
-/// Background type for generated images.
-///
-/// Serialized as the lowercase variant name: `transparent`, `opaque`, or `auto`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageGenerationBackground {
-    Transparent,
-    Opaque,
-    Auto,
-}
-
-/// Output format for generated images.
-///
-/// Serialized as the lowercase variant name: `png`, `webp`, or `jpeg`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageGenerationOutputFormat {
-    Png,
-    Webp,
-    Jpeg,
-}
-
-/// Quality setting for generated images.
-///
-/// Serialized as the lowercase variant name: `low`, `medium`, `high`, or `auto`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageGenerationQuality {
-    Low,
-    Medium,
-    High,
-    Auto,
-}
-
-/// Size of generated images.
-///
-/// `Auto` is serialized as `auto` (via `rename_all`); the fixed sizes carry explicit
-/// `rename` attributes giving their `WIDTHxHEIGHT` wire names.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageGenerationSize {
-    Auto,
-    /// Serialized as `1024x1024`.
-    #[serde(rename = "1024x1024")]
-    Size1024x1024,
-    /// Serialized as `1024x1536`.
-    #[serde(rename = "1024x1536")]
-    Size1024x1536,
-    /// Serialized as `1536x1024`.
-    #[serde(rename = "1536x1024")]
-    Size1536x1024,
-}
-
-/// Control how the model picks or is forced to pick a tool.
-///
-/// Untagged: no variant name appears on the wire, and on deserialization serde tries the
-/// variants in declaration order.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum ToolChoice {
-    /// Controls which (if any) tool is called by the model.
-    Mode(ToolChoiceMode),
-    /// Indicates that the model should use a built-in tool to generate a response.
-    Hosted {
-        /// The type of hosted tool the model should use. Serialized under the key `type`.
-        #[serde(rename = "type")]
-        kind: HostedToolType,
-    },
-    /// Use this option to force the model to call a specific function.
-    ///
-    /// NOTE(review): being untagged with a single `name` field, this variant serializes as
-    /// an object containing only `name`, with no `type` key. Confirm that consumers do not
-    /// require `"type": "function"` here.
-    Function {
-        /// The name of the function to call.
-        name: String,
-    },
-}
-
-/// Simple tool-choice modes.
-///
-/// Serialized as the lowercase variant name: `none`, `auto`, or `required`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ToolChoiceMode {
-    /// The model will not call any tool and instead generates a message.
-    None,
-    /// The model can pick between generating a message or calling one or more tools.
-    Auto,
-    /// The model must call one or more tools.
-    Required,
-}
-
-/// Hosted tool type identifiers.
-///
-/// Serialized as the snake_case variant name: `file_search`, `web_search_preview`, or
-/// `computer_use_preview`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum HostedToolType {
-    FileSearch,
-    WebSearchPreview,
-    ComputerUsePreview,
-}
-
-/// Error returned by the API when a request fails.
-///
-/// Both fields are required when deserializing.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ErrorObject {
-    /// The error code for the response.
-    pub code: String,
-    /// A human-readable description of the error.
-    pub message: String,
-}
-
-/// Details about an incomplete response.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct IncompleteDetails {
-    /// The reason why the response is incomplete, as a free-form string.
-    pub reason: String,
-}
-
-/// A simple text output from the model.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct OutputText {
-    /// The annotations of the text output. Required when deserializing (may be empty).
-    pub annotations: Vec<Annotation>,
-    /// The text output from the model.
-    pub text: String,
-}
-
-/// An annotation attached to a text output.
-///
-/// Internally tagged by `type`, using the snake_case variant name as the tag value
-/// (`file_citation`, `url_citation`, `file_path`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum Annotation {
-    /// A citation to a file.
-    FileCitation(FileCitation),
-    /// A citation for a web resource used to generate a model response.
-    UrlCitation(UrlCitation),
-    /// A path to a file.
-    FilePath(FilePath),
-}
-
-/// A citation to a file.
-///
-/// The fields are public so that a citation can be read, or built with a struct literal,
-/// outside this module.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FileCitation {
-    /// The ID of the file.
-    pub file_id: String,
-    /// The index of the file in the list of files.
-    pub index: u32,
-}
-
-/// A citation for a web resource used to generate a model response.
-///
-/// The fields are public so that a citation can be read, or built with a struct literal,
-/// outside this module.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct UrlCitation {
-    /// The index of the last character of the URL citation in the message.
-    pub end_index: u32,
-    /// The index of the first character of the URL citation in the message.
-    pub start_index: u32,
-    /// The title of the web resource.
-    pub title: String,
-    /// The URL of the web resource.
-    pub url: String,
-}
-
-/// A path to a file.
-///
-/// The fields are public so that a value can be read, or built with a struct literal,
-/// outside this module.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FilePath {
-    /// The ID of the file.
-    pub file_id: String,
-    /// The index of the file in the list of files.
-    pub index: u32,
-}
-
-/// A refusal explanation from the model.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct Refusal {
-    /// The refusal explanation from the model.
-    pub refusal: String,
-}
-
-/// A message generated by the model.
-///
-/// All four fields are required when deserializing.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct OutputMessage {
-    /// The content of the output message: text outputs and/or refusals.
-    pub content: Vec<Content>,
-    /// The unique ID of the output message.
-    pub id: String,
-    /// The role of the output message. Always assistant.
-    pub role: Role,
-    /// The status of the message.
-    pub status: OutputStatus,
-}
-
-/// One content part of an [`OutputMessage`].
-///
-/// Internally tagged by `type`, using the snake_case variant name as the tag value
-/// (`output_text`, `refusal`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum Content {
-    /// A text output from the model.
-    OutputText(OutputText),
-    /// A refusal from the model.
-    Refusal(Refusal),
-}
-
-/// One item of model output: a message, a tool call or tool result, or a reasoning item.
-///
-/// Internally tagged by `type`, using the snake_case variant name as the tag value
-/// (for example `message`, `function_call`, `mcp_list_tools`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum OutputContent {
-    /// An output message from the model.
-    Message(OutputMessage),
-    /// The results of a file search tool call.
-    FileSearchCall(FileSearchCallOutput),
-    /// A tool call to run a function.
-    FunctionCall(FunctionCall),
-    /// The results of a web search tool call.
-    WebSearchCall(WebSearchCallOutput),
-    /// A tool call to a computer use tool.
-    ComputerCall(ComputerCallOutput),
-    /// A description of the chain of thought used by a reasoning model while generating a response.
-    /// Be sure to include these items in your input to the Responses API for subsequent turns of a
-    /// conversation if you are manually managing context.
-    Reasoning(ReasoningItem),
-    /// Image generation tool call output.
-    ImageGenerationCall(ImageGenerationCallOutput),
-    /// Code interpreter tool call output.
-    CodeInterpreterCall(CodeInterpreterCallOutput),
-    /// Local shell tool call output.
-    LocalShellCall(LocalShellCallOutput),
-    /// MCP tool invocation output.
-    McpCall(McpCallOutput),
-    /// MCP list-tools output.
-    McpListTools(McpListToolsOutput),
-    /// MCP approval request output.
-    McpApprovalRequest(McpApprovalRequestOutput),
-}
-
-/// A reasoning item representing the model's chain of thought, including summary paragraphs.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ReasoningItem {
-    /// Unique identifier of the reasoning content.
-    pub id: String,
-    /// The summarized chain-of-thought paragraphs.
-    pub summary: Vec<SummaryText>,
-    /// The encrypted content of the reasoning item - populated when a response is generated with
-    /// `reasoning.encrypted_content` in the `include` parameter.
-    /// Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub encrypted_content: Option<String>,
-    /// The status of the reasoning item. Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub status: Option<OutputStatus>,
-}
-
-/// A single summary text fragment from reasoning, as held in `ReasoningItem::summary`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct SummaryText {
-    /// A short summary of the reasoning used by the model.
-    pub text: String,
-}
-
-/// File search tool call output.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FileSearchCallOutput {
-    /// The unique ID of the file search tool call.
-    pub id: String,
-    /// The queries used to search for files.
-    pub queries: Vec<String>,
-    /// The status of the file search tool call.
-    pub status: FileSearchCallOutputStatus,
-    /// The results of the file search tool call. Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub results: Option<Vec<FileSearchResult>>,
-}
-
-/// Status of a file search tool call.
-///
-/// Serialized as the snake_case variant name: `in_progress`, `searching`, `incomplete`,
-/// `failed`, or `completed`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum FileSearchCallOutputStatus {
-    InProgress,
-    Searching,
-    Incomplete,
-    Failed,
-    Completed,
-}
-
-/// A single result from a file search.
-///
-/// All fields are required when deserializing.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FileSearchResult {
-    /// The unique ID of the file.
-    pub file_id: String,
-    /// The name of the file.
-    pub filename: String,
-    /// The relevance score of the file - a value between 0 and 1.
-    pub score: f32,
-    /// The text that was retrieved from the file.
-    pub text: String,
-    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
-    /// additional information about the object in a structured format, and querying for objects
-    /// via the API or the dashboard. Keys are strings with a maximum length of 64 characters.
-    /// Values are strings with a maximum length of 512 characters, booleans, or numbers.
-    pub attributes: HashMap<String, serde_json::Value>,
-}
-
-/// A pending safety check, as listed in `ComputerCallOutput::pending_safety_checks`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct SafetyCheck {
-    /// The ID of the safety check.
-    pub id: String,
-    /// The type/code of the pending safety check.
-    pub code: String,
-    /// Details about the pending safety check.
-    pub message: String,
-}
-
-/// Web search tool call output.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct WebSearchCallOutput {
-    /// The unique ID of the web search tool call.
-    pub id: String,
-    /// The status of the web search tool call, kept as a free-form string.
-    pub status: String,
-}
-
-/// Output from a computer tool call.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ComputerCallOutput {
-    /// The action associated with this computer call.
-    pub action: ComputerCallAction,
-    /// An identifier used when responding to the tool call with output.
-    pub call_id: String,
-    /// The unique ID of the computer call.
-    pub id: String,
-    /// The pending safety checks for the computer call.
-    pub pending_safety_checks: Vec<SafetyCheck>,
-    /// The status of the item.
-    pub status: OutputStatus,
-}
-
-/// A point in 2D space, given as signed integer coordinates.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct Point {
-    /// X-coordinate of the point.
-    pub x: i32,
-    /// Y-coordinate of the point.
-    pub y: i32,
-}
-
-/// Represents all user-triggered actions.
-///
-/// Internally tagged by `type`. Tags are the snake_case variant names (`click`,
-/// `double_click`, `drag`, ...), except for the keypress action, whose tag is `keypress`.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum ComputerCallAction {
-    /// A click action.
-    Click(Click),
-
-    /// A double-click action.
-    DoubleClick(DoubleClick),
-
-    /// A drag action.
-    Drag(Drag),
-
-    /// A keypress action.
-    ///
-    /// `rename_all = "snake_case"` alone would tag this variant `key_press`, but the action
-    /// is named `keypress` on the wire, so the tag is set explicitly. `key_press` is kept as
-    /// an alias so payloads written with the previous tag still deserialize.
-    #[serde(rename = "keypress", alias = "key_press")]
-    KeyPress(KeyPress),
-
-    /// A mouse move action.
-    Move(MoveAction),
-
-    /// A screenshot action.
-    Screenshot,
-
-    /// A scroll action.
-    Scroll(Scroll),
-
-    /// A type (text entry) action.
-    Type(TypeAction),
-
-    /// A wait (no-op) action.
-    Wait,
-}
-
-/// Mouse button identifier, as used by `Click::button`.
-///
-/// Serialized as the snake_case variant name: `left`, `right`, `wheel`, `back`, or
-/// `forward`.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum ButtonPress {
-    Left,
-    Right,
-    Wheel,
-    Back,
-    Forward,
-}
-
-/// A click action: a mouse button plus the position of the click.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct Click {
-    /// Which mouse button was pressed.
-    pub button: ButtonPress,
-    /// X-coordinate of the click.
-    pub x: i32,
-    /// Y-coordinate of the click.
-    pub y: i32,
-}
-
-/// A double click action, identified only by its position.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct DoubleClick {
-    /// X-coordinate of the double click.
-    pub x: i32,
-    /// Y-coordinate of the double click.
-    pub y: i32,
-}
-
-/// A drag action.
-///
-/// All three fields are required when deserializing.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct Drag {
-    /// The path of points the cursor drags through.
-    pub path: Vec<Point>,
-    /// X-coordinate at the end of the drag.
-    pub x: i32,
-    /// Y-coordinate at the end of the drag.
-    pub y: i32,
-}
-
-/// A keypress action: one or more keys given by name.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct KeyPress {
-    /// The list of keys to press (e.g. `["Control", "C"]`).
-    pub keys: Vec<String>,
-}
-
-/// A mouse move action, identified by its destination.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct MoveAction {
-    /// X-coordinate to move to.
-    pub x: i32,
-    /// Y-coordinate to move to.
-    pub y: i32,
-}
-
-/// A scroll action: scroll distances on both axes plus the position where it began.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct Scroll {
-    /// Horizontal scroll distance.
-    pub scroll_x: i32,
-    /// Vertical scroll distance.
-    pub scroll_y: i32,
-    /// X-coordinate where the scroll began.
-    pub x: i32,
-    /// Y-coordinate where the scroll began.
-    pub y: i32,
-}
-
-/// A typing (text entry) action, carried by `ComputerCallAction::Type`.
-#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct TypeAction {
-    /// The text to type.
-    pub text: String,
-}
-
-/// Metadata for a function call request.
-///
-/// All fields are required when deserializing.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FunctionCall {
-    /// The unique ID of the function tool call.
-    pub id: String,
-    /// The unique ID of the function tool call generated by the model.
-    pub call_id: String,
-    /// The name of the function to run.
-    pub name: String,
-    /// A JSON string of the arguments to pass to the function (kept as text, not parsed).
-    pub arguments: String,
-    /// The status of the item.
-    pub status: OutputStatus,
-}
-
-/// Output of an image generation request.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ImageGenerationCallOutput {
-    /// Unique ID of the image generation call.
-    pub id: String,
-    /// Base64-encoded generated image, or null. There is no `skip_serializing_if` here, so
-    /// `None` is written out explicitly rather than omitted.
-    pub result: Option<String>,
-    /// Status of the image generation call, kept as a free-form string.
-    pub status: String,
-}
-
-/// Output of a code interpreter request.
-///
-/// All fields are required when deserializing.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct CodeInterpreterCallOutput {
-    /// The code that was executed.
-    pub code: String,
-    /// Unique ID of the call.
-    pub id: String,
-    /// Status of the tool call, kept as a free-form string.
-    pub status: String,
-    /// ID of the container used to run the code.
-    pub container_id: String,
-    /// The results of the execution: logs or files.
-    pub results: Vec<CodeInterpreterResult>,
-}
-
-/// Individual result from a code interpreter: either logs or files.
-///
-/// Internally tagged by `type`, using the snake_case variant name as the tag value
-/// (`logs`, `files`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum CodeInterpreterResult {
-    /// Text logs from the execution.
-    Logs(CodeInterpreterTextOutput),
-    /// File outputs from the execution.
-    Files(CodeInterpreterFileOutput),
-}
-
-/// The output containing execution logs, carried by `CodeInterpreterResult::Logs`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct CodeInterpreterTextOutput {
-    /// The logs of the code interpreter tool call.
-    pub logs: String,
-}
-
-/// The output containing file references, carried by `CodeInterpreterResult::Files`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct CodeInterpreterFileOutput {
-    /// List of files produced, each with its ID and MIME type.
-    pub files: Vec<CodeInterpreterFile>,
-}
-
-/// A file produced by the code interpreter.
-///
-/// The fields are public so that a value can be read, or built with a struct literal,
-/// outside this module.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct CodeInterpreterFile {
-    /// The ID of the file.
-    pub file_id: String,
-    /// The MIME type of the file.
-    pub mime_type: String,
-}
-
-/// Output of a local shell command request.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct LocalShellCallOutput {
-    /// Details of the exec action.
-    pub action: LocalShellAction,
-    /// Unique call identifier for responding to the tool call.
-    pub call_id: String,
-    /// Unique ID of the local shell call.
-    pub id: String,
-    /// Status of the local shell call, kept as a free-form string.
-    pub status: String,
-}
-
-/// Define the shape of a local shell action (exec).
-///
-/// The `Option` fields carry no `skip_serializing_if`, so `None` is written out
-/// explicitly rather than omitted.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct LocalShellAction {
-    /// The command to run, as a list of strings.
-    pub command: Vec<String>,
-    /// Environment variables to set for the command.
-    pub env: HashMap<String, String>,
-    /// Optional timeout for the command (ms).
-    pub timeout_ms: Option<u64>,
-    /// Optional user to run the command as.
-    pub user: Option<String>,
-    /// Optional working directory for the command.
-    pub working_directory: Option<String>,
-}
-
-/// Output of an MCP server tool invocation.
-///
-/// `error` and `output` carry no `skip_serializing_if`, so `None` is written out
-/// explicitly rather than omitted.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct McpCallOutput {
-    /// JSON string of the arguments passed (kept as text, not parsed).
-    pub arguments: String,
-    /// Unique ID of the MCP call.
-    pub id: String,
-    /// Name of the tool invoked.
-    pub name: String,
-    /// Label of the MCP server.
-    pub server_label: String,
-    /// Error message from the call, if any.
-    pub error: Option<String>,
-    /// Output from the call, if any.
-    pub output: Option<String>,
-}
-
-/// Output listing tools available on an MCP server.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct McpListToolsOutput {
-    /// Unique ID of the list request.
-    pub id: String,
-    /// Label of the MCP server.
-    pub server_label: String,
-    /// Tools available on the server with metadata.
-    pub tools: Vec<McpToolInfo>,
-    /// Error message if listing failed. Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub error: Option<String>,
-}
-
-/// Information about a single tool on an MCP server.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct McpToolInfo {
-    /// The name of the tool.
-    pub name: String,
-    /// The JSON schema describing the tool's input, held as an arbitrary JSON value.
-    pub input_schema: Value,
-    /// Additional annotations about the tool. Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub annotations: Option<Value>,
-    /// The description of the tool. Omitted from serialized output when `None`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-}
-
-/// Output item asking a human to approve an MCP tool invocation; all four fields are required.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct McpApprovalRequestOutput {
-    /// The arguments for the tool, kept as a JSON-encoded string (not parsed by this type).
-    pub arguments: String,
-    /// Unique ID of the approval request.
-    pub id: String,
-    /// Name of the tool requiring approval.
-    pub name: String,
-    /// Label of the MCP server making the request.
-    pub server_label: String,
-}
-
-/// Token usage statistics for a response; every field is required when deserializing.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct Usage {
-    /// The number of input tokens.
-    pub input_tokens: u32,
-    /// A detailed breakdown of the input tokens (reuses the `PromptTokensDetails` type).
-    pub input_tokens_details: PromptTokensDetails,
-    /// The number of output tokens.
-    pub output_tokens: u32,
-    /// A detailed breakdown of the output tokens (reuses the `CompletionTokensDetails` type).
-    pub output_tokens_details: CompletionTokensDetails,
-    /// The total number of tokens used (presumably `input_tokens + output_tokens`; this type does not enforce it).
-    pub total_tokens: u32,
-}
-
-/// The complete response returned by the Responses API.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct Response {
-    /// Unix timestamp (in seconds) when this Response was created.
-    pub created_at: u64,
-
-    /// Error object if the API failed to generate a response.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub error: Option<ErrorObject>,
-
-    /// Unique identifier for this response.
-    pub id: String,
-
-    /// Details about why the response is incomplete, if any.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub incomplete_details: Option<IncompleteDetails>,
-
-    /// Instructions that were inserted as the first item in context.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub instructions: Option<String>,
-
-    /// The value of `max_output_tokens` that was honored.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_output_tokens: Option<u32>,
-
-    /// Metadata tags/values that were attached to this response.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<serde_json::Value>,
-
-    /// Model ID used to generate the response.
-    pub model: String,
-
-    /// The object type – always `response`.
-    pub object: String,
-
-    /// The array of content items generated by the model.
-    pub output: Vec<OutputContent>,
-
-    /// SDK-only convenience property that contains the aggregated text output from all
-    /// `output_text` items in the `output` array, if any are present.
-    /// Supported in the Python and JavaScript SDKs.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub output_text: Option<String>,
-
-    /// Whether parallel tool calls were enabled.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
-
-    /// Previous response ID, if creating part of a multi-turn conversation.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub previous_response_id: Option<String>,
-
-    /// Reasoning configuration echoed back (effort, summary settings).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub reasoning: Option<ReasoningConfig>,
-
-    /// Whether to store the generated model response for later retrieval via API.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub store: Option<bool>,
-
-    /// The service tier that actually processed this response.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub service_tier: Option<ServiceTier>,
-
-    /// The status of the response generation.
-    pub status: Status,
-
-    /// Sampling temperature that was used.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
-
-    /// Text format configuration echoed back (plain, json_object, json_schema).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub text: Option<TextConfig>,
-
-    /// How the model chose or was forced to choose a tool.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_choice: Option<ToolChoice>,
-
-    /// Tool definitions that were provided.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tools: Option<Vec<ToolDefinition>>,
-
-    /// Nucleus sampling cutoff that was used.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
-
-    /// Truncation strategy that was applied.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub truncation: Option<Truncation>,
-
-    /// Token usage statistics for this request.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub usage: Option<Usage>,
-
-    /// End-user ID for which this response was generated.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "snake_case")] // variants use snake_case names on the wire
-pub enum Status { // type of the `status` field on `Response` and `ResponseMetadata`
-    Completed, // "completed"
-    Failed, // "failed"
-    InProgress, // "in_progress"
-    Incomplete, // "incomplete"
-}
-
-/// Streaming events of the Responses API, internally tagged by the JSON `type` field (e.g. `response.created`).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type")]
-#[non_exhaustive] // New variants can be added later without a breaking change for downstream matches
-pub enum ResponseEvent {
-    /// Response creation started
-    #[serde(rename = "response.created")]
-    ResponseCreated(ResponseCreated),
-    /// Processing in progress
-    #[serde(rename = "response.in_progress")]
-    ResponseInProgress(ResponseInProgress),
-    /// Whole response completed (presumably "done" in the old note meant the per-item `*.done` events below)
-    #[serde(rename = "response.completed")]
-    ResponseCompleted(ResponseCompleted),
-    /// Response failed
-    #[serde(rename = "response.failed")]
-    ResponseFailed(ResponseFailed),
-    /// Response incomplete
-    #[serde(rename = "response.incomplete")]
-    ResponseIncomplete(ResponseIncomplete),
-    /// Response queued
-    #[serde(rename = "response.queued")]
-    ResponseQueued(ResponseQueued),
-    /// Output item added
-    #[serde(rename = "response.output_item.added")]
-    ResponseOutputItemAdded(ResponseOutputItemAdded),
-    /// Content part added
-    #[serde(rename = "response.content_part.added")]
-    ResponseContentPartAdded(ResponseContentPartAdded),
-    /// Text delta update
-    #[serde(rename = "response.output_text.delta")]
-    ResponseOutputTextDelta(ResponseOutputTextDelta),
-    /// Text output completed
-    #[serde(rename = "response.output_text.done")]
-    ResponseOutputTextDone(ResponseOutputTextDone),
-    /// Refusal delta update
-    #[serde(rename = "response.refusal.delta")]
-    ResponseRefusalDelta(ResponseRefusalDelta),
-    /// Refusal completed
-    #[serde(rename = "response.refusal.done")]
-    ResponseRefusalDone(ResponseRefusalDone),
-    /// Content part completed
-    #[serde(rename = "response.content_part.done")]
-    ResponseContentPartDone(ResponseContentPartDone),
-    /// Output item completed
-    #[serde(rename = "response.output_item.done")]
-    ResponseOutputItemDone(ResponseOutputItemDone),
-    /// Function call arguments delta
-    #[serde(rename = "response.function_call_arguments.delta")]
-    ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDelta),
-    /// Function call arguments completed
-    #[serde(rename = "response.function_call_arguments.done")]
-    ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDone),
-    /// File search call in progress
-    #[serde(rename = "response.file_search_call.in_progress")]
-    ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgress),
-    /// File search call searching
-    #[serde(rename = "response.file_search_call.searching")]
-    ResponseFileSearchCallSearching(ResponseFileSearchCallSearching),
-    /// File search call completed
-    #[serde(rename = "response.file_search_call.completed")]
-    ResponseFileSearchCallCompleted(ResponseFileSearchCallCompleted),
-    /// Web search call in progress
-    #[serde(rename = "response.web_search_call.in_progress")]
-    ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgress),
-    /// Web search call searching
-    #[serde(rename = "response.web_search_call.searching")]
-    ResponseWebSearchCallSearching(ResponseWebSearchCallSearching),
-    /// Web search call completed
-    #[serde(rename = "response.web_search_call.completed")]
-    ResponseWebSearchCallCompleted(ResponseWebSearchCallCompleted),
-    /// Reasoning summary part added
-    #[serde(rename = "response.reasoning_summary_part.added")]
-    ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAdded),
-    /// Reasoning summary part done
-    #[serde(rename = "response.reasoning_summary_part.done")]
-    ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDone),
-    /// Reasoning summary text delta
-    #[serde(rename = "response.reasoning_summary_text.delta")]
-    ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDelta),
-    /// Reasoning summary text done
-    #[serde(rename = "response.reasoning_summary_text.done")]
-    ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDone),
-    /// Reasoning summary delta
-    #[serde(rename = "response.reasoning_summary.delta")]
-    ResponseReasoningSummaryDelta(ResponseReasoningSummaryDelta),
-    /// Reasoning summary done
-    #[serde(rename = "response.reasoning_summary.done")]
-    ResponseReasoningSummaryDone(ResponseReasoningSummaryDone),
-    /// Image generation call in progress
-    #[serde(rename = "response.image_generation_call.in_progress")]
-    ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgress),
-    /// Image generation call generating
-    #[serde(rename = "response.image_generation_call.generating")]
-    ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGenerating),
-    /// Image generation call partial image
-    #[serde(rename = "response.image_generation_call.partial_image")]
-    ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImage),
-    /// Image generation call completed
-    #[serde(rename = "response.image_generation_call.completed")]
-    ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompleted),
-    /// MCP call arguments delta
-    #[serde(rename = "response.mcp_call_arguments.delta")]
-    ResponseMcpCallArgumentsDelta(ResponseMcpCallArgumentsDelta),
-    /// MCP call arguments done
-    #[serde(rename = "response.mcp_call_arguments.done")]
-    ResponseMcpCallArgumentsDone(ResponseMcpCallArgumentsDone),
-    /// MCP call completed
-    #[serde(rename = "response.mcp_call.completed")]
-    ResponseMcpCallCompleted(ResponseMcpCallCompleted),
-    /// MCP call failed
-    #[serde(rename = "response.mcp_call.failed")]
-    ResponseMcpCallFailed(ResponseMcpCallFailed),
-    /// MCP call in progress
-    #[serde(rename = "response.mcp_call.in_progress")]
-    ResponseMcpCallInProgress(ResponseMcpCallInProgress),
-    /// MCP list tools completed
-    #[serde(rename = "response.mcp_list_tools.completed")]
-    ResponseMcpListToolsCompleted(ResponseMcpListToolsCompleted),
-    /// MCP list tools failed
-    #[serde(rename = "response.mcp_list_tools.failed")]
-    ResponseMcpListToolsFailed(ResponseMcpListToolsFailed),
-    /// MCP list tools in progress
-    #[serde(rename = "response.mcp_list_tools.in_progress")]
-    ResponseMcpListToolsInProgress(ResponseMcpListToolsInProgress),
-    /// Code interpreter call in progress
-    #[serde(rename = "response.code_interpreter_call.in_progress")]
-    ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgress),
-    /// Code interpreter call interpreting
-    #[serde(rename = "response.code_interpreter_call.interpreting")]
-    ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpreting),
-    /// Code interpreter call completed
-    #[serde(rename = "response.code_interpreter_call.completed")]
-    ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompleted),
-    /// Code interpreter call code delta
-    #[serde(rename = "response.code_interpreter_call_code.delta")]
-    ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDelta),
-    /// Code interpreter call code done
-    #[serde(rename = "response.code_interpreter_call_code.done")]
-    ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDone),
-    /// Output text annotation added
-    #[serde(rename = "response.output_text.annotation.added")]
-    ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAdded),
-    /// Error occurred; note the tag is the bare string `error`, without the `response.` prefix
-    #[serde(rename = "error")]
-    ResponseError(ResponseError),
-
-    /// Fallback for payloads that match none of the tagged variants above; the raw JSON value is kept as-is
-    #[serde(untagged)]
-    Unknown(serde_json::Value),
-}
-
-/// Boxed, pinned, `Send` stream whose items are `Result<ResponseEvent, OpenAIError>`.
-pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<ResponseEvent, OpenAIError>> + Send>>;
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseCreated {
-    pub sequence_number: u64,
-    pub response: ResponseMetadata,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseInProgress {
-    pub sequence_number: u64,
-    pub response: ResponseMetadata,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseOutputItemAdded {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item: OutputItem,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseContentPartAdded {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub content_index: u32,
-    pub part: ContentPart,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseOutputTextDelta { // payload of `ResponseEvent::ResponseOutputTextDelta` (`response.output_text.delta`)
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub content_index: u32,
-    pub delta: String, // the text fragment carried by this event
-    #[serde(default, skip_serializing_if = "Option::is_none")] // absent key reads as `None`; `None` is not written out
-    pub logprobs: Option<serde_json::Value>, // untyped JSON; its shape is not constrained by this type
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseContentPartDone {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub content_index: u32,
-    pub part: ContentPart,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseOutputItemDone {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item: OutputItem,
-}
-
-/// Response completed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseCompleted {
-    pub sequence_number: u64,
-    pub response: ResponseMetadata,
-}
-
-/// Response failed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseFailed {
-    pub sequence_number: u64,
-    pub response: ResponseMetadata,
-}
-
-/// Response incomplete event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseIncomplete {
-    pub sequence_number: u64,
-    pub response: ResponseMetadata,
-}
-
-/// Response queued event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseQueued {
-    pub sequence_number: u64,
-    pub response: ResponseMetadata,
-}
-
-/// Payload of the `response.output_text.done` event: carries the `text` of one content part of an output item.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseOutputTextDone {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub content_index: u32,
-    pub text: String, // presumably the full accumulated text of the part — TODO confirm against producer
-    pub logprobs: Option<Vec<serde_json::Value>>, // no skip attribute: serializes as `null` when `None`, unlike `ResponseOutputTextDelta::logprobs`
-}
-
-/// Refusal delta event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseRefusalDelta {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub content_index: u32,
-    pub delta: String,
-}
-
-/// Refusal done event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseRefusalDone {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub content_index: u32,
-    pub refusal: String,
-}
-
-/// Function call arguments delta event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseFunctionCallArgumentsDelta {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub delta: String,
-}
-
-/// Function call arguments done event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseFunctionCallArgumentsDone {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub arguments: String,
-}
-
-/// Payload of the streaming `error` event (`ResponseEvent::ResponseError`); only `message` and `sequence_number` are mandatory.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseError {
-    pub sequence_number: u64,
-    pub code: Option<String>, // no skip attribute: serializes as `null` when `None`
-    pub message: String,
-    pub param: Option<String>, // no skip attribute: serializes as `null` when `None`
-}
-
-/// File search call in progress event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseFileSearchCallInProgress {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// File search call searching event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseFileSearchCallSearching {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// File search call completed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseFileSearchCallCompleted {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Web search call in progress event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseWebSearchCallInProgress {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Web search call searching event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseWebSearchCallSearching {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Web search call completed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseWebSearchCallCompleted {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Reasoning summary part added event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseReasoningSummaryPartAdded {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub summary_index: u32,
-    pub part: serde_json::Value, // Could be more specific but using Value for flexibility
-}
-
-/// Reasoning summary part done event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseReasoningSummaryPartDone {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub summary_index: u32,
-    pub part: serde_json::Value,
-}
-
-/// Reasoning summary text delta event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseReasoningSummaryTextDelta {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub summary_index: u32,
-    pub delta: String,
-}
-
-/// Reasoning summary text done event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseReasoningSummaryTextDone {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub summary_index: u32,
-    pub text: String,
-}
-
-/// Reasoning summary delta event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseReasoningSummaryDelta {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub summary_index: u32,
-    pub delta: serde_json::Value,
-}
-
-/// Reasoning summary done event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseReasoningSummaryDone {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub summary_index: u32,
-    pub text: String,
-}
-
-/// Image generation call in progress event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseImageGenerationCallInProgress {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Image generation call generating event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseImageGenerationCallGenerating {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Image generation call partial image event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseImageGenerationCallPartialImage {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-    pub partial_image_index: u32,
-    pub partial_image_b64: String,
-}
-
-/// Image generation call completed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseImageGenerationCallCompleted {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// MCP call arguments delta event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpCallArgumentsDelta {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-    pub delta: String,
-}
-
-/// MCP call arguments done event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpCallArgumentsDone {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-    pub arguments: String,
-}
-
-/// MCP call completed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpCallCompleted {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// MCP call failed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpCallFailed {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// MCP call in progress event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpCallInProgress {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// MCP list tools completed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpListToolsCompleted {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// MCP list tools failed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpListToolsFailed {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// MCP list tools in progress event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMcpListToolsInProgress {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Code interpreter call in progress event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseCodeInterpreterCallInProgress {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Code interpreter call interpreting event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseCodeInterpreterCallInterpreting {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Code interpreter call completed event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseCodeInterpreterCallCompleted {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-}
-
-/// Code interpreter call code delta event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseCodeInterpreterCallCodeDelta {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-    pub delta: String,
-}
-
-/// Code interpreter call code done event
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseCodeInterpreterCallCodeDone {
-    pub sequence_number: u64,
-    pub output_index: u32,
-    pub item_id: String,
-    pub code: String,
-}
-
-/// Response metadata
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseMetadata {
-    pub id: String,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub object: Option<String>,
-    pub created_at: u64,
-    pub status: Status,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub model: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub usage: Option<Usage>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub error: Option<ErrorObject>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub incomplete_details: Option<IncompleteDetails>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub input: Option<Input>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub instructions: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_output_tokens: Option<u32>,
-    /// Whether the model was run in background mode
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub background: Option<bool>,
-    /// The service tier that was actually used
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub service_tier: Option<ServiceTier>,
-    /// The effective value of top_logprobs parameter
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_logprobs: Option<u32>,
-    /// The effective value of max_tool_calls parameter
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub max_tool_calls: Option<u32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub output: Option<Vec<OutputItem>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub parallel_tool_calls: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub previous_response_id: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub reasoning: Option<ReasoningConfig>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub store: Option<bool>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub text: Option<TextConfig>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_choice: Option<ToolChoice>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tools: Option<Vec<ToolDefinition>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub truncation: Option<Truncation>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<serde_json::Value>,
-    /// Prompt cache key for improved performance
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub prompt_cache_key: Option<String>,
-    /// Safety identifier for content filtering
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub safety_identifier: Option<String>,
-}
-
-/// Loosely-typed output item carried by `response.output_item.*` events and by `ResponseMetadata::output`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct OutputItem {
-    pub id: String,
-    #[serde(rename = "type")] // JSON key is `type`; renamed because `type` is a Rust keyword
-    pub item_type: String, // free-form string; not validated against a fixed set of item kinds here
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub status: Option<String>, // plain string rather than the `Status` enum
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub content: Option<Vec<ContentPart>>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub role: Option<String>,
-    /// For reasoning items: the summary paragraphs, kept as untyped JSON values
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub summary: Option<Vec<serde_json::Value>>,
-}
-
-/// One content part of an `OutputItem`, also used as the `part` of the `response.content_part.*` events.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ContentPart {
-    #[serde(rename = "type")] // JSON key is `type`; renamed because `type` is a Rust keyword
-    pub part_type: String,
-    pub text: Option<String>, // no skip attribute: serializes as `null` when `None`, unlike the two fields below
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub annotations: Option<Vec<serde_json::Value>>, // untyped JSON entries
-    #[serde(default, skip_serializing_if = "Option::is_none")]
-    pub logprobs: Option<Vec<serde_json::Value>>, // untyped JSON entries
-}
-
-// ===== RESPONSE COLLECTOR =====
-
-/// Payload of the `response.output_text.annotation.added` event (`ResponseEvent::ResponseOutputTextAnnotationAdded`).
-/// Carries one `TextAnnotation` together with the item, output, content and annotation indices it belongs to.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct ResponseOutputTextAnnotationAdded {
-    pub sequence_number: u64,
-    pub item_id: String,
-    pub output_index: u32,
-    pub content_index: u32,
-    pub annotation_index: u32,
-    pub annotation: TextAnnotation,
-}
-
-/// Text annotation attached to output text; used as `ResponseOutputTextAnnotationAdded::annotation`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[non_exhaustive]
-pub struct TextAnnotation {
-    #[serde(rename = "type")] // JSON key is `type`; renamed because `type` is a Rust keyword
-    pub annotation_type: String,
-    pub text: String,
-    pub start: u32, // presumably an offset into the annotated text; unit and inclusivity are not visible here — TODO confirm
-    pub end: u32, // see `start`; NOTE(review): confirm whether the bound is exclusive
-}
--- a/lib/async-openai/src/types/responses/api.rs
+++ b/lib/async-openai/src/types/responses/api.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::error::OpenAIError;
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+use crate::types::responses::{IncludeParam, ListOrder};
+
+/// Query parameters for listing conversation items; build with `ListConversationItemsQueryArgs` (every field is optional and skipped when unset).
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq, ToSchema)]
+#[builder(name = "ListConversationItemsQueryArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ListConversationItemsQuery {
+    /// A limit on the number of objects to be returned. Documented range is 1 to 100 with a default of 20; this type does not enforce either.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub limit: Option<u32>,
+    /// The order to return the conversation items in. Default is `desc`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub order: Option<ListOrder>,
+    /// An item ID to list items after, used in pagination.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub after: Option<String>,
+    /// Specify additional output data to include in the model response, as typed `IncludeParam` values.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include: Option<Vec<IncludeParam>>,
+}
+
+/// Query parameters for getting a response.
+///
+/// Every field is optional; a field left as `None` is skipped during serialization.
+/// The generated builder is `GetResponseQueryArgs` and its `build()` error type is
+/// `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq, ToSchema)]
+#[builder(name = "GetResponseQueryArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct GetResponseQuery {
+    /// Additional fields to include in the response.
+    // NOTE(review): plain strings here, while the conversation-item queries in this file
+    // use `Vec<IncludeParam>` — confirm whether the untyped form is intentional.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include: Option<Vec<String>>,
+    /// If set to true, the model response data will be streamed to the client as it is generated using server-sent events.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+    /// The sequence number of the event after which to start streaming.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub starting_after: Option<u32>,
+    /// When true, stream obfuscation will be enabled.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include_obfuscation: Option<bool>,
+}
+
+/// Query parameters for listing input items.
+///
+/// Every field is optional; a field left as `None` is skipped during serialization.
+/// The generated builder is `ListInputItemsQueryArgs` and its `build()` error type is
+/// `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq, ToSchema)]
+#[builder(name = "ListInputItemsQueryArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ListInputItemsQuery {
+    /// A limit on the number of objects to be returned. Limit can range between 1 and 100, and the default is 20.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub limit: Option<u32>,
+    /// The order to return the input items in. Default is `desc`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub order: Option<ListOrder>,
+    /// An item ID to list items after, used in pagination.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub after: Option<String>,
+    /// Additional fields to include in the response.
+    // NOTE(review): plain strings here, while `ListConversationItemsQuery` uses
+    // `Vec<IncludeParam>` — confirm whether the untyped form is intentional.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include: Option<Vec<String>>,
+}
+
+/// Query parameters for getting a conversation item.
+///
+/// The single field is optional and is skipped during serialization when `None`.
+/// The generated builder is `GetConversationItemQueryArgs` and its `build()` error
+/// type is `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq, ToSchema)]
+#[builder(name = "GetConversationItemQueryArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct GetConversationItemQuery {
+    /// Additional fields to include in the response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include: Option<Vec<IncludeParam>>,
+}
+
+/// Query parameters for creating conversation items.
+///
+/// The single field is optional and is skipped during serialization when `None`.
+/// The generated builder is `CreateConversationItemsQueryArgs` and its `build()` error
+/// type is `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq, ToSchema)]
+#[builder(name = "CreateConversationItemsQueryArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateConversationItemsQuery {
+    /// Additional fields to include in the response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include: Option<Vec<IncludeParam>>,
+}
--- a/lib/async-openai/src/types/responses/conversation.rs
+++ b/lib/async-openai/src/types/responses/conversation.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+use crate::{
+    error::OpenAIError,
+    types::responses::{
+        AnyItemReference, CodeInterpreterToolCall, ComputerToolCall, CustomToolCall,
+        CustomToolCallOutput, FileSearchToolCall, ImageGenToolCall, InputFileContent,
+        InputImageContent, InputItem, InputTextContent, LocalShellToolCall,
+        LocalShellToolCallOutput, MCPApprovalRequest, MCPApprovalResponse, MCPListTools,
+        MCPToolCall, OutputTextContent, ReasoningItem, ReasoningTextContent, RefusalContent,
+        WebSearchToolCall,
+    },
+};
+
+/// Represents a conversation object.
+///
+/// Plain data type with no builder; all fields are public.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, ToSchema)]
+pub struct ConversationResource {
+    /// The unique ID of the conversation.
+    pub id: String,
+    /// The object type, which is always `conversation`.
+    pub object: String,
+    /// Set of 16 key-value pairs that can be attached to an object.
+    ///
+    /// Held as arbitrary JSON; the OpenAPI schema describes it as a generic object.
+    #[schema(value_type = Object)]
+    pub metadata: Option<serde_json::Value>,
+    /// The time at which the conversation was created, measured in seconds since the Unix epoch.
+    pub created_at: u64,
+}
+
+/// Request to create a conversation.
+/// openapi spec type: CreateConversationBody
+///
+/// Both fields are optional and are skipped during serialization when `None`.
+/// The generated builder is `CreateConversationRequestArgs` and its `build()` error
+/// type is `OpenAIError`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq, ToSchema)]
+#[builder(name = "CreateConversationRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateConversationRequest {
+    /// Set of 16 key-value pairs that can be attached to an object.
+    ///
+    /// Held as arbitrary JSON; the OpenAPI schema describes it as a generic object.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[schema(value_type = Object)]
+    pub metadata: Option<serde_json::Value>,
+
+    /// Initial items to include in the conversation context. You may add up to 20 items at a time.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub items: Option<Vec<InputItem>>,
+}
+
+/// Request to update a conversation.
+///
+/// The generated builder is `UpdateConversationRequestArgs` and its `build()` error
+/// type is `OpenAIError`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq, ToSchema)]
+#[builder(name = "UpdateConversationRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct UpdateConversationRequest {
+    /// Set of 16 key-value pairs that can be attached to an object.
+    ///
+    /// Held as arbitrary JSON; the OpenAPI schema describes it as a generic object.
+    // NOTE(review): unlike `CreateConversationRequest::metadata`, this field has no
+    // `skip_serializing_if`, so `None` is written out as JSON `null` — confirm that
+    // sending an explicit `null` is the intended behavior.
+    #[schema(value_type = Object)]
+    pub metadata: Option<serde_json::Value>,
+}
+
+/// Represents a deleted conversation.
+///
+/// Plain data type with no builder; all three fields are required when deserializing.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, ToSchema)]
+pub struct DeleteConversationResponse {
+    /// The unique ID of the deleted conversation.
+    pub id: String,
+    /// The object type, which is always `conversation.deleted`.
+    pub object: String,
+    /// Whether the conversation was successfully deleted.
+    pub deleted: bool,
+}
+
+/// Request to create conversation items.
+///
+/// The generated builder is `CreateConversationItemsRequestArgs` and its `build()`
+/// error type is `OpenAIError`. `items` is not optional, so it is always serialized.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq, ToSchema)]
+#[builder(name = "CreateConversationItemsRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateConversationItemsRequest {
+    /// The items to add to the conversation. You may add up to 20 items at a time.
+    // NOTE(review): with the struct-level builder `default`, an unset `items` presumably
+    // builds as an empty list rather than failing — confirm that is acceptable.
+    pub items: Vec<InputItem>,
+}
+
+/// A list of Conversation items.
+///
+/// One page of results: `data` holds the items, and `has_more`, `first_id` and
+/// `last_id` describe the page.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, ToSchema)]
+pub struct ConversationItemList {
+    /// The type of object returned, must be `list`.
+    pub object: String,
+    /// A list of conversation items.
+    pub data: Vec<ConversationItem>,
+    /// Whether there are more items available.
+    pub has_more: bool,
+    /// The ID of the first item in the list.
+    pub first_id: Option<String>,
+    /// The ID of the last item in the list.
+    pub last_id: Option<String>,
+}
+
+/// Status of a conversation [`Message`].
+///
+/// Serialized in snake_case: `in_progress`, `incomplete`, `completed`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum MessageStatus {
+    /// Wire value `in_progress`.
+    InProgress,
+    /// Wire value `incomplete`.
+    Incomplete,
+    /// Wire value `completed`.
+    Completed,
+}
+
+/// Role of a conversation [`Message`].
+///
+/// Serialized in snake_case, so each variant's wire value is its lowercase name
+/// (`unknown`, `user`, `assistant`, `system`, `critic`, `discriminator`, `developer`,
+/// `tool`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum MessageRole {
+    Unknown,
+    User,
+    Assistant,
+    System,
+    Critic,
+    Discriminator,
+    Developer,
+    Tool,
+}
+
+/// Text payload carried by the `MessageContent::Text` variant.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct TextContent {
+    /// The text itself.
+    pub text: String,
+}
+
+/// Payload carried by the `MessageContent::SummaryText` variant.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct SummaryTextContent {
+    /// A summary of the reasoning output from the model so far.
+    pub text: String,
+}
+
+/// Screenshot payload carried by the `MessageContent::ComputerScreen` variant.
+///
+/// Both fields are optional; neither carries a `skip_serializing_if`, so a `None`
+/// value is serialized as `null`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ComputerScreenContent {
+    /// The URL of the screenshot image.
+    pub image_url: Option<String>,
+    /// The identifier of an uploaded file that contains the screenshot.
+    pub file_id: Option<String>,
+}
+
+/// One content part of a conversation [`Message`].
+///
+/// Internally tagged: the `type` field selects the variant, using the snake_case form
+/// of the variant name (for example `input_text`, `output_text`, `computer_screen`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum MessageContent {
+    /// `type: "input_text"`.
+    InputText(InputTextContent),
+    /// `type: "output_text"`.
+    OutputText(OutputTextContent),
+    /// `type: "text"`.
+    Text(TextContent),
+    /// `type: "summary_text"`.
+    SummaryText(SummaryTextContent),
+    /// `type: "reasoning_text"`.
+    ReasoningText(ReasoningTextContent),
+    /// `type: "refusal"`.
+    Refusal(RefusalContent),
+    /// `type: "input_image"`.
+    InputImage(InputImageContent),
+    /// `type: "computer_screen"`.
+    ComputerScreen(ComputerScreenContent),
+    /// `type: "input_file"`.
+    InputFile(InputFileContent),
+}
+
+/// A message stored in a conversation; the payload of `ConversationItem::Message`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct Message {
+    /// The unique ID of the message.
+    pub id: String,
+    /// The status of item. One of `in_progress`, `completed`, or `incomplete`. Populated when items are
+    /// returned via API.
+    pub status: MessageStatus,
+    /// The role of the message. One of `unknown`, `user`, `assistant`, `system`, `critic`,
+    /// `discriminator`, `developer`, or `tool`.
+    pub role: MessageRole,
+    /// The content of the message.
+    pub content: Vec<MessageContent>,
+}
+
+/// A single item stored in a conversation.
+///
+/// Internally tagged: the `type` field selects the variant, using the snake_case form
+/// of the variant name (for example `message`, `file_search_call`, `mcp_call`).
+/// The final variant, `ItemReference`, is marked `#[serde(untagged)]`, so it is
+/// (de)serialized as the bare `AnyItemReference` data instead of through this enum's
+/// `type` tag.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ConversationItem {
+    /// `type: "message"`.
+    Message(Message),
+    /// `type: "file_search_call"`.
+    FileSearchCall(FileSearchToolCall),
+    /// `type: "web_search_call"`.
+    WebSearchCall(WebSearchToolCall),
+    /// `type: "image_generation_call"`.
+    ImageGenerationCall(ImageGenToolCall),
+    /// `type: "computer_call"`.
+    ComputerCall(ComputerToolCall),
+    /// `type: "reasoning"`.
+    Reasoning(ReasoningItem),
+    /// `type: "code_interpreter_call"`.
+    CodeInterpreterCall(CodeInterpreterToolCall),
+    /// `type: "local_shell_call"`.
+    LocalShellCall(LocalShellToolCall),
+    /// `type: "local_shell_call_output"`.
+    LocalShellCallOutput(LocalShellToolCallOutput),
+    /// `type: "mcp_list_tools"`.
+    McpListTools(MCPListTools),
+    /// `type: "mcp_approval_request"`.
+    McpApprovalRequest(MCPApprovalRequest),
+    /// `type: "mcp_approval_response"`.
+    McpApprovalResponse(MCPApprovalResponse),
+    /// `type: "mcp_call"`.
+    McpCall(MCPToolCall),
+    /// `type: "custom_tool_call"`.
+    CustomToolCall(CustomToolCall),
+    /// `type: "custom_tool_call_output"`.
+    CustomToolCallOutput(CustomToolCallOutput),
+    /// Untagged variant holding an item reference; kept last in the enum.
+    #[serde(untagged)]
+    ItemReference(AnyItemReference),
+}
+
+/// Additional fields to include in the response.
+///
+/// Every variant carries an explicit `#[serde(rename = ...)]` giving its dotted wire
+/// name, which takes the place of the enum-level `snake_case` renaming.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum IncludeParam {
+    /// Include the sources of the web search tool call.
+    #[serde(rename = "web_search_call.action.sources")]
+    WebSearchCallActionSources,
+    /// Include the outputs of python code execution in code interpreter tool call items.
+    #[serde(rename = "code_interpreter_call.outputs")]
+    CodeInterpreterCallOutputs,
+    /// Include image urls from the computer call output.
+    #[serde(rename = "computer_call_output.output.image_url")]
+    ComputerCallOutputOutputImageUrl,
+    /// Include the search results of the file search tool call.
+    #[serde(rename = "file_search_call.results")]
+    FileSearchCallResults,
+    /// Include image urls from the input message.
+    #[serde(rename = "message.input_image.image_url")]
+    MessageInputImageImageUrl,
+    /// Include logprobs with assistant messages.
+    #[serde(rename = "message.output_text.logprobs")]
+    MessageOutputTextLogprobs,
+    /// Include an encrypted version of reasoning tokens in reasoning item outputs.
+    #[serde(rename = "reasoning.encrypted_content")]
+    ReasoningEncryptedContent,
+}
+
+/// The order to return items in.
+///
+/// Serialized in lowercase: `asc` or `desc`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ListOrder {
+    /// Return items in ascending order.
+    Asc,
+    /// Return items in descending order.
+    Desc,
+}
--- a/lib/async-openai/src/types/responses/impls.rs
+++ b/lib/async-openai/src/types/responses/impls.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::types::mcp::MCPTool;
+use crate::types::responses::{
+    ApplyPatchToolCallItemParam, ApplyPatchToolCallOutputItemParam, CodeInterpreterContainerAuto,
+    CodeInterpreterTool, CodeInterpreterToolCall, CodeInterpreterToolContainer,
+    ComputerCallOutputItemParam, ComputerToolCall, ComputerUsePreviewTool, ConversationParam,
+    CustomToolCall, CustomToolCallOutput, CustomToolParam, EasyInputContent, EasyInputMessage,
+    FileSearchTool, FileSearchToolCall, FunctionCallOutput, FunctionCallOutputItemParam,
+    FunctionShellCallItemParam, FunctionShellCallOutputItemParam, FunctionTool, FunctionToolCall,
+    ImageGenTool, ImageGenToolCall, InputContent, InputFileContent, InputImageContent, InputItem,
+    InputMessage, InputParam, InputTextContent, Item, ItemReference, ItemReferenceType,
+    LocalShellToolCall, LocalShellToolCallOutput, MCPApprovalRequest, MCPApprovalResponse,
+    MCPListTools, MCPToolCall, MessageItem, MessageType, OutputMessage, OutputMessageContent,
+    OutputTextContent, Prompt, Reasoning, ReasoningEffort, ReasoningItem, ReasoningSummary,
+    RefusalContent, ResponseFormatJsonSchema, ResponsePromptVariables, ResponseStreamOptions,
+    ResponseTextParam, Role, TextResponseFormatConfiguration, Tool, ToolChoiceCustom,
+    ToolChoiceFunction, ToolChoiceMCP, ToolChoiceOptions, ToolChoiceParam, ToolChoiceTypes,
+    WebSearchTool, WebSearchToolCall,
+};
+
+/// Turns any string-like value into a `user`-role message with plain text content.
+impl<S: Into<String>> From<S> for EasyInputMessage {
+    fn from(text: S) -> Self {
+        let content = EasyInputContent::Text(text.into());
+        Self {
+            r#type: MessageType::Message,
+            role: Role::User,
+            content,
+        }
+    }
+}
+
+// InputItem ergonomics
+
+/// Wraps an easy-form message as `InputItem::EasyMessage`.
+impl From<EasyInputMessage> for InputItem {
+    fn from(easy: EasyInputMessage) -> Self {
+        Self::EasyMessage(easy)
+    }
+}
+
+/// Wraps an input message as `InputItem::Item(Item::Message(MessageItem::Input(..)))`.
+impl From<InputMessage> for InputItem {
+    fn from(input: InputMessage) -> Self {
+        let message = MessageItem::Input(input);
+        Self::Item(Item::Message(message))
+    }
+}
+
+/// Wraps an item as `InputItem::Item`.
+impl From<Item> for InputItem {
+    fn from(inner: Item) -> Self {
+        Self::Item(inner)
+    }
+}
+
+/// Wraps a reference as `InputItem::ItemReference`.
+impl From<ItemReference> for InputItem {
+    fn from(reference: ItemReference) -> Self {
+        Self::ItemReference(reference)
+    }
+}
+
+// InputParam ergonomics: from InputItem
+
+/// A single input item becomes a one-element `InputParam::Items` list.
+impl From<InputItem> for InputParam {
+    fn from(single: InputItem) -> Self {
+        Self::Items(vec![single])
+    }
+}
+
+/// A single item is wrapped in `InputItem::Item` and placed in a one-element list.
+impl From<Item> for InputParam {
+    fn from(single: Item) -> Self {
+        let wrapped = InputItem::Item(single);
+        Self::Items(vec![wrapped])
+    }
+}
+
+/// A single message item is wrapped as `Item::Message` and placed in a one-element list.
+impl From<MessageItem> for InputParam {
+    fn from(message: MessageItem) -> Self {
+        let wrapped = InputItem::Item(Item::Message(message));
+        Self::Items(vec![wrapped])
+    }
+}
+
+/// A single input message is wrapped as `MessageItem::Input` and placed in a one-element list.
+impl From<InputMessage> for InputParam {
+    fn from(input: InputMessage) -> Self {
+        let wrapped = InputItem::Item(Item::Message(MessageItem::Input(input)));
+        Self::Items(vec![wrapped])
+    }
+}
+
+/// A vector of anything convertible to `InputItem` becomes `InputParam::Items`, in order.
+impl<I: Into<InputItem>> From<Vec<I>> for InputParam {
+    fn from(items: Vec<I>) -> Self {
+        Self::Items(items.into_iter().map(Into::into).collect())
+    }
+}
+
+/// A fixed-size array of anything convertible to `InputItem` becomes `InputParam::Items`, in order.
+impl<I: Into<InputItem>, const N: usize> From<[I; N]> for InputParam {
+    fn from(items: [I; N]) -> Self {
+        Self::Items(items.into_iter().map(Into::into).collect())
+    }
+}
+
+// InputParam ergonomics: from string "family"
+
+/// A string slice is copied into `InputParam::Text`.
+impl From<&str> for InputParam {
+    fn from(text: &str) -> Self {
+        Self::Text(text.to_owned())
+    }
+}
+
+/// An owned string is moved into `InputParam::Text`.
+impl From<String> for InputParam {
+    fn from(text: String) -> Self {
+        Self::Text(text)
+    }
+}
+
+/// A borrowed `String` is copied into `InputParam::Text`.
+impl From<&String> for InputParam {
+    fn from(text: &String) -> Self {
+        Self::Text(String::from(text))
+    }
+}
+
+// InputParam ergonomics: from vector family
+
+/// Implements `From<Vec<T>>`, `From<[T; N]>` and `From<&Vec<T>>` for `InputParam`
+/// for one string-like element type `T`.
+///
+/// Each element becomes its own `user`-role `InputItem::EasyMessage` with text content,
+/// built through the string `From` impl on `EasyInputMessage`.
+/// `$map` turns an owned element into a `String`; `$clone` turns an element borrowed
+/// from the vector into a `String`.
+macro_rules! impl_inputparam_easy_from_collection {
+    ($t:ty, $map:expr, $clone:expr) => {
+        // Owned vector: Vec<T>
+        impl From<Vec<$t>> for InputParam {
+            fn from(values: Vec<$t>) -> Self {
+                let items = values
+                    .into_iter()
+                    .map(|value| InputItem::EasyMessage(EasyInputMessage::from($map(value))))
+                    .collect();
+                InputParam::Items(items)
+            }
+        }
+        // Owned fixed-size array: [T; N]
+        impl<const N: usize> From<[$t; N]> for InputParam {
+            fn from(values: [$t; N]) -> Self {
+                let items = values
+                    .into_iter()
+                    .map(|value| InputItem::EasyMessage(EasyInputMessage::from($map(value))))
+                    .collect();
+                InputParam::Items(items)
+            }
+        }
+        // Borrowed vector: &Vec<T>
+        impl From<&Vec<$t>> for InputParam {
+            fn from(values: &Vec<$t>) -> Self {
+                let items = values
+                    .iter()
+                    .map(|value| InputItem::EasyMessage(EasyInputMessage::from($clone(value))))
+                    .collect();
+                InputParam::Items(items)
+            }
+        }
+    };
+}
+
+// Apply for &str
+impl_inputparam_easy_from_collection!(&str, |v: &str| v.to_string(), |v: &str| v.to_string());
+// Apply for String
+impl_inputparam_easy_from_collection!(String, |v: String| v, |v: &String| v.clone());
+// Apply for &String
+impl_inputparam_easy_from_collection!(&String, |v: &String| v.clone(), |v: &String| v.clone());
+
+// ConversationParam ergonomics
+
+/// Treats any string-like value as a conversation ID.
+impl<S: Into<String>> From<S> for ConversationParam {
+    fn from(conversation_id: S) -> Self {
+        Self::ConversationID(conversation_id.into())
+    }
+}
+
+// ToolChoiceParam ergonomics
+
+/// A tool-choice option maps to `ToolChoiceParam::Mode`.
+impl From<ToolChoiceOptions> for ToolChoiceParam {
+    fn from(options: ToolChoiceOptions) -> Self {
+        Self::Mode(options)
+    }
+}
+
+/// A tool-choice type maps to `ToolChoiceParam::Hosted`.
+impl From<ToolChoiceTypes> for ToolChoiceParam {
+    fn from(hosted: ToolChoiceTypes) -> Self {
+        Self::Hosted(hosted)
+    }
+}
+
+/// A string-like value is taken as a function name and maps to `ToolChoiceParam::Function`.
+impl<S: Into<String>> From<S> for ToolChoiceParam {
+    fn from(function_name: S) -> Self {
+        let name = function_name.into();
+        Self::Function(ToolChoiceFunction { name })
+    }
+}
+
+/// A function choice maps to `ToolChoiceParam::Function`.
+impl From<ToolChoiceFunction> for ToolChoiceParam {
+    fn from(choice: ToolChoiceFunction) -> Self {
+        Self::Function(choice)
+    }
+}
+
+/// An MCP choice maps to `ToolChoiceParam::Mcp`.
+impl From<ToolChoiceMCP> for ToolChoiceParam {
+    fn from(choice: ToolChoiceMCP) -> Self {
+        Self::Mcp(choice)
+    }
+}
+
+/// A custom choice maps to `ToolChoiceParam::Custom`.
+impl From<ToolChoiceCustom> for ToolChoiceParam {
+    fn from(choice: ToolChoiceCustom) -> Self {
+        Self::Custom(choice)
+    }
+}
+
+// ResponseTextParam ergonomics
+
+/// Uses the given format configuration and leaves `verbosity` unset.
+impl From<TextResponseFormatConfiguration> for ResponseTextParam {
+    fn from(format: TextResponseFormatConfiguration) -> Self {
+        Self {
+            verbosity: None,
+            format,
+        }
+    }
+}
+
+/// Wraps the schema in the `JsonSchema` format configuration and leaves `verbosity` unset.
+impl From<ResponseFormatJsonSchema> for ResponseTextParam {
+    fn from(schema: ResponseFormatJsonSchema) -> Self {
+        let format = TextResponseFormatConfiguration::JsonSchema(schema);
+        Self {
+            verbosity: None,
+            format,
+        }
+    }
+}
+
+// ResponseStreamOptions ergonomics
+
+/// Stores the flag as `include_obfuscation: Some(flag)`.
+impl From<bool> for ResponseStreamOptions {
+    fn from(flag: bool) -> Self {
+        let include_obfuscation = Some(flag);
+        Self {
+            include_obfuscation,
+        }
+    }
+}
+
+// Reasoning ergonomics
+
+/// Sets only `effort`; `summary` stays `None`.
+impl From<ReasoningEffort> for Reasoning {
+    fn from(level: ReasoningEffort) -> Self {
+        Self {
+            summary: None,
+            effort: Some(level),
+        }
+    }
+}
+
+/// Sets only `summary`; `effort` stays `None`.
+impl From<ReasoningSummary> for Reasoning {
+    fn from(mode: ReasoningSummary) -> Self {
+        Self {
+            summary: Some(mode),
+            effort: None,
+        }
+    }
+}
+
+// Prompt ergonomics
+
+/// Treats a string-like value as the prompt ID; `version` and `variables` stay `None`.
+impl<S: Into<String>> From<S> for Prompt {
+    fn from(prompt_id: S) -> Self {
+        let id = prompt_id.into();
+        Self {
+            id,
+            version: None,
+            variables: None,
+        }
+    }
+}
+
+// InputTextContent ergonomics
+
+/// Stores a string-like value as the `text` field.
+impl<S: Into<String>> From<S> for InputTextContent {
+    fn from(value: S) -> Self {
+        let text = value.into();
+        Self { text }
+    }
+}
+
+// InputContent ergonomics
+
+/// Text content maps to `InputContent::InputText`.
+impl From<InputTextContent> for InputContent {
+    fn from(text: InputTextContent) -> Self {
+        Self::InputText(text)
+    }
+}
+
+/// Image content maps to `InputContent::InputImage`.
+impl From<InputImageContent> for InputContent {
+    fn from(image: InputImageContent) -> Self {
+        Self::InputImage(image)
+    }
+}
+
+/// File content maps to `InputContent::InputFile`.
+impl From<InputFileContent> for InputContent {
+    fn from(file: InputFileContent) -> Self {
+        Self::InputFile(file)
+    }
+}
+
+/// A string-like value is wrapped in `InputTextContent` and maps to `InputContent::InputText`.
+impl<S: Into<String>> From<S> for InputContent {
+    fn from(value: S) -> Self {
+        let text = value.into();
+        Self::InputText(InputTextContent { text })
+    }
+}
+
+// ResponsePromptVariables ergonomics
+
+/// Input content maps to `ResponsePromptVariables::Content`.
+impl From<InputContent> for ResponsePromptVariables {
+    fn from(value: InputContent) -> Self {
+        Self::Content(value)
+    }
+}
+
+/// A string-like value maps to `ResponsePromptVariables::String`.
+impl<S: Into<String>> From<S> for ResponsePromptVariables {
+    fn from(value: S) -> Self {
+        Self::String(value.into())
+    }
+}
+
+// MessageItem ergonomics
+
+/// An input message maps to `MessageItem::Input`.
+impl From<InputMessage> for MessageItem {
+    fn from(input: InputMessage) -> Self {
+        Self::Input(input)
+    }
+}
+
+/// An output message maps to `MessageItem::Output`.
+impl From<OutputMessage> for MessageItem {
+    fn from(output: OutputMessage) -> Self {
+        Self::Output(output)
+    }
+}
+
+// FunctionCallOutput ergonomics
+
+/// A string slice is copied into `FunctionCallOutput::Text`.
+impl From<&str> for FunctionCallOutput {
+    fn from(output: &str) -> Self {
+        Self::Text(output.to_owned())
+    }
+}
+
+/// An owned string is moved into `FunctionCallOutput::Text`.
+impl From<String> for FunctionCallOutput {
+    fn from(output: String) -> Self {
+        Self::Text(output)
+    }
+}
+
+/// A list of input content parts maps to `FunctionCallOutput::Content`.
+impl From<Vec<InputContent>> for FunctionCallOutput {
+    fn from(parts: Vec<InputContent>) -> Self {
+        Self::Content(parts)
+    }
+}
+
+// RefusalContent ergonomics
+
+/// Stores a string-like value as the `refusal` field.
+impl<S: Into<String>> From<S> for RefusalContent {
+    fn from(message: S) -> Self {
+        let refusal = message.into();
+        Self { refusal }
+    }
+}
+
+// OutputMessageContent ergonomics
+
+/// Output text maps to `OutputMessageContent::OutputText`.
+impl From<OutputTextContent> for OutputMessageContent {
+    fn from(text: OutputTextContent) -> Self {
+        Self::OutputText(text)
+    }
+}
+
+/// A refusal maps to `OutputMessageContent::Refusal`.
+impl From<RefusalContent> for OutputMessageContent {
+    fn from(refusal: RefusalContent) -> Self {
+        Self::Refusal(refusal)
+    }
+}
+
+// Item ergonomics
+//
+// Each impl below wraps its payload, unchanged, in the matching `Item` variant.
+
+impl From<MessageItem> for Item {
+    fn from(payload: MessageItem) -> Self {
+        Self::Message(payload)
+    }
+}
+
+impl From<FileSearchToolCall> for Item {
+    fn from(payload: FileSearchToolCall) -> Self {
+        Self::FileSearchCall(payload)
+    }
+}
+
+impl From<ComputerToolCall> for Item {
+    fn from(payload: ComputerToolCall) -> Self {
+        Self::ComputerCall(payload)
+    }
+}
+
+impl From<ComputerCallOutputItemParam> for Item {
+    fn from(payload: ComputerCallOutputItemParam) -> Self {
+        Self::ComputerCallOutput(payload)
+    }
+}
+
+impl From<WebSearchToolCall> for Item {
+    fn from(payload: WebSearchToolCall) -> Self {
+        Self::WebSearchCall(payload)
+    }
+}
+
+impl From<FunctionToolCall> for Item {
+    fn from(payload: FunctionToolCall) -> Self {
+        Self::FunctionCall(payload)
+    }
+}
+
+impl From<FunctionCallOutputItemParam> for Item {
+    fn from(payload: FunctionCallOutputItemParam) -> Self {
+        Self::FunctionCallOutput(payload)
+    }
+}
+
+impl From<ReasoningItem> for Item {
+    fn from(payload: ReasoningItem) -> Self {
+        Self::Reasoning(payload)
+    }
+}
+
+impl From<ImageGenToolCall> for Item {
+    fn from(payload: ImageGenToolCall) -> Self {
+        Self::ImageGenerationCall(payload)
+    }
+}
+
+impl From<CodeInterpreterToolCall> for Item {
+    fn from(payload: CodeInterpreterToolCall) -> Self {
+        Self::CodeInterpreterCall(payload)
+    }
+}
+
+impl From<LocalShellToolCall> for Item {
+    fn from(payload: LocalShellToolCall) -> Self {
+        Self::LocalShellCall(payload)
+    }
+}
+
+impl From<LocalShellToolCallOutput> for Item {
+    fn from(payload: LocalShellToolCallOutput) -> Self {
+        Self::LocalShellCallOutput(payload)
+    }
+}
+
+impl From<FunctionShellCallItemParam> for Item {
+    fn from(payload: FunctionShellCallItemParam) -> Self {
+        Self::ShellCall(payload)
+    }
+}
+
+impl From<FunctionShellCallOutputItemParam> for Item {
+    fn from(payload: FunctionShellCallOutputItemParam) -> Self {
+        Self::ShellCallOutput(payload)
+    }
+}
+
+impl From<ApplyPatchToolCallItemParam> for Item {
+    fn from(payload: ApplyPatchToolCallItemParam) -> Self {
+        Self::ApplyPatchCall(payload)
+    }
+}
+
+impl From<ApplyPatchToolCallOutputItemParam> for Item {
+    fn from(payload: ApplyPatchToolCallOutputItemParam) -> Self {
+        Self::ApplyPatchCallOutput(payload)
+    }
+}
+
+impl From<MCPListTools> for Item {
+    fn from(payload: MCPListTools) -> Self {
+        Self::McpListTools(payload)
+    }
+}
+
+impl From<MCPApprovalRequest> for Item {
+    fn from(payload: MCPApprovalRequest) -> Self {
+        Self::McpApprovalRequest(payload)
+    }
+}
+
+impl From<MCPApprovalResponse> for Item {
+    fn from(payload: MCPApprovalResponse) -> Self {
+        Self::McpApprovalResponse(payload)
+    }
+}
+
+impl From<MCPToolCall> for Item {
+    fn from(payload: MCPToolCall) -> Self {
+        Self::McpCall(payload)
+    }
+}
+
+impl From<CustomToolCallOutput> for Item {
+    fn from(payload: CustomToolCallOutput) -> Self {
+        Self::CustomToolCallOutput(payload)
+    }
+}
+
+impl From<CustomToolCall> for Item {
+    fn from(payload: CustomToolCall) -> Self {
+        Self::CustomToolCall(payload)
+    }
+}
+
+// Tool ergonomics
+//
+// Each impl below wraps a concrete tool definition in the matching `Tool` variant.
+
+impl From<FunctionTool> for Tool {
+    fn from(definition: FunctionTool) -> Self {
+        Self::Function(definition)
+    }
+}
+
+impl From<FileSearchTool> for Tool {
+    fn from(definition: FileSearchTool) -> Self {
+        Self::FileSearch(definition)
+    }
+}
+
+impl From<ComputerUsePreviewTool> for Tool {
+    fn from(definition: ComputerUsePreviewTool) -> Self {
+        Self::ComputerUsePreview(definition)
+    }
+}
+
+impl From<WebSearchTool> for Tool {
+    fn from(definition: WebSearchTool) -> Self {
+        Self::WebSearch(definition)
+    }
+}
+
+impl From<MCPTool> for Tool {
+    fn from(definition: MCPTool) -> Self {
+        Self::Mcp(definition)
+    }
+}
+
+impl From<CodeInterpreterTool> for Tool {
+    fn from(definition: CodeInterpreterTool) -> Self {
+        Self::CodeInterpreter(definition)
+    }
+}
+
+impl From<ImageGenTool> for Tool {
+    fn from(definition: ImageGenTool) -> Self {
+        Self::ImageGeneration(definition)
+    }
+}
+
+impl From<CustomToolParam> for Tool {
+    fn from(definition: CustomToolParam) -> Self {
+        Self::Custom(definition)
+    }
+}
+
+// Vec<Tool> ergonomics
+//
+// Each impl below produces a one-element tool list, so a single tool can be passed
+// wherever a `Vec<Tool>` conversion is accepted.
+
+impl From<Tool> for Vec<Tool> {
+    fn from(single: Tool) -> Self {
+        vec![single]
+    }
+}
+
+impl From<FunctionTool> for Vec<Tool> {
+    fn from(definition: FunctionTool) -> Self {
+        let single = Tool::Function(definition);
+        vec![single]
+    }
+}
+
+impl From<FileSearchTool> for Vec<Tool> {
+    fn from(definition: FileSearchTool) -> Self {
+        let single = Tool::FileSearch(definition);
+        vec![single]
+    }
+}
+
+impl From<ComputerUsePreviewTool> for Vec<Tool> {
+    fn from(definition: ComputerUsePreviewTool) -> Self {
+        let single = Tool::ComputerUsePreview(definition);
+        vec![single]
+    }
+}
+
+impl From<WebSearchTool> for Vec<Tool> {
+    fn from(definition: WebSearchTool) -> Self {
+        let single = Tool::WebSearch(definition);
+        vec![single]
+    }
+}
+
+impl From<MCPTool> for Vec<Tool> {
+    fn from(definition: MCPTool) -> Self {
+        let single = Tool::Mcp(definition);
+        vec![single]
+    }
+}
+
+impl From<CodeInterpreterTool> for Vec<Tool> {
+    fn from(definition: CodeInterpreterTool) -> Self {
+        let single = Tool::CodeInterpreter(definition);
+        vec![single]
+    }
+}
+
+impl From<ImageGenTool> for Vec<Tool> {
+    fn from(definition: ImageGenTool) -> Self {
+        let single = Tool::ImageGeneration(definition);
+        vec![single]
+    }
+}
+
+impl From<CustomToolParam> for Vec<Tool> {
+    fn from(definition: CustomToolParam) -> Self {
+        let single = Tool::Custom(definition);
+        vec![single]
+    }
+}
+
+// EasyInputContent ergonomics
+
+/// Defaults to empty text content.
+impl Default for EasyInputContent {
+    fn default() -> Self {
+        Self::Text(String::new())
+    }
+}
+
+/// An owned string is moved into `EasyInputContent::Text`.
+impl From<String> for EasyInputContent {
+    fn from(text: String) -> Self {
+        Self::Text(text)
+    }
+}
+
+/// A string slice is copied into `EasyInputContent::Text`.
+impl From<&str> for EasyInputContent {
+    fn from(text: &str) -> Self {
+        Self::Text(String::from(text))
+    }
+}
+
+// Defaults
+
+/// Defaults to the `Auto` container holding a default `CodeInterpreterContainerAuto`.
+impl Default for CodeInterpreterToolContainer {
+    fn default() -> Self {
+        let auto = CodeInterpreterContainerAuto::default();
+        Self::Auto(auto)
+    }
+}
+
+/// Defaults to an empty text input.
+impl Default for InputParam {
+    fn default() -> Self {
+        Self::Text(String::default())
+    }
+}
+
+impl ItemReference {
+    /// Builds an item reference for `id`, with `type` set to
+    /// `Some(ItemReferenceType::ItemReference)`.
+    pub fn new(id: impl Into<String>) -> Self {
+        let id = id.into();
+        Self {
+            r#type: Some(ItemReferenceType::ItemReference),
+            id,
+        }
+    }
+}
--- a/lib/async-openai/src/types/responses/mod.rs
+++ b/lib/async-openai/src/types/responses/mod.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+mod api;
+mod conversation;
+mod impls;
+mod response;
+mod sdk;
+mod stream;
+
+pub use api::*;
+pub use conversation::*;
+pub use response::*;
+pub use stream::*;
+
+// Re-export shared types used by responses
+pub use crate::types::shared::ComparisonFilter;
+pub use crate::types::shared::ComparisonType;
+pub use crate::types::shared::CompoundFilter;
+pub use crate::types::shared::CompoundType;
+pub use crate::types::shared::CustomGrammarFormatParam;
+pub use crate::types::shared::Filter;
+pub use crate::types::shared::GrammarSyntax;
+pub use crate::types::shared::InputTokenDetails;
+pub use crate::types::shared::OutputTokenDetails;
+pub use crate::types::shared::ResponseUsage;
+
+// Re-export types from parent module that response.rs imports via `crate::types::responses::`
+pub use crate::types::ImageDetail;
+pub use crate::types::ReasoningEffort;
+pub use crate::types::ResponseFormatJsonSchema;
+
+/// Stream of response events
+///
+/// A pinned, boxed, `Send` stream whose items are either a `ResponseStreamEvent` or an
+/// `OpenAIError`.
+pub type ResponseStream = std::pin::Pin<
+    Box<dyn futures::Stream<Item = Result<ResponseStreamEvent, crate::error::OpenAIError>> + Send>,
+>;
+
+// Backward-compatible type aliases for Dynamo consumer code migration.
+// These map old Dynamo type names to the upstream names.
+// TODO: Remove these once all consumer code is fully migrated.
+/// Legacy alias for `InputParam`.
+pub type Input = InputParam;
+/// Legacy alias for `Prompt`.
+pub type PromptConfig = Prompt;
+/// Legacy alias for `ResponseTextParam`.
+pub type TextConfig = ResponseTextParam;
+/// Legacy alias for `TextResponseFormatConfiguration`.
+pub type TextResponseFormat = TextResponseFormatConfiguration;
--- a/lib/async-openai/src/types/responses/response.rs
+++ b/lib/async-openai/src/types/responses/response.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::error::OpenAIError;
+use crate::types::mcp::{MCPListToolsTool, MCPTool};
+use crate::types::responses::{
+    CustomGrammarFormatParam, Filter, ImageDetail, ReasoningEffort, ResponseFormatJsonSchema,
+    ResponseUsage,
+};
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use utoipa::ToSchema;
+
+/// Role of messages in the API.
+///
+/// Variant names are (de)serialized in lowercase: `user`, `assistant`, `system`,
+/// `developer`. `User` is the `Default` value.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum Role {
+    /// Serialized as `user`; the default role.
+    #[default]
+    User,
+    /// Serialized as `assistant`.
+    Assistant,
+    /// Serialized as `system`.
+    System,
+    /// Serialized as `developer`.
+    Developer,
+}
+
+/// Status of input/output items.
+///
+/// Variant names are (de)serialized in snake_case: `in_progress`, `completed`,
+/// `incomplete`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum OutputStatus {
+    /// Serialized as `in_progress`.
+    InProgress,
+    /// Serialized as `completed`.
+    Completed,
+    /// Serialized as `incomplete`.
+    Incomplete,
+}
+
+/// Input to a response request: either a plain string or a list of input items.
+///
+/// Untagged: the JSON carries no discriminator, so on deserialization serde tries
+/// `Text` first and then `Items`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum InputParam {
+    /// A text input to the model, equivalent to a text input with the
+    /// `user` role.
+    Text(String),
+    /// A list of one or many input items to the model, containing
+    /// different content types.
+    Items(Vec<InputItem>),
+}
+
+/// Content item used to generate a response.
+///
+/// This is a properly discriminated union based on the `type` field, using Rust's
+/// type-safe enum with serde's tag attribute for efficient deserialization.
+/// The tag value is the snake_case form of the variant name (for example
+/// `FileSearchCall` is tagged `file_search_call`).
+///
+/// # OpenAPI Specification
+/// Corresponds to the `Item` schema in the OpenAPI spec with a `type` discriminator.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[schema(no_recursion)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum Item {
+    /// A message (type: "message").
+    /// Can represent InputMessage (user/system/developer) or OutputMessage (assistant).
+    ///
+    /// - InputMessage: a message input to the model with a role indicating instruction
+    ///   following hierarchy. Instructions given with the developer or system role take
+    ///   precedence over instructions given with the user role.
+    /// - OutputMessage: a message output from the model.
+    Message(MessageItem),
+
+    /// The results of a file search tool call. See the
+    /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search) for more information.
+    FileSearchCall(FileSearchToolCall),
+
+    /// A tool call to a computer use tool. See the
+    /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use) for more information.
+    ComputerCall(ComputerToolCall),
+
+    /// The output of a computer tool call.
+    ComputerCallOutput(ComputerCallOutputItemParam),
+
+    /// The results of a web search tool call. See the
+    /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search) for more information.
+    WebSearchCall(WebSearchToolCall),
+
+    /// A tool call to run a function. See the
+    /// [function calling guide](https://platform.openai.com/docs/guides/function-calling) for more information.
+    FunctionCall(FunctionToolCall),
+
+    /// The output of a function tool call.
+    FunctionCallOutput(FunctionCallOutputItemParam),
+
+    /// A description of the chain of thought used by a reasoning model while generating
+    /// a response. Be sure to include these items in your `input` to the Responses API
+    /// for subsequent turns of a conversation if you are manually
+    /// [managing context](https://platform.openai.com/docs/guides/conversation-state).
+    Reasoning(ReasoningItem),
+
+    /// A compaction item generated by the [`v1/responses/compact` API](https://platform.openai.com/docs/api-reference/responses/compact).
+    Compaction(CompactionSummaryItemParam),
+
+    /// An image generation request made by the model.
+    ImageGenerationCall(ImageGenToolCall),
+
+    /// A tool call to run code.
+    CodeInterpreterCall(CodeInterpreterToolCall),
+
+    /// A tool call to run a command on the local shell.
+    LocalShellCall(LocalShellToolCall),
+
+    /// The output of a local shell tool call.
+    LocalShellCallOutput(LocalShellToolCallOutput),
+
+    /// A tool representing a request to execute one or more shell commands.
+    ShellCall(FunctionShellCallItemParam),
+
+    /// The streamed output items emitted by a shell tool call.
+    ShellCallOutput(FunctionShellCallOutputItemParam),
+
+    /// A tool call representing a request to create, delete, or update files using diff patches.
+    ApplyPatchCall(ApplyPatchToolCallItemParam),
+
+    /// The streamed output emitted by an apply patch tool call.
+    ApplyPatchCallOutput(ApplyPatchToolCallOutputItemParam),
+
+    /// A list of tools available on an MCP server.
+    McpListTools(MCPListTools),
+
+    /// A request for human approval of a tool invocation.
+    McpApprovalRequest(MCPApprovalRequest),
+
+    /// A response to an MCP approval request.
+    McpApprovalResponse(MCPApprovalResponse),
+
+    /// An invocation of a tool on an MCP server.
+    McpCall(MCPToolCall),
+
+    /// The output of a custom tool call from your code, being sent back to the model.
+    CustomToolCallOutput(CustomToolCallOutput),
+
+    /// A call to a custom tool created by the model.
+    CustomToolCall(CustomToolCall),
+}
+
+/// Input item that can be used in the context for generating a response.
+///
+/// This represents the OpenAPI `InputItem` schema which is an `anyOf`:
+/// 1. `EasyInputMessage` - Simple, user-friendly message input (can use string content)
+/// 2. `Item` - Structured items with proper type discrimination (including InputMessage, OutputMessage, tool calls)
+/// 3. `ItemReferenceParam` - Reference to an existing item by ID (type can be null)
+///
+/// Uses untagged deserialization because these types overlap in structure.
+/// Order matters: serde tries the variants in declaration order
+/// (`ItemReference`, then `Item`, then `EasyMessage`) and keeps the first one that
+/// deserializes successfully. Note that this differs from the order in which the
+/// spec lists the alternatives above.
+///
+/// # OpenAPI Specification
+/// Corresponds to the `InputItem` schema: `anyOf[EasyInputMessage, Item, ItemReferenceParam]`
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum InputItem {
+    /// A reference to an existing item by ID.
+    /// Has a required `id` field and optional `type` (can be "item_reference" or null).
+    /// Must be tried first as it's the most minimal structure.
+    ItemReference(ItemReference),
+
+    /// All structured items with proper type discrimination.
+    /// Includes InputMessage, OutputMessage, and all tool calls/outputs.
+    /// Uses the discriminated `Item` enum for efficient, type-safe deserialization.
+    #[schema(no_recursion)]
+    Item(Item),
+
+    /// A simple, user-friendly message input (EasyInputMessage).
+    /// Supports string content and can include assistant role for previous responses.
+    /// Must be tried last as it's the most flexible structure.
+    ///
+    /// A message input to the model with a role indicating instruction following
+    /// hierarchy. Instructions given with the `developer` or `system` role take
+    /// precedence over instructions given with the `user` role. Messages with the
+    /// `assistant` role are presumed to have been generated by the model in previous
+    /// interactions.
+    EasyMessage(EasyInputMessage),
+}
+
+/// A message item used within the `Item` enum.
+///
+/// Both InputMessage and OutputMessage have `type: "message"`, so we use an untagged
+/// enum to distinguish them based on their structure:
+/// - OutputMessage: role=assistant, required id & status fields
+/// - InputMessage: role=user/system/developer, content is `Vec<InputContent>`, optional status
+///
+/// Note: EasyInputMessage is NOT included here - it's a separate variant in `InputItem`,
+/// not part of the structured `Item` enum.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum MessageItem {
+    /// An output message from the model (role: assistant, has required id & status).
+    /// This must come first as it has the most specific structure (required id and status fields).
+    Output(OutputMessage),
+
+    /// A structured input message (role: user/system/developer, content is `Vec<InputContent>`).
+    /// Has a structured content list and an optional status field.
+    ///
+    /// A message input to the model with a role indicating instruction following hierarchy.
+    /// Instructions given with the `developer` or `system` role take precedence over instructions
+    /// given with the `user` role.
+    Input(InputMessage),
+}
+
+/// A reference to an existing item by ID.
+///
+/// Only `id` is required; `type` is optional and is left out of the serialized
+/// form when it is `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ItemReference {
+    /// The type of item to reference. Can be "item_reference" or null.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub r#type: Option<ItemReferenceType>,
+    /// The ID of the item to reference.
+    pub id: String,
+}
+
+/// Discriminator value for [`ItemReference`]; its single variant is
+/// (de)serialized as `item_reference`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ItemReferenceType {
+    /// Serialized as `item_reference`.
+    ItemReference,
+}
+
+/// Output from a function call that you're providing back to the model.
+///
+/// `call_id` and `output` are required; `id` and `status` are optional and are
+/// omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionCallOutputItemParam {
+    /// The unique ID of the function tool call generated by the model.
+    pub call_id: String,
+    /// Text, image, or file output of the function tool call.
+    pub output: FunctionCallOutput,
+    /// The unique ID of the function tool call output.
+    /// Populated when this item is returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
+    /// Populated when items are returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>,
+}
+
+/// Payload of a function tool call output: a string or a list of content parts.
+///
+/// Untagged: serde tries `Text` first, then `Content`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum FunctionCallOutput {
+    /// A JSON string of the output of the function tool call.
+    Text(String),
+    /// A list of structured content parts making up the output.
+    Content(Vec<InputContent>), // TODO use shape which allows null from OpenAPI spec?
+}
+
+/// The output of a computer tool call, provided back to the model.
+///
+/// `call_id` and `output` are required; the remaining fields are optional and are
+/// omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ComputerCallOutputItemParam {
+    /// The ID of the computer tool call that produced the output.
+    pub call_id: String,
+    /// A computer screenshot image used with the computer use tool.
+    pub output: ComputerScreenshotImage,
+    /// The safety checks reported by the API that have been acknowledged by the developer.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub acknowledged_safety_checks: Option<Vec<ComputerCallSafetyCheckParam>>,
+    /// The unique ID of the computer tool call output. Optional when creating.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
+    /// Populated when input items are returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>, // TODO rename OutputStatus?
+}
+
+/// Discriminator value for [`ComputerScreenshotImage`]; its single variant is
+/// (de)serialized as `computer_screenshot`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ComputerScreenshotImageType {
+    /// Serialized as `computer_screenshot`.
+    ComputerScreenshot,
+}
+
+/// A computer screenshot image used with the computer use tool.
+///
+/// The screenshot can be given as an uploaded file ID and/or a URL; both fields
+/// are optional in this type and are omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ComputerScreenshotImage {
+    /// Specifies the event type. For a computer screenshot, this property is always
+    /// set to `computer_screenshot`.
+    pub r#type: ComputerScreenshotImageType,
+    /// The identifier of an uploaded file that contains the screenshot.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+    /// The URL of the screenshot image.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub image_url: Option<String>,
+}
+
+/// Output from a local shell tool call that you're providing back to the model.
+///
+/// `id` and `output` are required; `status` is optional and omitted from the
+/// serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct LocalShellToolCallOutput {
+    /// The unique ID of the local shell tool call generated by the model.
+    pub id: String,
+
+    /// A JSON string of the output of the local shell tool call.
+    pub output: String,
+
+    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>,
+}
+
+/// Output from a local shell command execution.
+///
+/// Every field is optional and is omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct LocalShellOutput {
+    /// The stdout output from the command.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stdout: Option<String>,
+
+    /// The stderr output from the command.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stderr: Option<String>,
+
+    /// The exit code of the command.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub exit_code: Option<i32>,
+}
+
+/// An MCP approval response that you're providing back to the model.
+///
+/// `approval_request_id` and `approve` are required; `id` and `reason` are
+/// optional and omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct MCPApprovalResponse {
+    /// The ID of the approval request being answered.
+    pub approval_request_id: String,
+
+    /// Whether the request was approved.
+    pub approve: bool,
+
+    /// The unique ID of the approval response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+
+    /// Optional reason for the decision.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reason: Option<String>,
+}
+
+/// Payload of a custom tool call output: a string or a list of content parts.
+///
+/// Untagged: serde tries `Text` first, then `List`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum CustomToolCallOutputOutput {
+    /// A string of the output of the custom tool call.
+    Text(String),
+    /// Text, image, or file output of the custom tool call.
+    List(Vec<InputContent>),
+}
+
+/// The output of a custom tool call, sent back to the model.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CustomToolCallOutput {
+    /// The call ID, used to map this custom tool call output to a custom tool call.
+    pub call_id: String,
+
+    /// The output from the custom tool call generated by your code.
+    /// Can be a string or a list of output content.
+    pub output: CustomToolCallOutputOutput,
+
+    /// The unique ID of the custom tool call output in the OpenAI platform.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+}
+
+/// A simplified message input to the model (EasyInputMessage in the OpenAPI spec).
+///
+/// This is the most user-friendly way to provide messages, supporting both simple
+/// string content and structured content. Role can include `assistant` for providing
+/// previous assistant responses.
+///
+/// Can be constructed with the generated `EasyInputMessageArgs` builder; fields that
+/// are not set on the builder take their `Default` value, and `build()` reports
+/// failures as `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "EasyInputMessageArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct EasyInputMessage {
+    /// The type of the message input. Always set to `message`.
+    pub r#type: MessageType,
+    /// The role of the message input. One of `user`, `assistant`, `system`, or `developer`.
+    pub role: Role,
+    /// Text, image, or audio input to the model, used to generate a response.
+    /// Can also contain previous assistant responses.
+    pub content: EasyInputContent,
+}
+
+/// A structured message input to the model (InputMessage in the OpenAPI spec).
+///
+/// This variant requires structured content (not a simple string) and does not support
+/// the `assistant` role (use OutputMessage for that). `status` is populated when items
+/// are returned via API.
+///
+/// Can be constructed with the generated `InputMessageArgs` builder; fields that are
+/// not set on the builder take their `Default` value, and `build()` reports failures
+/// as `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "InputMessageArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct InputMessage {
+    /// A list of one or many input items to the model, containing different content types.
+    pub content: Vec<InputContent>,
+    /// The role of the message input. One of `user`, `system`, or `developer`.
+    /// Note: `assistant` is NOT allowed here; use OutputMessage instead.
+    pub role: InputRole,
+    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
+    /// Populated when items are returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>,
+    // NOTE(review): a `r#type: MessageType` field ("always `message`") is deliberately
+    // not declared here. This struct is reached through `Item::Message`, and `Item` is
+    // internally tagged on `type`, so the tag is presumably handled by the enum and
+    // must not be declared a second time - confirm before re-adding the field.
+}
+
+/// The role for an input message - can only be `user`, `system`, or `developer`.
+/// This type ensures type safety by excluding the `assistant` role (use OutputMessage for that).
+///
+/// Variant names are (de)serialized in lowercase; `User` is the `Default` value.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum InputRole {
+    /// Serialized as `user`; the default role.
+    #[default]
+    User,
+    /// Serialized as `system`.
+    System,
+    /// Serialized as `developer`.
+    Developer,
+}
+
+/// Content for EasyInputMessage - can be a simple string or structured list.
+///
+/// Untagged: serde tries `Text` first, then `ContentList`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum EasyInputContent {
+    /// A text input to the model.
+    Text(String),
+    /// A list of one or many input items to the model, containing different content types.
+    ContentList(Vec<InputContent>),
+}
+
+/// Parts of a message: text, image, file, video, or audio.
+///
+/// Internally tagged on `type`; the tag value is the snake_case variant name
+/// (`input_text`, `input_image`, `input_file`, `input_video`, `input_audio`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum InputContent {
+    /// A text input to the model.
+    InputText(InputTextContent),
+    /// An image input to the model. Learn about
+    /// [image inputs](https://platform.openai.com/docs/guides/vision).
+    InputImage(InputImageContent),
+    /// A file input to the model.
+    InputFile(InputFileContent),
+    /// A video input to the model.
+    InputVideo(InputVideoContent),
+    /// An audio input to the model.
+    InputAudio(InputAudioContent),
+}
+
+/// Video content for input messages.
+///
+/// Carried by the `InputContent::InputVideo` variant.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct InputVideoContent {
+    /// The video input - can be a URL or base64 encoded data.
+    pub video: String,
+}
+
+/// Audio content for input messages.
+///
+/// Carried by the `InputContent::InputAudio` variant.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct InputAudioContent {
+    /// The audio input - can be a URL or base64 encoded data.
+    pub audio: String,
+}
+
+/// Text content for input messages.
+///
+/// Carried by the `InputContent::InputText` variant.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct InputTextContent {
+    /// The text input to the model.
+    pub text: String,
+}
+
+/// Image content for input messages.
+///
+/// Carried by the `InputContent::InputImage` variant. `file_id` and `image_url` are
+/// both optional and omitted from the serialized form when `None`. Can be
+/// constructed with the generated `InputImageArgs` builder, whose `build()` reports
+/// failures as `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "InputImageArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct InputImageContent {
+    /// The detail level of the image to be sent to the model. One of `high`, `low`, or `auto`.
+    /// Defaults to `auto`.
+    /// When the field is absent from the input, `ImageDetail::default()` is used.
+    #[serde(default)]
+    pub detail: ImageDetail,
+    /// The ID of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+    /// The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image
+    /// in a data URL.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub image_url: Option<String>,
+}
+
+/// File content for input messages.
+///
+/// Carried by the `InputContent::InputFile` variant. Every field is optional and is
+/// omitted from the serialized form when `None`. Can be constructed with the
+/// generated `InputFileArgs` builder, whose `build()` reports failures as
+/// `OpenAIError`.
+///
+/// The fields are public, like those of the other content structs in this module,
+/// so that code outside this module can read a deserialized value and not only
+/// build one.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "InputFileArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct InputFileContent {
+    /// The content of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_data: Option<String>,
+    /// The ID of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+    /// The URL of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_url: Option<String>,
+    /// The name of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub filename: Option<String>,
+}
+
+/// The conversation that this response belonged to. Input items and output items from this
+/// response were automatically added to this conversation.
+///
+/// Serialized as an object with a single `id` field.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct Conversation {
+    /// The unique ID of the conversation that this response was associated with.
+    pub id: String,
+}
+
+/// A conversation given either as a bare ID string or as a [`Conversation`] object.
+///
+/// Untagged: serde tries `ConversationID` first, then `Object`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum ConversationParam {
+    /// The unique ID of the conversation.
+    ConversationID(String),
+    /// The conversation that this response belongs to.
+    Object(Conversation),
+}
+
+/// Additional output data that can be requested through the `include` list of a
+/// response request.
+///
+/// Each variant is (de)serialized as the dotted path given in its `rename` attribute.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, ToSchema)]
+pub enum IncludeEnum {
+    /// Serialized as `file_search_call.results`.
+    #[serde(rename = "file_search_call.results")]
+    FileSearchCallResults,
+    /// Serialized as `web_search_call.results`.
+    #[serde(rename = "web_search_call.results")]
+    WebSearchCallResults,
+    /// Serialized as `web_search_call.action.sources`.
+    #[serde(rename = "web_search_call.action.sources")]
+    WebSearchCallActionSources,
+    /// Serialized as `message.input_image.image_url`.
+    #[serde(rename = "message.input_image.image_url")]
+    MessageInputImageImageUrl,
+    /// Serialized as `computer_call_output.output.image_url`.
+    #[serde(rename = "computer_call_output.output.image_url")]
+    ComputerCallOutputOutputImageUrl,
+    /// Serialized as `code_interpreter_call.outputs`.
+    #[serde(rename = "code_interpreter_call.outputs")]
+    CodeInterpreterCallOutputs,
+    /// Serialized as `reasoning.encrypted_content`.
+    #[serde(rename = "reasoning.encrypted_content")]
+    ReasoningEncryptedContent,
+    /// Serialized as `message.output_text.logprobs`.
+    #[serde(rename = "message.output_text.logprobs")]
+    MessageOutputTextLogprobs,
+}
+
+/// Options for streaming responses.
+///
+/// The single field is optional and omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseStreamOptions {
+    /// When true, stream obfuscation will be enabled. Stream obfuscation adds
+    /// random characters to an `obfuscation` field on streaming delta events to
+    /// normalize payload sizes as a mitigation to certain side-channel attacks.
+    /// These obfuscation fields are included by default, but add a small amount
+    /// of overhead to the data stream. You can set `include_obfuscation` to
+    /// false to optimize for bandwidth if you trust the network links between
+    /// your application and the OpenAI API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include_obfuscation: Option<bool>,
+}
+
+/// Builder for a Responses API request.
+#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq, ToSchema)]
+#[builder(
+    name = "CreateResponseArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateResponse {
+    /// Whether to run the model response in the background.
+    /// [Learn more](https://platform.openai.com/docs/guides/background).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub background: Option<bool>,
+
+    /// The conversation that this response belongs to. Items from this conversation are prepended to
+    ///  `input_items` for this response request.
+    ///
+    /// Input items and output items from this response are automatically added to this conversation after
+    /// this response completes.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub conversation: Option<ConversationParam>,
+
+    /// Specify additional output data to include in the model response. Currently supported
+    /// values are:
+    ///
+    /// - `web_search_call.action.sources`: Include the sources of the web search tool call.
+    ///
+    /// - `code_interpreter_call.outputs`: Includes the outputs of python code execution in code
+    ///   interpreter tool call items.
+    ///
+    /// - `computer_call_output.output.image_url`: Include image urls from the computer call
+    ///   output.
+    ///
+    /// - `file_search_call.results`: Include the search results of the file search tool call.
+    ///
+    /// - `message.input_image.image_url`: Include image urls from the input message.
+    ///
+    /// - `message.output_text.logprobs`: Include logprobs with assistant messages.
+    ///
+    /// - `reasoning.encrypted_content`: Includes an encrypted version of reasoning tokens in
+    ///   reasoning item outputs. This enables reasoning items to be used in multi-turn
+    ///   conversations when using the Responses API statelessly (like when the `store` parameter is
+    ///   set to `false`, or when an organization is enrolled in the zero data retention program).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include: Option<Vec<IncludeEnum>>,
+
+    /// Text, image, or file inputs to the model, used to generate a response.
+    ///
+    /// Learn more:
+    /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+    /// - [Image inputs](https://platform.openai.com/docs/guides/images)
+    /// - [File inputs](https://platform.openai.com/docs/guides/pdf-files)
+    /// - [Conversation state](https://platform.openai.com/docs/guides/conversation-state)
+    /// - [Function calling](https://platform.openai.com/docs/guides/function-calling)
+    pub input: InputParam,
+
+    /// A system (or developer) message inserted into the model's context.
+    ///
+    /// When using along with `previous_response_id`, the instructions from a previous
+    /// response will not be carried over to the next response. This makes it simple
+    /// to swap out system (or developer) messages in new responses.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// An upper bound for the number of tokens that can be generated for a response, including
+    /// visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<u32>,
+
+    /// The maximum number of total calls to built-in tools that can be processed in a response. This
+    /// maximum number applies across all built-in tool calls, not per individual tool. Any further
+    /// attempts to call a tool by the model will be ignored.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tool_calls: Option<u32>,
+
+    /// Set of 16 key-value pairs that can be attached to an object. This can be
+    /// useful for storing additional information about the object in a structured
+    /// format, and querying for objects via API or the dashboard.
+    ///
+    /// Keys are strings with a maximum length of 64 characters. Values are
+    /// strings with a maximum length of 512 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    #[schema(value_type = Object)]
+    pub metadata: Option<serde_json::Value>,
+
+    /// Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI
+    /// offers a wide range of models with different capabilities, performance
+    /// characteristics, and price points. Refer to the [model guide](https://platform.openai.com/docs/models)
+    /// to browse and compare available models.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+
+    /// Whether to allow the model to run tool calls in parallel.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+
+    /// The unique ID of the previous response to the model. Use this to create multi-turn conversations.
+    /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+    /// Cannot be used in conjunction with `conversation`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+
+    /// Reference to a prompt template and its variables.
+    /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt: Option<Prompt>,
+
+    /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces
+    /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_key: Option<String>,
+
+    /// The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching,
+    /// which keeps cached prefixes active for longer, up to a maximum of 24 hours. [Learn
+    /// more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_retention: Option<PromptCacheRetention>,
+
+    /// **gpt-5 and o-series models only**
+    /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<Reasoning>,
+
+    /// A stable identifier used to help detect users of your application that may be violating OpenAI's
+    /// usage policies.
+    ///
+    /// The IDs should be a string that uniquely identifies each user. We recommend hashing their username
+    /// or email address, in order to avoid sending us any identifying information. [Learn
+    /// more](https://platform.openai.com/docs/guides/safety-best-practices#safety-identifiers).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub safety_identifier: Option<String>,
+
+    /// Specifies the processing type used for serving the request.
+    /// - If set to 'auto', then the request will be processed with the service tier configured in the Project settings. Unless otherwise configured, the Project will use 'default'.
+    /// - If set to 'default', then the request will be processed with the standard pricing and performance for the selected model.
+    /// - If set to '[flex](https://platform.openai.com/docs/guides/flex-processing)' or '[priority](https://openai.com/api-priority-processing/)', then the request will be processed with the corresponding service tier.
+    /// - When not set, the default behavior is 'auto'.
+    ///
+    /// When the `service_tier` parameter is set, the response body will include the `service_tier` value based on the processing mode actually used to serve the request. This response value may be different from the value set in the parameter.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub service_tier: Option<ServiceTier>,
+
+    /// Whether to store the generated model response for later retrieval via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub store: Option<bool>,
+
+    /// If set to true, the model response data will be streamed to the client
+    /// as it is generated using [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
+    /// See the [Streaming section below](https://platform.openai.com/docs/api-reference/responses-streaming)
+    /// for more information.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+
+    /// Options for streaming responses. Only set this when you set `stream: true`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream_options: Option<ResponseStreamOptions>,
+
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8
+    /// will make the output more random, while lower values like 0.2 will make it
+    /// more focused and deterministic. We generally recommend altering this or
+    /// `top_p` but not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// Configuration options for a text response from the model. Can be plain
+    /// text or structured JSON data. Learn more:
+    /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+    /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<ResponseTextParam>,
+
+    /// How the model should select which tool (or tools) to use when generating
+    /// a response. See the `tools` parameter to see how to specify which tools
+    /// the model can call.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<ToolChoiceParam>,
+
+    /// An array of tools the model may call while generating a response. You
+    /// can specify which tool to use by setting the `tool_choice` parameter.
+    ///
+    /// We support the following categories of tools:
+    /// - **Built-in tools**: Tools that are provided by OpenAI that extend the
+    ///   model's capabilities, like [web search](https://platform.openai.com/docs/guides/tools-web-search)
+    ///   or [file search](https://platform.openai.com/docs/guides/tools-file-search). Learn more about
+    ///   [built-in tools](https://platform.openai.com/docs/guides/tools).
+    /// - **MCP Tools**: Integrations with third-party systems via custom MCP servers
+    ///   or predefined connectors such as Google Drive and SharePoint. Learn more about
+    ///   [MCP Tools](https://platform.openai.com/docs/guides/tools-connectors-mcp).
+    /// - **Function calls (custom tools)**: Functions that are defined by you,
+    ///   enabling the model to call your own code with strongly typed arguments
+    ///   and outputs. Learn more about
+    ///   [function calling](https://platform.openai.com/docs/guides/function-calling). You can also use
+    ///   custom tools to call your own code.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<Tool>>,
+
+    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each
+    /// token position, each with an associated log probability.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_logprobs: Option<u8>,
+
+    /// An alternative to sampling with temperature, called nucleus sampling,
+    /// where the model considers the results of the tokens with top_p probability
+    /// mass. So 0.1 means only the tokens comprising the top 10% probability mass
+    /// are considered.
+    ///
+    /// We generally recommend altering this or `temperature` but not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+
+    ///The truncation strategy to use for the model response.
+    /// - `auto`: If the input to this Response exceeds
+    ///   the model's context window size, the model will truncate the
+    ///   response to fit the context window by dropping items from the beginning of the conversation.
+    /// - `disabled` (default): If the input size will exceed the context window
+    ///   size for a model, the request will fail with a 400 error.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation: Option<Truncation>,
+}
+
+/// Substitution value for a prompt-template variable.
+///
+/// The enum is `#[serde(untagged)]`, so on deserialization the variants are
+/// tried in declaration order: a plain JSON string, then an [`InputContent`]
+/// item, and finally any other JSON value, which is kept verbatim in `Custom`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum ResponsePromptVariables {
+    /// A plain string value.
+    String(String),
+    /// A structured input content item.
+    Content(InputContent),
+    /// Any other JSON value, kept as-is.
+    Custom(serde_json::Value),
+}
+
+/// Reference to a prompt template, with an optional version and optional
+/// variable substitutions.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct Prompt {
+    /// The unique identifier of the prompt template to use.
+    pub id: String,
+
+    /// Optional version of the prompt template. Omitted from the serialized
+    /// output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub version: Option<String>,
+
+    /// Optional map of values to substitute in for variables in your
+    /// prompt. The substitution values can either be strings, or other
+    /// Response input types like images or files.
+    // NOTE(review): the text above describes a map, but the field holds a single
+    // `ResponsePromptVariables`; presumably a JSON object ends up in its
+    // `Custom` variant — confirm.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub variables: Option<ResponsePromptVariables>,
+}
+
+/// Processing tier used to serve a request.
+///
+/// Serialized in lowercase (`auto`, `default`, `flex`, `scale`, `priority`).
+/// `Auto` is the value produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ServiceTier {
+    /// Use the service tier configured in the Project settings.
+    #[default]
+    Auto,
+    /// Standard pricing and performance for the selected model.
+    Default,
+    /// Flex processing tier.
+    Flex,
+    // NOTE(review): `scale` is not described by the `service_tier` field docs —
+    // confirm its semantics.
+    Scale,
+    /// Priority processing tier.
+    Priority,
+}
+
+/// Truncation strategies.
+///
+/// Serialized in lowercase (`auto`, `disabled`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum Truncation {
+    /// If the input exceeds the model's context window, items are dropped from
+    /// the beginning of the conversation so the response fits.
+    Auto,
+    /// If the input would exceed the model's context window, the request fails
+    /// with a 400 error.
+    Disabled,
+}
+
+/// Billing information.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct Billing {
+    /// The payer, as a free-form string.
+    // NOTE(review): the set of accepted `payer` values is not visible here — confirm.
+    pub payer: String,
+}
+
+/// o-series reasoning settings.
+///
+/// A `ReasoningArgs` builder is generated for this struct (mutable pattern,
+/// `Into`-converting setters that strip the `Option`, defaulted fields); its
+/// `build` step reports failures as `OpenAIError`. Both fields are omitted
+/// from the serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "ReasoningArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct Reasoning {
+    /// Constrains effort on reasoning for
+    /// [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+    /// Currently supported values are `minimal`, `low`, `medium`, and `high`. Reducing
+    /// reasoning effort can result in faster responses and fewer tokens used
+    /// on reasoning in a response.
+    ///
+    /// Note: The `gpt-5-pro` model defaults to (and only supports) `high` reasoning effort.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<ReasoningEffort>,
+    /// A summary of the reasoning performed by the model. This can be
+    /// useful for debugging and understanding the model's reasoning process.
+    /// One of `auto`, `concise`, or `detailed`.
+    ///
+    /// `concise` is supported for `computer-use-preview` models and all reasoning models after
+    /// `gpt-5`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub summary: Option<ReasoningSummary>,
+}
+
+/// Verbosity level for the model's text response.
+///
+/// Lower values ask for more concise responses, higher values for more verbose
+/// ones. Serialized in lowercase (`low`, `medium`, `high`).
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum Verbosity {
+    /// Most concise responses.
+    Low,
+    /// Intermediate verbosity.
+    Medium,
+    /// Most verbose responses.
+    High,
+}
+
+/// Requested style for the summary of the model's reasoning.
+///
+/// Serialized in lowercase (`auto`, `concise`, `detailed`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ReasoningSummary {
+    /// Serialized as `auto`.
+    Auto,
+    /// Serialized as `concise`.
+    Concise,
+    /// Serialized as `detailed`.
+    Detailed,
+}
+
+/// The retention policy for the prompt cache.
+///
+/// Each variant has an explicit wire name because neither value is a valid
+/// Rust identifier.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+pub enum PromptCacheRetention {
+    /// Serialized as `in-memory`.
+    #[serde(rename = "in-memory")]
+    InMemory,
+    /// Serialized as `24h`.
+    #[serde(rename = "24h")]
+    Hours24,
+}
+
+/// Configuration for text response format.
+///
+/// `format` is required and always serialized; `verbosity` is optional and is
+/// omitted from the serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseTextParam {
+    /// An object specifying the format that the model must output.
+    ///
+    /// Configuring `{ "type": "json_schema" }` enables Structured Outputs,
+    /// which ensures the model will match your supplied JSON schema. Learn more in the
+    /// [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+    ///
+    /// The default format is `{ "type": "text" }` with no additional options.
+    ///
+    /// **Not recommended for gpt-4o and newer models:**
+    ///
+    /// Setting to `{ "type": "json_object" }` enables the older JSON mode, which
+    /// ensures the message the model generates is valid JSON. Using `json_schema`
+    /// is preferred for models that support it.
+    pub format: TextResponseFormatConfiguration,
+
+    /// Constrains the verbosity of the model's response. Lower values will result in
+    /// more concise responses, while higher values will result in more verbose responses.
+    ///
+    /// Currently supported values are `low`, `medium`, and `high`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub verbosity: Option<Verbosity>,
+}
+
+/// The output format requested for a text response.
+///
+/// Internally tagged on the `type` field with snake_case variant names:
+/// `text`, `json_object`, and `json_schema`.
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum TextResponseFormatConfiguration {
+    /// Default response format. Used to generate text responses.
+    Text,
+    /// JSON object response format. An older method of generating JSON responses.
+    /// Using `json_schema` is recommended for models that support it.
+    /// Note that the model will not generate JSON without a system or user message
+    /// instructing it to do so.
+    JsonObject,
+    /// JSON Schema response format. Used to generate structured JSON responses.
+    /// Learn more about [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs).
+    JsonSchema(ResponseFormatJsonSchema),
+}
+
+/// Definitions for model-callable tools.
+///
+/// Internally tagged on the `type` field. Variant names are converted to
+/// snake_case, except for the two dated web-search variants, which carry an
+/// explicit `rename`. `LocalShell`, `Shell`, and `ApplyPatch` are unit variants
+/// and carry no configuration beyond the tag.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum Tool {
+    /// Defines a function in your own code the model can choose to call. Learn more about [function
+    /// calling](https://platform.openai.com/docs/guides/tools).
+    Function(FunctionTool),
+    /// A tool that searches for relevant content from uploaded files. Learn more about the [file search
+    /// tool](https://platform.openai.com/docs/guides/tools-file-search).
+    FileSearch(FileSearchTool),
+    /// A tool that controls a virtual computer. Learn more about the [computer
+    /// use tool](https://platform.openai.com/docs/guides/tools-computer-use).
+    ComputerUsePreview(ComputerUsePreviewTool),
+    /// Search the Internet for sources related to the prompt. Learn more about the
+    /// [web search tool](https://platform.openai.com/docs/guides/tools-web-search).
+    WebSearch(WebSearchTool),
+    /// Dated alias of the web search tool; tagged as `web_search_2025_08_26`.
+    #[serde(rename = "web_search_2025_08_26")]
+    WebSearch20250826(WebSearchTool),
+    /// Give the model access to additional tools via remote Model Context Protocol
+    /// (MCP) servers. [Learn more about MCP](https://platform.openai.com/docs/guides/tools-remote-mcp).
+    Mcp(MCPTool),
+    /// A tool that runs Python code to help generate a response to a prompt.
+    CodeInterpreter(CodeInterpreterTool),
+    /// A tool that generates images using a model like `gpt-image-1`.
+    ImageGeneration(ImageGenTool),
+    /// A tool that allows the model to execute shell commands in a local environment.
+    LocalShell,
+    /// A tool that allows the model to execute shell commands.
+    Shell,
+    /// A custom tool that processes input using a specified format. Learn more about [custom
+    /// tools](https://platform.openai.com/docs/guides/function-calling#custom-tools).
+    Custom(CustomToolParam),
+    /// This tool searches the web for relevant results to use in a response. Learn more about the [web search
+    /// tool](https://platform.openai.com/docs/guides/tools-web-search).
+    WebSearchPreview(WebSearchTool),
+    /// Dated alias of the web search preview tool; tagged as `web_search_preview_2025_03_11`.
+    #[serde(rename = "web_search_preview_2025_03_11")]
+    WebSearchPreview20250311(WebSearchTool),
+    /// Allows the assistant to create, delete, or update files using unified diffs.
+    ApplyPatch,
+}
+
+/// A custom tool definition: a name, an optional description, and the input
+/// format the tool accepts.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+pub struct CustomToolParam {
+    /// The name of the custom tool, used to identify it in tool calls.
+    pub name: String,
+    /// Optional description of the custom tool, used to provide more context.
+    /// Omitted from the serialized output when `None` instead of being written
+    /// as an explicit `null`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+    /// The input format for the custom tool. Default is unconstrained text.
+    pub format: CustomToolParamFormat,
+}
+
+/// Input format accepted by a custom tool.
+///
+/// Internally tagged on the `type` field with lowercase variant names (`text`,
+/// `grammar`). `Text` is the value produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum CustomToolParamFormat {
+    /// Unconstrained free-form text.
+    #[default]
+    Text,
+    /// A grammar defined by the user.
+    Grammar(CustomGrammarFormatParam),
+}
+
+/// Configuration for the file search tool.
+///
+/// A `FileSearchToolArgs` builder is generated for this struct; its `build`
+/// step reports failures as `OpenAIError`. `vector_store_ids` is always
+/// serialized, while the optional fields are omitted when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "FileSearchToolArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct FileSearchTool {
+    /// The IDs of the vector stores to search.
+    pub vector_store_ids: Vec<String>,
+    /// The maximum number of results to return. This number should be between 1 and 50 inclusive.
+    // The documented range is not enforced by the type; any `u32` is accepted here.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_num_results: Option<u32>,
+    /// A filter to apply.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub filters: Option<Filter>,
+    /// Ranking options for search.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ranking_options: Option<RankingOptions>,
+}
+
+/// Definition of a function the model may call.
+///
+/// A `FunctionToolArgs` builder is generated for this struct. `name` is always
+/// serialized; the optional fields are omitted when `None`.
+// NOTE(review): unlike `FileSearchTool`, there is no
+// `#[builder(build_fn(error = "OpenAIError"))]` here — confirm that is intentional.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "FunctionToolArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+pub struct FunctionTool {
+    /// The name of the function to call.
+    pub name: String,
+    /// A JSON schema object describing the parameters of the function.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parameters: Option<serde_json::Value>,
+    /// Whether to enforce strict parameter validation. Default `true`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub strict: Option<bool>,
+    /// A description of the function. Used by the model to determine whether or not to call the
+    /// function.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+}
+
+/// Filters that restrict which domains a web search may use.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct WebSearchToolFilters {
+    /// Allowed domains for the search. If not provided, all domains are allowed.
+    /// Subdomains of the provided domains are allowed as well.
+    ///
+    /// Example: `["pubmed.ncbi.nlm.nih.gov"]`
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub allowed_domains: Option<Vec<String>>,
+}
+
+/// Configuration for the web search tool.
+///
+/// A `WebSearchToolArgs` builder is generated for this struct. Every field is
+/// optional and is omitted from the serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "WebSearchToolArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+pub struct WebSearchTool {
+    /// Filters for the search.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub filters: Option<WebSearchToolFilters>,
+    /// The approximate location of the user.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub user_location: Option<WebSearchApproximateLocation>,
+    /// High level guidance for the amount of context window space to use for the search. One of `low`,
+    /// `medium`, or `high`. `medium` is the default.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub search_context_size: Option<WebSearchToolSearchContextSize>,
+}
+
+/// Guidance for how much context window space a web search may use.
+///
+/// Serialized in lowercase (`low`, `medium`, `high`). `Medium` is the value
+/// produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum WebSearchToolSearchContextSize {
+    /// Serialized as `low`.
+    Low,
+    /// Serialized as `medium`; the default.
+    #[default]
+    Medium,
+    /// Serialized as `high`.
+    High,
+}
+
+/// The kind of computer environment a computer-use tool controls.
+///
+/// Serialized in lowercase (`windows`, `mac`, `linux`, `ubuntu`, `browser`).
+/// `Browser` is the value produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ComputerEnvironment {
+    /// Serialized as `windows`.
+    Windows,
+    /// Serialized as `mac`.
+    Mac,
+    /// Serialized as `linux`.
+    Linux,
+    /// Serialized as `ubuntu`.
+    Ubuntu,
+    /// Serialized as `browser`; the default.
+    #[default]
+    Browser,
+}
+
+/// Configuration for the computer-use preview tool.
+///
+/// A `ComputerUsePreviewToolArgs` builder is generated for this struct. The
+/// fields are public, like those of the other tool definitions, so a value can
+/// be inspected or constructed directly as well as through the builder.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "ComputerUsePreviewToolArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+pub struct ComputerUsePreviewTool {
+    /// The type of computer environment to control.
+    pub environment: ComputerEnvironment,
+    /// The width of the computer display.
+    pub display_width: u32,
+    /// The height of the computer display.
+    pub display_height: u32,
+}
+
+/// Identifier of the ranker used for file search.
+///
+/// Each variant carries an explicit wire name.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+pub enum RankVersionType {
+    /// Serialized as `auto`.
+    #[serde(rename = "auto")]
+    Auto,
+    /// Serialized as `default-2024-11-15`.
+    #[serde(rename = "default-2024-11-15")]
+    Default20241115,
+}
+
+/// Weights used by reciprocal rank fusion when hybrid search is enabled.
+///
+/// Both weights are required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct HybridSearch {
+    /// The weight of the embedding in the reciprocal ranking fusion.
+    pub embedding_weight: f32,
+    /// The weight of the text in the reciprocal ranking fusion.
+    pub text_weight: f32,
+}
+
+/// Options for search result ranking.
+///
+/// `ranker` is required and always serialized; the other fields are optional
+/// and are omitted from the serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct RankingOptions {
+    /// Weights that control how reciprocal rank fusion balances semantic embedding matches versus
+    /// sparse keyword matches when hybrid search is enabled.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub hybrid_search: Option<HybridSearch>,
+    /// The ranker to use for the file search.
+    pub ranker: RankVersionType,
+    /// The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will
+    /// attempt to return only the most relevant results, but may return fewer results.
+    // The documented 0..=1 range is not enforced by the type.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub score_threshold: Option<f32>,
+}
+
+/// Type marker for an approximate user location.
+///
+/// Has a single variant, serialized as `approximate`, which is also the value
+/// produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum WebSearchApproximateLocationType {
+    /// Serialized as `approximate`.
+    #[default]
+    Approximate,
+}
+
+/// Approximate user location for web search.
+///
+/// A `WebSearchApproximateLocationArgs` builder is generated for this struct;
+/// its `build` step reports failures as `OpenAIError`. `type` is always
+/// serialized, while the optional fields are omitted when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "WebSearchApproximateLocationArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct WebSearchApproximateLocation {
+    /// The type of location approximation. Always `approximate`.
+    pub r#type: WebSearchApproximateLocationType,
+    /// Free text input for the city of the user, e.g. `San Francisco`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub city: Option<String>,
+    /// The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user,
+    /// e.g. `US`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub country: Option<String>,
+    /// Free text input for the region of the user, e.g. `California`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub region: Option<String>,
+    /// The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g.
+    /// `America/Los_Angeles`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub timezone: Option<String>,
+}
+
+/// Container configuration for a code interpreter.
+///
+/// `Auto` is internally tagged (an object with `"type": "auto"`), while
+/// `ContainerID` is marked `#[serde(untagged)]` and is therefore represented
+/// as a bare string.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum CodeInterpreterToolContainer {
+    /// Configuration for a code interpreter container. Optionally specify the IDs of the
+    /// files to run the code on.
+    Auto(CodeInterpreterContainerAuto),
+
+    /// The container ID.
+    #[serde(untagged)]
+    ContainerID(String),
+}
+
+/// Auto configuration for code interpreter container.
+///
+/// Both fields are optional and are omitted from the serialized output when
+/// `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+pub struct CodeInterpreterContainerAuto {
+    /// An optional list of uploaded files to make available to your code.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_ids: Option<Vec<String>>,
+
+    /// An optional memory limit for the container.
+    // NOTE(review): the unit and accepted values of `memory_limit` are not
+    // visible here — confirm against the upstream API definition.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub memory_limit: Option<u64>,
+}
+
+/// Configuration for the code interpreter tool.
+///
+/// A `CodeInterpreterToolArgs` builder is generated for this struct; its
+/// `build` step reports failures as `OpenAIError`.
+// NOTE(review): `derive(Default)` requires `CodeInterpreterToolContainer: Default`,
+// which the enum does not derive; presumably a manual impl exists elsewhere — confirm.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "CodeInterpreterToolArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CodeInterpreterTool {
+    /// The code interpreter container. Can be a container ID or an object that
+    /// specifies uploaded file IDs to make available to your code, along with an
+    /// optional `memory_limit` setting.
+    pub container: CodeInterpreterToolContainer,
+}
+
+/// Optional mask for image inpainting, given as an image URL and/or a file ID.
+///
+/// Both fields are optional and are omitted from the serialized output when
+/// `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ImageGenToolInputImageMask {
+    /// Base64-encoded mask image.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub image_url: Option<String>,
+    /// File ID for the mask image.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+}
+
+/// How much effort the model exerts to match the style and features of input
+/// images.
+///
+/// Serialized in lowercase (`high`, `low`). `Low` is the value produced by
+/// `Default`, matching the documented default of the `input_fidelity` option.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum InputFidelity {
+    /// Serialized as `high`.
+    High,
+    /// Serialized as `low`; the default.
+    #[default]
+    Low,
+}
+
+/// Moderation level for generated images.
+///
+/// Serialized in lowercase (`auto`, `low`). `Auto` is the value produced by
+/// `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenToolModeration {
+    /// Serialized as `auto`; the default.
+    #[default]
+    Auto,
+    /// Serialized as `low`.
+    Low,
+}
+
+/// Image generation tool definition.
+///
+/// An `ImageGenerationArgs` builder is generated for this struct; its `build`
+/// step reports failures as `OpenAIError`. Every field is optional and is
+/// omitted from the serialized output when `None`; the defaults mentioned in
+/// the field docs are not filled in by this type.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder, ToSchema)]
+#[builder(
+    name = "ImageGenerationArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ImageGenTool {
+    /// Background type for the generated image. One of `transparent`,
+    /// `opaque`, or `auto`. Default: `auto`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub background: Option<ImageGenToolBackground>,
+    /// Control how much effort the model will exert to match the style and features, especially facial features,
+    /// of input images. This parameter is only supported for `gpt-image-1`. Unsupported
+    /// for `gpt-image-1-mini`. Supports `high` and `low`. Defaults to `low`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_fidelity: Option<InputFidelity>,
+    /// Optional mask for inpainting. Contains `image_url`
+    /// (string, optional) and `file_id` (string, optional).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_image_mask: Option<ImageGenToolInputImageMask>,
+    /// The image generation model to use. Default: `gpt-image-1`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    /// Moderation level for the generated image. Default: `auto`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub moderation: Option<ImageGenToolModeration>,
+    /// Compression level for the output image. Default: 100.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_compression: Option<u8>,
+    /// The output format of the generated image. One of `png`, `webp`, or
+    /// `jpeg`. Default: `png`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_format: Option<ImageGenToolOutputFormat>,
+    /// Number of partial images to generate in streaming mode, from 0 (default value) to 3.
+    // The documented 0..=3 range is not enforced by the type.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub partial_images: Option<u8>,
+    /// The quality of the generated image. One of `low`, `medium`, `high`,
+    /// or `auto`. Default: `auto`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub quality: Option<ImageGenToolQuality>,
+    /// The size of the generated image. One of `1024x1024`, `1024x1536`,
+    /// `1536x1024`, or `auto`. Default: `auto`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub size: Option<ImageGenToolSize>,
+}
+
+/// Background type for a generated image.
+///
+/// Serialized in lowercase (`transparent`, `opaque`, `auto`). `Auto` is the
+/// value produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenToolBackground {
+    /// Serialized as `transparent`.
+    Transparent,
+    /// Serialized as `opaque`.
+    Opaque,
+    /// Serialized as `auto`; the default.
+    #[default]
+    Auto,
+}
+
+/// Output file format for a generated image.
+///
+/// Serialized in lowercase (`png`, `webp`, `jpeg`). `Png` is the value
+/// produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenToolOutputFormat {
+    /// Serialized as `png`; the default.
+    #[default]
+    Png,
+    /// Serialized as `webp`.
+    Webp,
+    /// Serialized as `jpeg`.
+    Jpeg,
+}
+
+/// Quality setting for a generated image.
+///
+/// Serialized in lowercase (`low`, `medium`, `high`, `auto`). `Auto` is the
+/// value produced by `Default`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenToolQuality {
+    /// Serialized as `low`.
+    Low,
+    /// Serialized as `medium`.
+    Medium,
+    /// Serialized as `high`.
+    High,
+    /// Serialized as `auto`; the default.
+    #[default]
+    Auto,
+}
+
+/// Size of a generated image.
+///
+/// `Auto` is serialized as `auto` and is the value produced by `Default`. The
+/// fixed sizes carry explicit wire names because they are not valid Rust
+/// identifiers.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenToolSize {
+    /// Serialized as `auto`; the default.
+    #[default]
+    Auto,
+    /// Serialized as `1024x1024`.
+    #[serde(rename = "1024x1024")]
+    Size1024x1024,
+    /// Serialized as `1024x1536`.
+    #[serde(rename = "1024x1536")]
+    Size1024x1536,
+    /// Serialized as `1536x1024`.
+    #[serde(rename = "1536x1024")]
+    Size1536x1024,
+}
+
+/// How the model may use a restricted set of allowed tools.
+///
+/// Serialized in lowercase (`auto`, `required`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ToolChoiceAllowedMode {
+    /// The model may pick from among the allowed tools and generate a message.
+    Auto,
+    /// The model must call one or more of the allowed tools.
+    Required,
+}
+
+/// Tool choice that constrains the model to a pre-defined set of tools.
+///
+/// Both fields are required. The tool definitions are kept as raw JSON values
+/// and are not validated by this type.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ToolChoiceAllowed {
+    /// Constrains the tools available to the model to a pre-defined set.
+    ///
+    /// `auto` allows the model to pick from among the allowed tools and generate a
+    /// message.
+    ///
+    /// `required` requires the model to call one or more of the allowed tools.
+    pub mode: ToolChoiceAllowedMode,
+    /// A list of tool definitions that the model should be allowed to call.
+    ///
+    /// For the Responses API, the list of tool definitions might look like:
+    /// ```json
+    /// [
+    ///   { "type": "function", "name": "get_weather" },
+    ///   { "type": "mcp", "server_label": "deepwiki" },
+    ///   { "type": "image_generation" }
+    /// ]
+    /// ```
+    pub tools: Vec<serde_json::Value>,
+}
+
+/// The type of hosted tool the model should use. Learn more about
+/// [built-in tools](https://platform.openai.com/docs/guides/tools).
+///
+/// Internally tagged on the `type` field with snake_case variant names, so
+/// each value is an object such as `{ "type": "file_search" }`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ToolChoiceTypes {
+    /// Tagged as `file_search`.
+    FileSearch,
+    /// Tagged as `web_search_preview`.
+    WebSearchPreview,
+    /// Tagged as `computer_use_preview`.
+    ComputerUsePreview,
+    /// Tagged as `code_interpreter`.
+    CodeInterpreter,
+    /// Tagged as `image_generation`.
+    ImageGeneration,
+}
+
+/// Tool choice payload that names a specific function to call.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ToolChoiceFunction {
+    /// The name of the function to call.
+    pub name: String,
+}
+
+/// Tool choice payload that names a specific tool on a remote MCP server.
+///
+/// Both fields are required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ToolChoiceMCP {
+    /// The name of the tool to call on the server.
+    pub name: String,
+    /// The label of the MCP server to use.
+    pub server_label: String,
+}
+
+/// Tool choice payload that names a specific custom tool to call.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ToolChoiceCustom {
+    /// The name of the custom tool to call.
+    pub name: String,
+}
+
+/// How the model should select which tool (or tools) to use.
+///
+/// The leading variants are internally tagged on the `type` field with
+/// snake_case names. The last two variants are marked `#[serde(untagged)]`:
+/// `Hosted` delegates to [`ToolChoiceTypes`] and `Mode` to
+/// [`ToolChoiceOptions`]. Variant order is significant for deserialization and
+/// must not be changed casually.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ToolChoiceParam {
+    /// Constrains the tools available to the model to a pre-defined set.
+    AllowedTools(ToolChoiceAllowed),
+
+    /// Use this option to force the model to call a specific function.
+    Function(ToolChoiceFunction),
+
+    /// Use this option to force the model to call a specific tool on a remote MCP server.
+    Mcp(ToolChoiceMCP),
+
+    /// Use this option to force the model to call a custom tool.
+    Custom(ToolChoiceCustom),
+
+    /// Forces the model to call the apply_patch tool when executing a tool call.
+    ApplyPatch,
+
+    /// Forces the model to call the function shell tool when a tool call is required.
+    Shell,
+
+    /// Indicates that the model should use a built-in tool to generate a response.
+    /// [Learn more about built-in tools](https://platform.openai.com/docs/guides/tools).
+    #[serde(untagged)]
+    Hosted(ToolChoiceTypes),
+
+    /// Controls which (if any) tool is called by the model.
+    ///
+    /// `none` means the model will not call any tool and instead generates a message.
+    ///
+    /// `auto` means the model can pick between generating a message or calling one or
+    /// more tools.
+    ///
+    /// `required` means the model must call one or more tools.
+    #[serde(untagged)]
+    Mode(ToolChoiceOptions),
+}
+
+/// Simple tool-choice modes.
+///
+/// Serialized in lowercase (`none`, `auto`, `required`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ToolChoiceOptions {
+    /// The model will not call any tool and instead generates a message.
+    None,
+    /// The model can pick between generating a message or calling one or more tools.
+    Auto,
+    /// The model must call one or more tools.
+    Required,
+}
+
+/// An error that occurred while generating the response.
+///
+/// Both fields are required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ErrorObject {
+    /// A machine-readable error code that was returned.
+    pub code: String,
+    /// A human-readable description of the error that was returned.
+    pub message: String,
+}
+
+/// Details about an incomplete response.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct IncompleteDetails {
+    /// The reason why the response is incomplete, as a free-form string.
+    pub reason: String,
+}
+
+/// One candidate token together with its log probability.
+///
+/// Used as the element type of `LogProb::top_logprobs`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct TopLogProb {
+    /// Raw bytes associated with the token.
+    // NOTE(review): presumably the token's UTF-8 encoding — confirm.
+    pub bytes: Vec<u8>,
+    /// The log probability of this token.
+    pub logprob: f64,
+    /// The token text.
+    pub token: String,
+}
+
+/// Log probability information for one token, including its top alternatives.
+///
+/// All fields are required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct LogProb {
+    /// Raw bytes associated with the token.
+    // NOTE(review): presumably the token's UTF-8 encoding — confirm.
+    pub bytes: Vec<u8>,
+    /// The log probability of this token.
+    pub logprob: f64,
+    /// The token text.
+    pub token: String,
+    /// Alternative candidate tokens with their log probabilities.
+    pub top_logprobs: Vec<TopLogProb>,
+}
+
+/// One candidate token with its log probability, without byte information.
+///
+/// Used as the element type of `ResponseLogProb::top_logprobs`.
+// NOTE(review): "Lob" in the type name looks like a typo for "Log"; renaming
+// would change the public API, so it is left as-is.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseTopLobProb {
+    /// The log probability of this token.
+    pub logprob: f64,
+    /// A possible text token.
+    pub token: String,
+}
+
+/// Log probability information for one token, including its top alternatives,
+/// without byte information.
+///
+/// All fields are required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseLogProb {
+    /// The log probability of this token.
+    pub logprob: f64,
+    /// A possible text token.
+    pub token: String,
+    /// The log probability of the top 20 most likely tokens.
+    // The length is not enforced by the type; any number of entries is accepted.
+    pub top_logprobs: Vec<ResponseTopLobProb>,
+}
+
+/// A simple text output from the model.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct OutputTextContent {
+    /// The annotations of the text output.
+    pub annotations: Vec<Annotation>,
+    /// Optional per-token log probabilities for the text.
+    // There is no `skip_serializing_if` on this field, so `None` is serialized
+    // as an explicit `null` rather than being omitted.
+    pub logprobs: Option<Vec<LogProb>>,
+    /// The text output from the model.
+    pub text: String,
+}
+
+/// An annotation that applies to a span of output text.
+///
+/// Internally tagged on the `type` field with snake_case variant names:
+/// `file_citation`, `url_citation`, `container_file_citation`, and `file_path`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum Annotation {
+    /// A citation to a file.
+    FileCitation(FileCitationBody),
+    /// A citation for a web resource used to generate a model response.
+    UrlCitation(UrlCitationBody),
+    /// A citation for a container file used to generate a model response.
+    ContainerFileCitation(ContainerFileCitationBody),
+    /// A path to a file.
+    FilePath(FilePath),
+}
+
+/// Payload of a file citation annotation.
+///
+/// The fields are public so that code outside this module can read a
+/// deserialized citation and construct one when producing output.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FileCitationBody {
+    /// The ID of the file.
+    pub file_id: String,
+    /// The filename of the file cited.
+    pub filename: String,
+    /// The index of the file in the list of files.
+    pub index: u32,
+}
+
+/// Payload of a URL citation annotation.
+///
+/// The fields are public so that code outside this module can read a
+/// deserialized citation and construct one when producing output.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct UrlCitationBody {
+    /// The index of the last character of the URL citation in the message.
+    pub end_index: u32,
+    /// The index of the first character of the URL citation in the message.
+    pub start_index: u32,
+    /// The title of the web resource.
+    pub title: String,
+    /// The URL of the web resource.
+    pub url: String,
+}
+
+/// Payload of a container file citation annotation.
+///
+/// The fields are public so that code outside this module can read a
+/// deserialized citation and construct one when producing output.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ContainerFileCitationBody {
+    /// The ID of the container file.
+    pub container_id: String,
+    /// The index of the last character of the container file citation in the message.
+    pub end_index: u32,
+    /// The ID of the file.
+    pub file_id: String,
+    /// The filename of the container file cited.
+    pub filename: String,
+    /// The index of the first character of the container file citation in the message.
+    pub start_index: u32,
+}
+
+/// Payload of a file path annotation.
+///
+/// The fields are public so that code outside this module can read a
+/// deserialized annotation and construct one when producing output.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FilePath {
+    /// The ID of the file.
+    pub file_id: String,
+    /// The index of the file in the list of files.
+    pub index: u32,
+}
+
+/// A refusal explanation from the model.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct RefusalContent {
+    /// The refusal explanation from the model, as plain text.
+    pub refusal: String,
+}
+
+/// A message generated by the model.
+///
+/// All declared fields are required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct OutputMessage {
+    /// The content of the output message.
+    pub content: Vec<OutputMessageContent>,
+    /// The unique ID of the output message.
+    pub id: String,
+    /// The role of the output message. Always `assistant`.
+    pub role: AssistantRole,
+    /// The status of the message input. One of `in_progress`, `completed`, or
+    /// `incomplete`. Populated when input items are returned via API.
+    pub status: OutputStatus,
+    // The `type` field (always `message`) is deliberately not declared on this
+    // struct. NOTE(review): presumably the discriminator is emitted by an
+    // enclosing tagged enum — confirm before adding `pub r#type: MessageType`.
+}
+
+/// Type marker for message items. Its single variant serializes as the lowercase
+/// string `message` and is also the `Default` value.
+// NOTE(review): the only reference visible near this definition is a commented-out field on
+// `OutputMessage` — check whether this type is still used elsewhere.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum MessageType {
+    #[default]
+    Message,
+}
+
+/// The role for an output message - always `assistant`.
+/// This type ensures type safety by only allowing the assistant role.
+///
+/// Serializes as the lowercase string `assistant`; `Default` yields the only variant.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq, Default, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum AssistantRole {
+    #[default]
+    Assistant,
+}
+
+/// Content parts that can appear in an `OutputMessage`.
+///
+/// Internally tagged: the `type` field selects the variant, using snake_case names
+/// (`output_text`, `refusal`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum OutputMessageContent {
+    /// A text output from the model.
+    OutputText(OutputTextContent),
+    /// A refusal from the model.
+    Refusal(RefusalContent),
+}
+
+/// Output content parts: the same variants as `OutputMessageContent` plus reasoning text.
+///
+/// Internally tagged: the `type` field selects the variant, using snake_case names
+/// (`output_text`, `refusal`, `reasoning_text`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum OutputContent {
+    /// A text output from the model.
+    OutputText(OutputTextContent),
+    /// A refusal from the model.
+    Refusal(RefusalContent),
+    /// Reasoning text from the model.
+    ReasoningText(ReasoningTextContent),
+}
+
+/// Reasoning text emitted by the model.
+///
+/// Payload of `OutputContent::ReasoningText` and element type of `ReasoningItem::content`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ReasoningTextContent {
+    /// The reasoning text from the model.
+    pub text: String,
+}
+
+/// A reasoning item representing the model's chain of thought, including summary paragraphs.
+///
+/// The optional fields (`content`, `encrypted_content`, `status`) are left out of the
+/// serialized output entirely when they are `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ReasoningItem {
+    /// Unique identifier of the reasoning content.
+    pub id: String,
+    /// Reasoning summary content.
+    pub summary: Vec<SummaryPart>,
+    /// Reasoning text content.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub content: Option<Vec<ReasoningTextContent>>,
+    /// The encrypted content of the reasoning item - populated when a response is generated with
+    /// `reasoning.encrypted_content` in the `include` parameter.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub encrypted_content: Option<String>,
+    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
+    /// Populated when items are returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>,
+}
+
+/// A single summary text fragment from reasoning.
+///
+/// Payload of `SummaryPart::SummaryText`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct Summary {
+    /// A summary of the reasoning output from the model so far.
+    pub text: String,
+}
+
+/// One part of a reasoning summary.
+///
+/// Internally tagged via the `type` field; the single variant serializes as `summary_text`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum SummaryPart {
+    /// A summary text fragment.
+    SummaryText(Summary),
+}
+
+/// File search tool call output.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FileSearchToolCall {
+    /// The unique ID of the file search tool call.
+    pub id: String,
+    /// The queries used to search for files.
+    pub queries: Vec<String>,
+    /// The status of the file search tool call. One of `in_progress`, `searching`,
+    /// `incomplete`, `failed`, or `completed`.
+    pub status: FileSearchToolCallStatus,
+    /// The results of the file search tool call. Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub results: Option<Vec<FileSearchToolCallResult>>,
+}
+
+/// Status of a file search tool call.
+///
+/// Variants serialize in snake_case: `in_progress`, `searching`, `incomplete`, `failed`,
+/// `completed`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum FileSearchToolCallStatus {
+    InProgress,
+    Searching,
+    Incomplete,
+    Failed,
+    Completed,
+}
+
+/// A single result from a file search.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FileSearchToolCallResult {
+    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
+    /// additional information about the object in a structured format, and querying for objects
+    /// via API or the dashboard. Keys are strings with a maximum length of 64 characters.
+    /// Values are strings with a maximum length of 512 characters, booleans, or numbers.
+    // The limits above are documentation only; this type does not enforce them.
+    pub attributes: HashMap<String, serde_json::Value>,
+    /// The unique ID of the file.
+    pub file_id: String,
+    /// The name of the file.
+    pub filename: String,
+    /// The relevance score of the file - a value between 0 and 1.
+    pub score: f32,
+    /// The text that was retrieved from the file.
+    pub text: String,
+}
+
+/// A pending safety check attached to a computer call.
+///
+/// Element type of `ComputerToolCall::pending_safety_checks`. `code` and `message` are
+/// omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ComputerCallSafetyCheckParam {
+    /// The ID of the pending safety check.
+    pub id: String,
+    /// The type of the pending safety check.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub code: Option<String>,
+    /// Details about the pending safety check.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub message: Option<String>,
+}
+
+/// Status of a web search tool call.
+///
+/// Variants serialize in snake_case: `in_progress`, `searching`, `completed`, `failed`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum WebSearchToolCallStatus {
+    InProgress,
+    Searching,
+    Completed,
+    Failed,
+}
+
+/// A source consulted during a web search action.
+///
+/// Element type of `WebSearchActionSearch::sources`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct WebSearchActionSearchSource {
+    /// The type of source. Always `url`.
+    // Stored as a free-form string: the `url` value is documented, not enforced by the type.
+    pub r#type: String,
+    /// The URL of the source.
+    pub url: String,
+}
+
+/// Payload of the `search` web search action: the query and, optionally, its sources.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct WebSearchActionSearch {
+    /// The search query.
+    pub query: String,
+    /// The sources used in the search.
+    // No `skip_serializing_if` here: a `None` value is still written by the serializer.
+    pub sources: Option<Vec<WebSearchActionSearchSource>>,
+}
+
+/// Payload of the `open_page` web search action.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct WebSearchActionOpenPage {
+    /// The URL opened by the model.
+    // No `skip_serializing_if` here: a `None` value is still written by the serializer.
+    pub url: Option<String>,
+}
+
+/// Payload shared by the `find` and `find_in_page` web search actions.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct WebSearchActionFind {
+    /// The URL of the page searched for the pattern.
+    pub url: String,
+    /// The pattern or text to search for within the page.
+    pub pattern: String,
+}
+
+/// The action taken in a web search tool call.
+///
+/// Internally tagged: the `type` field selects the variant, using snake_case names
+/// (`search`, `open_page`, `find`, `find_in_page`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum WebSearchToolCallAction {
+    /// Action type "search" - Performs a web search query.
+    Search(WebSearchActionSearch),
+    /// Action type "open_page" - Opens a specific URL from search results.
+    OpenPage(WebSearchActionOpenPage),
+    /// Action type "find" - Searches for a pattern within a loaded page.
+    Find(WebSearchActionFind),
+    /// Action type "find_in_page" - Carries the same payload as "find".
+    /// See <https://platform.openai.com/docs/guides/tools-web-search#output-and-citations>.
+    FindInPage(WebSearchActionFind),
+}
+
+/// Web search tool call output.
+///
+/// All three fields are required; none is skipped during serialization.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct WebSearchToolCall {
+    /// An object describing the specific action taken in this web search call. Includes
+    /// details on how the model used the web (search, open_page, find, find_in_page).
+    pub action: WebSearchToolCallAction,
+    /// The unique ID of the web search tool call.
+    pub id: String,
+    /// The status of the web search tool call.
+    pub status: WebSearchToolCallStatus,
+}
+
+/// Output from a computer tool call.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ComputerToolCall {
+    /// The computer action associated with this call (click, drag, type, ...).
+    pub action: ComputerAction,
+    /// An identifier used when responding to the tool call with output.
+    pub call_id: String,
+    /// The unique ID of the computer call.
+    pub id: String,
+    /// The pending safety checks for the computer call.
+    pub pending_safety_checks: Vec<ComputerCallSafetyCheckParam>,
+    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
+    /// Populated when items are returned via API.
+    pub status: OutputStatus,
+}
+
+/// A point in 2D space.
+///
+/// Element type of the `path` field of `Drag`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct DragPoint {
+    /// The x-coordinate.
+    pub x: i32,
+    /// The y-coordinate.
+    pub y: i32,
+}
+
+/// Represents all user-triggered actions.
+///
+/// Internally tagged: the `type` field selects the variant, using snake_case names
+/// (`click`, `double_click`, `drag`, `keypress`, `move`, `screenshot`, `scroll`, `type`,
+/// `wait`). `Screenshot` and `Wait` are unit variants and carry no fields beyond the tag.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ComputerAction {
+    /// A click action.
+    Click(ClickParam),
+
+    /// A double click action.
+    DoubleClick(DoubleClickAction),
+
+    /// A drag action.
+    Drag(Drag),
+
+    /// A collection of keypresses the model would like to perform.
+    Keypress(KeyPressAction),
+
+    /// A mouse move action.
+    Move(Move),
+
+    /// A screenshot action.
+    Screenshot,
+
+    /// A scroll action.
+    Scroll(Scroll),
+
+    /// An action to type in text.
+    Type(Type),
+
+    /// A wait action.
+    Wait,
+}
+
+/// The mouse button used for a click action.
+///
+/// Variants serialize in lowercase: `left`, `right`, `wheel`, `back`, `forward`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "lowercase")]
+pub enum ClickButtonType {
+    Left,
+    Right,
+    Wheel,
+    Back,
+    Forward,
+}
+
+/// A click action.
+///
+/// Payload of `ComputerAction::Click`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct ClickParam {
+    /// Indicates which mouse button was pressed during the click. One of `left`,
+    /// `right`, `wheel`, `back`, or `forward`.
+    pub button: ClickButtonType,
+    /// The x-coordinate where the click occurred.
+    pub x: i32,
+    /// The y-coordinate where the click occurred.
+    pub y: i32,
+}
+
+/// A double click action.
+///
+/// Payload of `ComputerAction::DoubleClick`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct DoubleClickAction {
+    /// The x-coordinate where the double click occurred.
+    pub x: i32,
+    /// The y-coordinate where the double click occurred.
+    pub y: i32,
+}
+
+/// A drag action.
+///
+/// Payload of `ComputerAction::Drag`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct Drag {
+    /// The path of points the cursor drags through.
+    pub path: Vec<DragPoint>,
+}
+
+/// A keypress action.
+///
+/// Payload of `ComputerAction::Keypress`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct KeyPressAction {
+    /// The combination of keys the model is requesting to be pressed.
+    /// This is an array of strings, each representing a key.
+    pub keys: Vec<String>,
+}
+
+/// A mouse move action.
+///
+/// Payload of `ComputerAction::Move`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct Move {
+    /// The x-coordinate to move to.
+    pub x: i32,
+    /// The y-coordinate to move to.
+    pub y: i32,
+}
+
+/// A scroll action.
+///
+/// Payload of `ComputerAction::Scroll`. Distances and coordinates are signed integers.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct Scroll {
+    /// The horizontal scroll distance.
+    pub scroll_x: i32,
+    /// The vertical scroll distance.
+    pub scroll_y: i32,
+    /// The x-coordinate where the scroll occurred.
+    pub x: i32,
+    /// The y-coordinate where the scroll occurred.
+    pub y: i32,
+}
+
+/// A typing (text entry) action.
+///
+/// Payload of `ComputerAction::Type`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, ToSchema)]
+pub struct Type {
+    /// The text to type.
+    pub text: String,
+}
+
+/// A function tool call generated by the model.
+///
+/// `id` and `status` are omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionToolCall {
+    /// A JSON string of the arguments to pass to the function.
+    // Kept as a raw string; this type does not parse or validate the JSON.
+    pub arguments: String,
+    /// The unique ID of the function tool call generated by the model.
+    pub call_id: String,
+    /// The name of the function to run.
+    pub name: String,
+    /// The unique ID of the function tool call.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The status of the item. One of `in_progress`, `completed`, or `incomplete`.
+    /// Populated when items are returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>, // TODO rename OutputStatus?
+}
+
+/// Status of an image generation tool call.
+///
+/// Variants serialize in snake_case: `in_progress`, `completed`, `generating`, `failed`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ImageGenToolCallStatus {
+    InProgress,
+    Completed,
+    Generating,
+    Failed,
+}
+
+/// An image generation tool call and, when available, its result.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ImageGenToolCall {
+    /// The unique ID of the image generation call.
+    pub id: String,
+    /// The generated image encoded in base64.
+    // No `skip_serializing_if` here: a `None` value is still written by the serializer.
+    pub result: Option<String>,
+    /// The status of the image generation call.
+    pub status: ImageGenToolCallStatus,
+}
+
+/// Status of a code interpreter tool call.
+///
+/// Variants serialize in snake_case: `in_progress`, `completed`, `incomplete`,
+/// `interpreting`, `failed`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum CodeInterpreterToolCallStatus {
+    InProgress,
+    Completed,
+    Incomplete,
+    Interpreting,
+    Failed,
+}
+
+/// Output of a code interpreter request.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CodeInterpreterToolCall {
+    /// The code to run, or null if not available.
+    // With `skip_serializing_if`, a `None` value is omitted on output rather than written
+    // as an explicit null.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub code: Option<String>,
+    /// ID of the container used to run the code.
+    pub container_id: String,
+    /// The unique ID of the code interpreter tool call.
+    pub id: String,
+    /// The outputs generated by the code interpreter, such as logs or images.
+    /// Can be null if no outputs are available.
+    // As with `code`, `None` is omitted on output rather than written as null.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub outputs: Option<Vec<CodeInterpreterToolCallOutput>>,
+    /// The status of the code interpreter tool call.
+    /// Valid values are `in_progress`, `completed`, `incomplete`, `interpreting`, and `failed`.
+    pub status: CodeInterpreterToolCallStatus,
+}
+
+/// Individual result from a code interpreter: either logs or an image.
+///
+/// Internally tagged: the `type` field selects the variant (`logs` or `image`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum CodeInterpreterToolCallOutput {
+    /// Code interpreter output logs
+    Logs(CodeInterpreterOutputLogs),
+    /// Code interpreter output image
+    Image(CodeInterpreterOutputImage),
+}
+
+/// Log output produced by the code interpreter.
+///
+/// Payload of `CodeInterpreterToolCallOutput::Logs`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CodeInterpreterOutputLogs {
+    /// The logs output from the code interpreter.
+    pub logs: String,
+}
+
+/// Image output produced by the code interpreter, referenced by URL.
+///
+/// Payload of `CodeInterpreterToolCallOutput::Image`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CodeInterpreterOutputImage {
+    /// The URL of the image output from the code interpreter.
+    pub url: String,
+}
+
+/// A file associated with the code interpreter, identified by ID and MIME type.
+///
+/// Fields are `pub` so the value can be constructed and inspected outside this module,
+/// like the other response types declared in this file.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CodeInterpreterFile {
+    /// The ID of the file.
+    pub file_id: String,
+    /// The MIME type of the file.
+    pub mime_type: String,
+}
+
+/// A local shell tool call generated by the model.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct LocalShellToolCall {
+    /// Execute a shell command on the server.
+    pub action: LocalShellExecAction,
+    /// The unique ID of the local shell tool call generated by the model.
+    pub call_id: String,
+    /// The unique ID of the local shell call.
+    pub id: String,
+    /// The status of the local shell call.
+    pub status: OutputStatus,
+}
+
+/// Define the shape of a local shell action (exec).
+///
+/// `command` and `env` are required. The three optional fields carry no
+/// `skip_serializing_if`, so `None` values are still written by the serializer.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct LocalShellExecAction {
+    /// The command to run.
+    pub command: Vec<String>,
+    /// Environment variables to set for the command.
+    pub env: HashMap<String, String>,
+    /// Optional timeout in milliseconds for the command.
+    pub timeout_ms: Option<u64>,
+    /// Optional user to run the command as.
+    pub user: Option<String>,
+    /// Optional working directory to run the command in.
+    pub working_directory: Option<String>,
+}
+
+/// Commands and limits describing how to run the shell tool call.
+///
+/// `timeout_ms` and `max_output_length` are omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellActionParam {
+    /// Ordered shell commands for the execution environment to run.
+    pub commands: Vec<String>,
+    /// Maximum wall-clock time in milliseconds to allow the shell commands to run.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub timeout_ms: Option<u64>,
+    /// Maximum number of UTF-8 characters to capture from combined stdout and stderr output.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_length: Option<u64>,
+}
+
+/// Status values reported for shell tool calls.
+///
+/// Variants serialize in snake_case: `in_progress`, `completed`, `incomplete`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum FunctionShellCallItemStatus {
+    InProgress,
+    Completed,
+    Incomplete,
+}
+
+/// A tool representing a request to execute one or more shell commands.
+///
+/// `id` and `status` are omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCallItemParam {
+    /// The unique ID of the shell tool call. Populated when this item is returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The unique ID of the shell tool call generated by the model.
+    pub call_id: String,
+    /// The shell commands and limits that describe how to run the tool call.
+    pub action: FunctionShellActionParam,
+    /// The status of the shell call. One of `in_progress`, `completed`, or `incomplete`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<FunctionShellCallItemStatus>,
+}
+
+/// Indicates that the shell commands finished and returned an exit code.
+///
+/// Payload of `FunctionShellCallOutputOutcomeParam::Exit`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCallOutputExitOutcomeParam {
+    /// The exit code returned by the shell process.
+    pub exit_code: i32,
+}
+
+/// The exit or timeout outcome associated with this chunk.
+///
+/// Internally tagged: the `type` field selects the variant (`timeout` or `exit`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum FunctionShellCallOutputOutcomeParam {
+    /// The timeout outcome; a unit variant with no fields beyond the tag.
+    Timeout,
+    /// The exit outcome, carrying the exit code.
+    Exit(FunctionShellCallOutputExitOutcomeParam),
+}
+
+/// Captured stdout and stderr for a portion of a shell tool call output.
+///
+/// `outcome` is serialized as a nested value under the `outcome` key (no `flatten`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCallOutputContentParam {
+    /// Captured stdout output for this chunk of the shell call.
+    pub stdout: String,
+    /// Captured stderr output for this chunk of the shell call.
+    pub stderr: String,
+    /// The exit or timeout outcome associated with this chunk.
+    pub outcome: FunctionShellCallOutputOutcomeParam,
+}
+
+/// The streamed output items emitted by a shell tool call.
+///
+/// `id` and `max_output_length` are omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCallOutputItemParam {
+    /// The unique ID of the shell tool call output. Populated when this item is returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The unique ID of the shell tool call generated by the model.
+    pub call_id: String,
+    /// Captured chunks of stdout and stderr output, along with their associated outcomes.
+    pub output: Vec<FunctionShellCallOutputContentParam>,
+    /// The maximum number of UTF-8 characters captured for this shell call's combined output.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_length: Option<u64>,
+}
+
+/// Status values reported for apply_patch tool calls.
+///
+/// Variants serialize in snake_case: `in_progress`, `completed`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ApplyPatchCallStatusParam {
+    InProgress,
+    Completed,
+}
+
+/// Instruction for creating a new file via the apply_patch tool.
+///
+/// Payload of `ApplyPatchOperationParam::CreateFile`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchCreateFileOperationParam {
+    /// Path of the file to create relative to the workspace root.
+    pub path: String,
+    /// Unified diff content to apply when creating the file.
+    pub diff: String,
+}
+
+/// Instruction for deleting an existing file via the apply_patch tool.
+///
+/// Payload of `ApplyPatchOperationParam::DeleteFile`; carries only the path, no diff.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchDeleteFileOperationParam {
+    /// Path of the file to delete relative to the workspace root.
+    pub path: String,
+}
+
+/// Instruction for updating an existing file via the apply_patch tool.
+///
+/// Payload of `ApplyPatchOperationParam::UpdateFile`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchUpdateFileOperationParam {
+    /// Path of the file to update relative to the workspace root.
+    pub path: String,
+    /// Unified diff content to apply to the existing file.
+    pub diff: String,
+}
+
+/// One of the create_file, delete_file, or update_file operations supplied to the apply_patch tool.
+///
+/// Internally tagged: the `type` field selects the variant (`create_file`, `delete_file`,
+/// `update_file`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ApplyPatchOperationParam {
+    /// Create a new file from a diff.
+    CreateFile(ApplyPatchCreateFileOperationParam),
+    /// Delete an existing file.
+    DeleteFile(ApplyPatchDeleteFileOperationParam),
+    /// Update an existing file with a diff.
+    UpdateFile(ApplyPatchUpdateFileOperationParam),
+}
+
+/// A tool call representing a request to create, delete, or update files using diff patches.
+///
+/// Only `id` is optional; it is omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchToolCallItemParam {
+    /// The unique ID of the apply patch tool call. Populated when this item is returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The unique ID of the apply patch tool call generated by the model.
+    pub call_id: String,
+    /// The status of the apply patch tool call. One of `in_progress` or `completed`.
+    pub status: ApplyPatchCallStatusParam,
+    /// The specific create, delete, or update instruction for the apply_patch tool call.
+    pub operation: ApplyPatchOperationParam,
+}
+
+/// Outcome values reported for apply_patch tool call outputs.
+///
+/// Variants serialize in snake_case: `completed`, `failed`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ApplyPatchCallOutputStatusParam {
+    Completed,
+    Failed,
+}
+
+/// The streamed output emitted by an apply patch tool call.
+///
+/// `id` and `output` are omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchToolCallOutputItemParam {
+    /// The unique ID of the apply patch tool call output. Populated when this item is returned via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The unique ID of the apply patch tool call generated by the model.
+    pub call_id: String,
+    /// The status of the apply patch tool call output. One of `completed` or `failed`.
+    pub status: ApplyPatchCallOutputStatusParam,
+    /// Optional human-readable log text from the apply patch tool (e.g., patch results or errors).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output: Option<String>,
+}
+
+/// Shell exec action: the commands to run plus optional limits.
+///
+/// Unlike `FunctionShellActionParam`, the optional fields here carry no
+/// `skip_serializing_if`, so `None` values are still written by the serializer.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellAction {
+    /// A list of commands to run.
+    pub commands: Vec<String>,
+    /// Optional timeout in milliseconds for the commands.
+    pub timeout_ms: Option<u64>,
+    /// Optional maximum number of characters to return from each command.
+    pub max_output_length: Option<u64>,
+}
+
+/// Status values reported for function shell tool calls.
+///
+/// Variants serialize in snake_case: `in_progress`, `completed`, `incomplete`.
+// Despite the `LocalShell` prefix, this enum is the type of `FunctionShellCall::status`;
+// `LocalShellToolCall::status` uses `OutputStatus` instead.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum LocalShellCallStatus {
+    InProgress,
+    Completed,
+    Incomplete,
+}
+
+/// A tool call that executes one or more shell commands in a managed environment.
+///
+/// Only `created_by` is optional; it is omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCall {
+    /// The unique ID of the function shell tool call. Populated when this item is returned via API.
+    pub id: String,
+    /// The unique ID of the function shell tool call generated by the model.
+    pub call_id: String,
+    /// The shell commands and limits that describe how to run the tool call.
+    pub action: FunctionShellAction,
+    /// The status of the shell call. One of `in_progress`, `completed`, or `incomplete`.
+    pub status: LocalShellCallStatus,
+    /// The ID of the entity that created this tool call.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub created_by: Option<String>,
+}
+
+/// The content of a shell tool call output that was emitted.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCallOutputContent {
+    /// The standard output that was captured.
+    pub stdout: String,
+    /// The standard error output that was captured.
+    pub stderr: String,
+    /// Represents either an exit outcome (with an exit code) or a timeout outcome for a shell call output chunk.
+    // NOTE(review): because of `flatten` and the enum's `type` tag, the outcome's `type`
+    // (and `exit_code`) are read/written inline on this object rather than under an
+    // `outcome` key. `FunctionShellCallOutputContentParam` nests the outcome instead —
+    // confirm the flattened shape matches the intended wire format.
+    #[serde(flatten)]
+    pub outcome: FunctionShellCallOutputOutcome,
+    /// The identifier of the actor that created the item.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub created_by: Option<String>,
+}
+
+/// Function shell call outcome: either a timeout or an exit with an exit code.
+///
+/// Internally tagged: the `type` field selects the variant (`timeout` or `exit`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum FunctionShellCallOutputOutcome {
+    /// The timeout outcome; a unit variant with no fields beyond the tag.
+    Timeout,
+    /// The exit outcome, carrying the exit code.
+    Exit(FunctionShellCallOutputExitOutcome),
+}
+
+/// Indicates that the shell commands finished and returned an exit code.
+///
+/// Payload of `FunctionShellCallOutputOutcome::Exit`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCallOutputExitOutcome {
+    /// Exit code from the shell process.
+    pub exit_code: i32,
+}
+
+/// The output of a shell tool call that was emitted.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct FunctionShellCallOutput {
+    /// The unique ID of the shell call output. Populated when this item is returned via API.
+    pub id: String,
+    /// The unique ID of the shell tool call generated by the model.
+    pub call_id: String,
+    /// An array of shell call output contents
+    pub output: Vec<FunctionShellCallOutputContent>,
+    /// The maximum length of the shell command output. This is generated by the model and should be
+    /// passed back with the raw output.
+    // No `skip_serializing_if` here: a `None` value is still written by the serializer.
+    pub max_output_length: Option<u64>,
+    /// The identifier of the actor that created the item.
+    /// Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub created_by: Option<String>,
+}
+
+/// Status values reported for apply_patch tool calls.
+///
+/// Variants serialize in snake_case: `in_progress`, `completed`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ApplyPatchCallStatus {
+    InProgress,
+    Completed,
+}
+
+/// Instruction describing how to create a file via the apply_patch tool.
+///
+/// Payload of `ApplyPatchOperation::CreateFile`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchCreateFileOperation {
+    /// Path of the file to create.
+    pub path: String,
+    /// Diff to apply.
+    pub diff: String,
+}
+
+/// Instruction describing how to delete a file via the apply_patch tool.
+///
+/// Payload of `ApplyPatchOperation::DeleteFile`; carries only the path, no diff.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchDeleteFileOperation {
+    /// Path of the file to delete.
+    pub path: String,
+}
+
+/// Instruction describing how to update a file via the apply_patch tool.
+///
+/// Payload of `ApplyPatchOperation::UpdateFile`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchUpdateFileOperation {
+    /// Path of the file to update.
+    pub path: String,
+    /// Diff to apply.
+    pub diff: String,
+}
+
+/// One of the create_file, delete_file, or update_file operations applied via apply_patch.
+///
+/// Internally tagged: the `type` field selects the variant (`create_file`, `delete_file`,
+/// `update_file`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ApplyPatchOperation {
+    /// Create a new file from a diff.
+    CreateFile(ApplyPatchCreateFileOperation),
+    /// Delete an existing file.
+    DeleteFile(ApplyPatchDeleteFileOperation),
+    /// Update an existing file with a diff.
+    UpdateFile(ApplyPatchUpdateFileOperation),
+}
+
+/// A tool call that applies file diffs by creating, deleting, or updating files.
+///
+/// Only `created_by` is optional; it is omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchToolCall {
+    /// The unique ID of the apply patch tool call. Populated when this item is returned via API.
+    pub id: String,
+    /// The unique ID of the apply patch tool call generated by the model.
+    pub call_id: String,
+    /// The status of the apply patch tool call. One of `in_progress` or `completed`.
+    pub status: ApplyPatchCallStatus,
+    /// One of the create_file, delete_file, or update_file operations applied via apply_patch.
+    pub operation: ApplyPatchOperation,
+    /// The ID of the entity that created this tool call.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub created_by: Option<String>,
+}
+
+/// Outcome values reported for apply_patch tool call outputs.
+///
+/// Variants serialize in snake_case: `completed`, `failed`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ApplyPatchCallOutputStatus {
+    Completed,
+    Failed,
+}
+
+/// The output emitted by an apply patch tool call.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ApplyPatchToolCallOutput {
+    /// The unique ID of the apply patch tool call output. Populated when this item is returned via API.
+    pub id: String,
+    /// The unique ID of the apply patch tool call generated by the model.
+    pub call_id: String,
+    /// The status of the apply patch tool call output. One of `completed` or `failed`.
+    pub status: ApplyPatchCallOutputStatus,
+    /// Optional textual output returned by the apply patch tool.
+    // No `skip_serializing_if` here: a `None` value is still written by the serializer.
+    pub output: Option<String>,
+    /// The ID of the entity that created this tool call output.
+    /// Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub created_by: Option<String>,
+}
+
+/// Output of an MCP server tool invocation.
+///
+/// None of the optional fields carries `skip_serializing_if`, so `None` values are still
+/// written by the serializer.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct MCPToolCall {
+    /// A JSON string of the arguments passed to the tool.
+    // Kept as a raw string; this type does not parse or validate the JSON.
+    pub arguments: String,
+    /// The unique ID of the tool call.
+    pub id: String,
+    /// The name of the tool that was run.
+    pub name: String,
+    /// The label of the MCP server running the tool.
+    pub server_label: String,
+    /// Unique identifier for the MCP tool call approval request. Include this value
+    /// in a subsequent `mcp_approval_response` input to approve or reject the corresponding
+    /// tool call.
+    pub approval_request_id: Option<String>,
+    /// Error message from the call, if any.
+    pub error: Option<String>,
+    /// The output from the tool call.
+    pub output: Option<String>,
+    /// The status of the tool call. One of `in_progress`, `completed`, `incomplete`,
+    /// `calling`, or `failed`.
+    pub status: Option<MCPToolCallStatus>,
+}
+
+/// Status of an MCP tool call.
+///
+/// Variants serialize in snake_case: `in_progress`, `completed`, `incomplete`, `calling`,
+/// `failed`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum MCPToolCallStatus {
+    InProgress,
+    Completed,
+    Incomplete,
+    Calling,
+    Failed,
+}
+
+/// A listing of the tools available on an MCP server.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct MCPListTools {
+    /// The unique ID of the list.
+    pub id: String,
+    /// The label of the MCP server.
+    pub server_label: String,
+    /// The tools available on the server.
+    pub tools: Vec<MCPListToolsTool>,
+    /// Error message if listing failed. Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+}
+
+/// A request for approval to run a tool on an MCP server.
+///
+/// All fields are required.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct MCPApprovalRequest {
+    /// JSON string of arguments for the tool.
+    // Kept as a raw string; this type does not parse or validate the JSON.
+    pub arguments: String,
+    /// The unique ID of the approval request.
+    pub id: String,
+    /// The name of the tool to run.
+    pub name: String,
+    /// The label of the MCP server making the request.
+    pub server_label: String,
+}
+
+/// Instructions given either as plain text or as a list of input items.
+///
+/// The representation is untagged: a JSON string maps to `Text` and a JSON array of
+/// input items maps to `Array`; no discriminator field is written or expected.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum Instructions {
+    /// A text input to the model, equivalent to a text input with the `developer` role.
+    Text(String),
+    /// A list of one or many input items to the model, containing different content types.
+    Array(Vec<InputItem>),
+}
+
+/// The complete response returned by the Responses API.
+///
+/// Serialization note: only `billing`, `conversation`, `prompt`, and
+/// `prompt_cache_retention` are skipped when `None`; every other optional field is
+/// written out as an explicit `null`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[schema(no_recursion)]
+pub struct Response {
+    /// Whether to run the model response in the background.
+    /// [Learn more](https://platform.openai.com/docs/guides/background).
+    pub background: Option<bool>,
+
+    /// Billing information for the response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub billing: Option<Billing>,
+
+    /// The conversation that this response belongs to. Input items and output
+    /// items from this response are automatically added to this conversation.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub conversation: Option<Conversation>,
+
+    /// Unix timestamp (in seconds) when this Response was created.
+    pub created_at: u64,
+
+    /// Unix timestamp (in seconds) of when this Response was completed.
+    pub completed_at: Option<u64>,
+
+    /// An error object returned when the model fails to generate a Response.
+    pub error: Option<ErrorObject>,
+
+    /// The frequency penalty applied during sampling, between -2.0 and 2.0.
+    pub frequency_penalty: Option<f32>,
+
+    /// Unique identifier for this response.
+    pub id: String,
+
+    /// Details about why the response is incomplete, if any.
+    pub incomplete_details: Option<IncompleteDetails>,
+
+    /// A system (or developer) message inserted into the model's context.
+    ///
+    /// When using along with `previous_response_id`, the instructions from a previous response
+    /// will not be carried over to the next response. This makes it simple to swap out
+    /// system (or developer) messages in new responses.
+    pub instructions: Option<Instructions>,
+
+    /// An upper bound for the number of tokens that can be generated for a response,
+    /// including visible output tokens and
+    /// [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
+    pub max_output_tokens: Option<u32>,
+
+    /// An upper bound on the number of tool calls the model can make.
+    pub max_tool_calls: Option<u32>,
+
+    /// Set of 16 key-value pairs that can be attached to an object. This can be
+    /// useful for storing additional information about the object in a structured
+    /// format, and querying for objects via API or the dashboard.
+    ///
+    /// Keys are strings with a maximum length of 64 characters. Values are strings
+    /// with a maximum length of 512 characters.
+    #[schema(value_type = Object)]
+    pub metadata: Option<serde_json::Value>,
+
+    /// Model ID used to generate the response, like gpt-4o or o3. OpenAI offers a
+    /// wide range of models with different capabilities, performance characteristics,
+    /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models) to browse and compare available models.
+    pub model: String,
+
+    /// The object type of this resource - always set to `response`.
+    pub object: String,
+
+    /// An array of content items generated by the model.
+    ///
+    /// - The length and order of items in the output array is dependent on the model's response.
+    /// - Rather than accessing the first item in the output array and assuming it's an assistant
+    ///   message with the content generated by the model, you might consider using
+    ///   the `output_text` property where supported in SDKs.
+    pub output: Vec<OutputItem>,
+
+    /// Whether to allow the model to run tool calls in parallel.
+    pub parallel_tool_calls: Option<bool>,
+
+    /// The presence penalty applied during sampling, between -2.0 and 2.0.
+    pub presence_penalty: Option<f32>,
+
+    /// The unique ID of the previous response to the model. Use this to create multi-turn conversations.
+    /// Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+    /// Cannot be used in conjunction with `conversation`.
+    pub previous_response_id: Option<String>,
+
+    /// Reference to a prompt template and its variables.
+    /// [Learn more](https://platform.openai.com/docs/guides/text?api-mode=responses#reusable-prompts).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt: Option<Prompt>,
+
+    /// Used by OpenAI to cache responses for similar requests to optimize your cache hit rates. Replaces
+    /// the `user` field. [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
+    pub prompt_cache_key: Option<String>,
+
+    /// The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching,
+    /// which keeps cached prefixes active for longer, up to a maximum of 24 hours. [Learn
+    /// more](https://platform.openai.com/docs/guides/prompt-caching#prompt-cache-retention).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_retention: Option<PromptCacheRetention>,
+
+    /// **gpt-5 and o-series models only**
+    /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+    pub reasoning: Option<Reasoning>,
+
+    /// A stable identifier used to help detect users of your application that may be violating OpenAI's
+    /// usage policies.
+    pub safety_identifier: Option<String>,
+
+    /// Specifies the processing type used for serving the request.
+    pub service_tier: Option<ServiceTier>,
+
+    /// The status of the response generation.
+    /// One of `completed`, `failed`, `in_progress`, `cancelled`, `queued`, or `incomplete`.
+    pub status: Status,
+
+    /// Whether the response is stored for later retrieval.
+    pub store: Option<bool>,
+
+    /// What sampling temperature was used, between 0 and 2. Higher values like 0.8 make
+    /// outputs more random, lower values like 0.2 make output more focused and deterministic.
+    pub temperature: Option<f32>,
+
+    /// Configuration options for a text response from the model. Can be plain
+    /// text or structured JSON data.
+    pub text: Option<ResponseTextParam>,
+
+    /// How the model should select which tool (or tools) to use when generating
+    /// a response.
+    pub tool_choice: Option<ToolChoiceParam>,
+
+    /// An array of tools the model may call while generating a response.
+    pub tools: Option<Vec<Tool>>,
+
+    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each
+    /// token position, each with an associated log probability.
+    pub top_logprobs: Option<u8>,
+
+    /// An alternative to sampling with temperature, called nucleus sampling,
+    /// where the model considers the results of the tokens with top_p probability
+    /// mass. So 0.1 means only the tokens comprising the top 10% probability mass
+    /// are considered.
+    pub top_p: Option<f32>,
+
+    /// The truncation strategy to use for the model response.
+    pub truncation: Option<Truncation>,
+
+    /// Represents token usage details including input tokens, output tokens,
+    /// a breakdown of output tokens, and the total tokens used.
+    pub usage: Option<ResponseUsage>,
+}
+
+/// Status of a [`Response`], as carried in its `status` field.
+///
+/// Variants are (de)serialized in `snake_case`, e.g. `InProgress` <-> `"in_progress"`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum Status {
+    /// Wire value `completed`.
+    Completed,
+    /// Wire value `failed`.
+    Failed,
+    /// Wire value `in_progress`.
+    InProgress,
+    /// Wire value `cancelled`.
+    Cancelled,
+    /// Wire value `queued`.
+    Queued,
+    /// Wire value `incomplete`.
+    Incomplete,
+}
+
+/// One item of a response's `output` array.
+///
+/// Internally tagged: the variant is chosen by a `type` field whose value is the variant
+/// name in `snake_case` (e.g. `FunctionCall` <-> `"type": "function_call"`), and the
+/// variant's payload fields sit alongside that tag in the same JSON object.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[schema(no_recursion)]
+#[serde(tag = "type")]
+#[serde(rename_all = "snake_case")]
+pub enum OutputItem {
+    /// An output message from the model.
+    Message(OutputMessage),
+    /// The results of a file search tool call. See the
+    /// [file search guide](https://platform.openai.com/docs/guides/tools-file-search)
+    /// for more information.
+    FileSearchCall(FileSearchToolCall),
+    /// A tool call to run a function. See the
+    /// [function calling guide](https://platform.openai.com/docs/guides/function-calling)
+    /// for more information.
+    FunctionCall(FunctionToolCall),
+    /// The results of a web search tool call. See the
+    /// [web search guide](https://platform.openai.com/docs/guides/tools-web-search)
+    /// for more information.
+    WebSearchCall(WebSearchToolCall),
+    /// A tool call to a computer use tool. See the
+    /// [computer use guide](https://platform.openai.com/docs/guides/tools-computer-use)
+    /// for more information.
+    ComputerCall(ComputerToolCall),
+    /// A description of the chain of thought used by a reasoning model while generating
+    /// a response. Be sure to include these items in your `input` to the Responses API for
+    /// subsequent turns of a conversation if you are manually
+    /// [managing context](https://platform.openai.com/docs/guides/conversation-state).
+    Reasoning(ReasoningItem),
+    /// A compaction item generated by the [`v1/responses/compact` API](https://platform.openai.com/docs/api-reference/responses/compact).
+    Compaction(CompactionBody),
+    /// An image generation request made by the model.
+    ImageGenerationCall(ImageGenToolCall),
+    /// A tool call to run code.
+    CodeInterpreterCall(CodeInterpreterToolCall),
+    /// A tool call to run a command on the local shell.
+    LocalShellCall(LocalShellToolCall),
+    /// A tool call that executes one or more shell commands in a managed environment.
+    ShellCall(FunctionShellCall),
+    /// The output of a shell tool call.
+    ShellCallOutput(FunctionShellCallOutput),
+    /// A tool call that applies file diffs by creating, deleting, or updating files.
+    ApplyPatchCall(ApplyPatchToolCall),
+    /// The output emitted by an apply patch tool call.
+    ApplyPatchCallOutput(ApplyPatchToolCallOutput),
+    /// An invocation of a tool on an MCP server.
+    McpCall(MCPToolCall),
+    /// A list of tools available on an MCP server.
+    McpListTools(MCPListTools),
+    /// A request for human approval of a tool invocation.
+    McpApprovalRequest(MCPApprovalRequest),
+    /// A call to a custom tool created by the model.
+    CustomToolCall(CustomToolCall),
+}
+
+/// A call to a custom tool created by the model.
+///
+/// Marked `#[non_exhaustive]`, so code outside the defining crate cannot build it with a
+/// struct literal or match it exhaustively by field.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[non_exhaustive]
+pub struct CustomToolCall {
+    /// An identifier used to map this custom tool call to a tool call output.
+    pub call_id: String,
+    /// The input for the custom tool call generated by the model.
+    pub input: String,
+    /// The name of the custom tool being called.
+    pub name: String,
+    /// The unique ID of the custom tool call in the OpenAI platform.
+    pub id: String,
+}
+
+/// Deletion acknowledgement payload.
+///
+/// NOTE(review): presumably the body returned when a stored response is deleted; the
+/// endpoint is not visible from this definition, so confirm against the caller.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct DeleteResponse {
+    /// The object type string (exact value is not fixed by this type).
+    pub object: String,
+    /// Deletion flag; presumably `true` when the delete succeeded (TODO confirm).
+    pub deleted: bool,
+    /// Identifier; presumably that of the deleted object (TODO confirm).
+    pub id: String,
+}
+
+/// A bare reference to an item: an `id` plus an optional `type`.
+///
+/// Unknown JSON fields are not rejected (there is no `deny_unknown_fields`), so any
+/// object carrying a string `id` deserializes into this type.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct AnyItemReference {
+    /// Optional type tag; (de)serialized under the JSON key `type` and written as `null`
+    /// when `None`.
+    pub r#type: Option<String>,
+    /// The ID of the referenced item.
+    pub id: String,
+}
+
+/// A full item as it appears in an item listing.
+///
+/// Internally tagged: the variant is chosen by a `type` field whose value is the variant
+/// name in `snake_case` (e.g. `McpCall` <-> `"type": "mcp_call"`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ItemResourceItem {
+    /// `type: "message"`.
+    Message(MessageItem),
+    /// `type: "file_search_call"`.
+    FileSearchCall(FileSearchToolCall),
+    /// `type: "computer_call"`.
+    ComputerCall(ComputerToolCall),
+    /// `type: "computer_call_output"`.
+    ComputerCallOutput(ComputerCallOutputItemParam),
+    /// `type: "web_search_call"`.
+    WebSearchCall(WebSearchToolCall),
+    /// `type: "function_call"`.
+    FunctionCall(FunctionToolCall),
+    /// `type: "function_call_output"`.
+    FunctionCallOutput(FunctionCallOutputItemParam),
+    /// `type: "image_generation_call"`.
+    ImageGenerationCall(ImageGenToolCall),
+    /// `type: "code_interpreter_call"`.
+    CodeInterpreterCall(CodeInterpreterToolCall),
+    /// `type: "local_shell_call"`.
+    LocalShellCall(LocalShellToolCall),
+    /// `type: "local_shell_call_output"`.
+    LocalShellCallOutput(LocalShellToolCallOutput),
+    /// `type: "shell_call"`.
+    ShellCall(FunctionShellCallItemParam),
+    /// `type: "shell_call_output"`.
+    ShellCallOutput(FunctionShellCallOutputItemParam),
+    /// `type: "apply_patch_call"`.
+    ApplyPatchCall(ApplyPatchToolCallItemParam),
+    /// `type: "apply_patch_call_output"`.
+    ApplyPatchCallOutput(ApplyPatchToolCallOutputItemParam),
+    /// `type: "mcp_list_tools"`.
+    McpListTools(MCPListTools),
+    /// `type: "mcp_approval_request"`.
+    McpApprovalRequest(MCPApprovalRequest),
+    /// `type: "mcp_approval_response"`.
+    McpApprovalResponse(MCPApprovalResponse),
+    /// `type: "mcp_call"`.
+    McpCall(MCPToolCall),
+}
+
+/// A single entry of a [`ResponseItemList`]: either a full item or a bare reference to one.
+///
+/// The representation is untagged, so on deserialization serde tries the variants in
+/// declaration order and keeps the first one that succeeds. `Item` is deliberately listed
+/// first: [`AnyItemReference`] only requires an `id` (its `type` is optional and unknown
+/// fields are ignored), so if it were tried first it would accept every full item and
+/// silently discard that item's payload, making `Item` unreachable. With this order a
+/// payload whose `type` is not a known item type (or is absent) still falls back to
+/// `ItemReference`. Serialization is unaffected by the order.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[serde(untagged)]
+pub enum ItemResource {
+    /// A fully materialized item, discriminated by its `type` field.
+    Item(ItemResourceItem),
+    /// A reference to an item by ID; the fallback when the payload is not a known item.
+    ItemReference(AnyItemReference),
+}
+
+/// A list of Response items.
+///
+/// `first_id` and `last_id` have no `skip_serializing_if`, so they are written as `null`
+/// when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseItemList {
+    /// The type of object returned, must be `list`.
+    pub object: String,
+    /// The ID of the first item in the list.
+    pub first_id: Option<String>,
+    /// The ID of the last item in the list.
+    pub last_id: Option<String>,
+    /// Whether there are more items in the list.
+    pub has_more: bool,
+    /// The list of items.
+    pub data: Vec<ItemResource>,
+}
+
+/// Request parameters for a token count.
+///
+/// NOTE(review): presumably the body of the input-token counting endpoint whose result is
+/// [`TokenCountsResource`]; the endpoint itself is not visible here.
+///
+/// Every field is optional and is omitted from the serialized form when `None`. The
+/// derived builder is named `TokenCountsBodyArgs`; its setters accept `impl Into<_>` and
+/// take the inner value rather than an `Option`.
+#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq, ToSchema)]
+#[builder(
+    name = "TokenCountsBodyArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct TokenCountsBody {
+    /// The conversation that this response belongs to. Items from this
+    /// conversation are prepended to `input_items` for this response request.
+    /// Input items and output items from this response are automatically added to this
+    /// conversation after this response completes.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub conversation: Option<ConversationParam>,
+
+    /// Text, image, or file inputs to the model, used to generate a response
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input: Option<InputParam>,
+
+    /// A system (or developer) message inserted into the model's context.
+    ///
+    /// When used along with `previous_response_id`, the instructions from a previous response will
+    /// not be carried over to the next response. This makes it simple to swap out system (or
+    /// developer) messages in new responses.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// Model ID used to generate the response, like `gpt-4o` or `o3`. OpenAI offers a
+    /// wide range of models with different capabilities, performance characteristics,
+    /// and price points. Refer to the [model guide](https://platform.openai.com/docs/models)
+    /// to browse and compare available models.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+
+    /// Whether to allow the model to run tool calls in parallel.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+
+    /// The unique ID of the previous response to the model. Use this to create multi-turn
+    /// conversations. Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+    /// Cannot be used in conjunction with `conversation`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+
+    /// **gpt-5 and o-series models only**
+    /// Configuration options for [reasoning models](https://platform.openai.com/docs/guides/reasoning).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<Reasoning>,
+
+    /// Configuration options for a text response from the model. Can be plain
+    /// text or structured JSON data. Learn more:
+    /// - [Text inputs and outputs](https://platform.openai.com/docs/guides/text)
+    /// - [Structured Outputs](https://platform.openai.com/docs/guides/structured-outputs)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<ResponseTextParam>,
+
+    /// How the model should select which tool (or tools) to use when generating
+    /// a response. See the `tools` parameter to see how to specify which tools
+    /// the model can call.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<ToolChoiceParam>,
+
+    /// An array of tools the model may call while generating a response. You can specify which tool
+    /// to use by setting the `tool_choice` parameter.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<Tool>>,
+
+    /// The truncation strategy to use for the model response.
+    /// - `auto`: If the input to this Response exceeds
+    ///   the model's context window size, the model will truncate the
+    ///   response to fit the context window by dropping items from the beginning of the conversation.
+    /// - `disabled` (default): If the input size will exceed the context window
+    ///   size for a model, the request will fail with a 400 error.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation: Option<Truncation>,
+}
+
+/// Result of a token count.
+///
+/// NOTE(review): presumably the response paired with [`TokenCountsBody`] — confirm.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct TokenCountsResource {
+    /// The object type string (exact value is not fixed by this type).
+    pub object: String,
+    /// The number of input tokens counted.
+    pub input_tokens: u32,
+}
+
+/// A compaction item generated by the `/v1/responses/compact` API.
+///
+/// Here `id` is optional and omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CompactionSummaryItemParam {
+    /// The ID of the compaction item.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+    /// The encrypted content of the compaction summary.
+    pub encrypted_content: String,
+}
+
+/// A compaction item generated by the `/v1/responses/compact` API.
+///
+/// Here `id` is required; `created_by` is omitted from the serialized form when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CompactionBody {
+    /// The unique ID of the compaction item.
+    pub id: String,
+    /// The encrypted content that was produced by compaction.
+    pub encrypted_content: String,
+    /// The identifier of the actor that created the item.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub created_by: Option<String>,
+}
+
+/// Request to compact a conversation.
+///
+/// The derived builder is named `CompactResponseRequestArgs`. Because the builder uses
+/// `default`, an unset `model` falls back to `String::default()` (the empty string)
+/// rather than failing the build. Optional fields are omitted from the serialized form
+/// when `None`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq, ToSchema)]
+#[builder(name = "CompactResponseRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CompactResponseRequest {
+    /// Model ID used to generate the response, like `gpt-5` or `o3`. OpenAI offers a wide range of models
+    /// with different capabilities, performance characteristics, and price points. Refer to the
+    /// [model guide](https://platform.openai.com/docs/models) to browse and compare available models.
+    pub model: String,
+
+    /// Text, image, or file inputs to the model, used to generate a response
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input: Option<InputParam>,
+
+    /// The unique ID of the previous response to the model. Use this to create multi-turn
+    /// conversations. Learn more about [conversation state](https://platform.openai.com/docs/guides/conversation-state).
+    /// Cannot be used in conjunction with `conversation`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+
+    /// A system (or developer) message inserted into the model's context.
+    ///
+    /// When used along with `previous_response_id`, the instructions from a previous response will
+    /// not be carried over to the next response. This makes it simple to swap out system (or
+    /// developer) messages in new responses.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+}
+
+/// The compacted response object.
+///
+/// All fields are required, including `usage`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct CompactResource {
+    /// The unique identifier for the compacted response.
+    pub id: String,
+    /// The object type. Always `response.compaction`.
+    pub object: String,
+    /// The compacted list of output items. This is a list of all user messages,
+    /// followed by a single compaction item.
+    pub output: Vec<OutputItem>,
+    /// Unix timestamp (in seconds) when the compacted conversation was created.
+    pub created_at: u64,
+    /// Token accounting for the compaction pass, including cached, reasoning, and total tokens.
+    pub usage: ResponseUsage,
+}
--- a/lib/async-openai/src/types/responses/sdk.rs
+++ b/lib/async-openai/src/types/responses/sdk.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::types::responses::{OutputItem, OutputMessageContent, Response};
+
+impl Response {
+    /// SDK-only convenience property that contains the aggregated text output from all
+    /// `output_text` items in the `output` array, if any are present.
+    ///
+    /// Walks `output` in order, and for every message item appends the text of each
+    /// `output_text` content part, with no separator between parts. All other output
+    /// items and content parts are ignored.
+    ///
+    /// Returns `None` when the concatenated text is empty, which covers both "no
+    /// `output_text` parts at all" and "only empty `output_text` parts".
+    pub fn output_text(&self) -> Option<String> {
+        // Append straight into one buffer instead of cloning every fragment into
+        // intermediate vectors and joining them afterwards.
+        let mut text = String::new();
+        for item in &self.output {
+            if let OutputItem::Message(msg) = item {
+                for content in &msg.content {
+                    if let OutputMessageContent::OutputText(ot) = content {
+                        text.push_str(&ot.text);
+                    }
+                }
+            }
+        }
+        if text.is_empty() {
+            None
+        } else {
+            Some(text)
+        }
+    }
+}
--- a/lib/async-openai/src/types/responses/stream.rs
+++ b/lib/async-openai/src/types/responses/stream.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+use crate::types::responses::{OutputContent, OutputItem, Response, ResponseLogProb, SummaryPart};
+
+/// Event types for streaming responses from the Responses API
+///
+/// Internally tagged: the variant is chosen by a `type` field, and each variant's wire
+/// value is the string given in its `#[serde(rename = "...")]` attribute (for example
+/// `response.created`, or plain `error` for [`ResponseStreamEvent::ResponseError`]).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+#[schema(no_recursion)]
+#[serde(tag = "type")]
+pub enum ResponseStreamEvent {
+    /// An event that is emitted when a response is created.
+    #[serde(rename = "response.created")]
+    ResponseCreated(ResponseCreatedEvent),
+    /// Emitted when the response is in progress.
+    #[serde(rename = "response.in_progress")]
+    ResponseInProgress(ResponseInProgressEvent),
+    /// Emitted when the model response is complete.
+    #[serde(rename = "response.completed")]
+    ResponseCompleted(ResponseCompletedEvent),
+    /// An event that is emitted when a response fails.
+    #[serde(rename = "response.failed")]
+    ResponseFailed(ResponseFailedEvent),
+    /// An event that is emitted when a response finishes as incomplete.
+    #[serde(rename = "response.incomplete")]
+    ResponseIncomplete(ResponseIncompleteEvent),
+    /// Emitted when a new output item is added.
+    #[serde(rename = "response.output_item.added")]
+    ResponseOutputItemAdded(ResponseOutputItemAddedEvent),
+    /// Emitted when an output item is marked done.
+    #[serde(rename = "response.output_item.done")]
+    ResponseOutputItemDone(ResponseOutputItemDoneEvent),
+    /// Emitted when a new content part is added.
+    #[serde(rename = "response.content_part.added")]
+    ResponseContentPartAdded(ResponseContentPartAddedEvent),
+    /// Emitted when a content part is done.
+    #[serde(rename = "response.content_part.done")]
+    ResponseContentPartDone(ResponseContentPartDoneEvent),
+    /// Emitted when there is an additional text delta.
+    #[serde(rename = "response.output_text.delta")]
+    ResponseOutputTextDelta(ResponseTextDeltaEvent),
+    /// Emitted when text content is finalized.
+    #[serde(rename = "response.output_text.done")]
+    ResponseOutputTextDone(ResponseTextDoneEvent),
+    /// Emitted when there is a partial refusal text.
+    #[serde(rename = "response.refusal.delta")]
+    ResponseRefusalDelta(ResponseRefusalDeltaEvent),
+    /// Emitted when refusal text is finalized.
+    #[serde(rename = "response.refusal.done")]
+    ResponseRefusalDone(ResponseRefusalDoneEvent),
+    /// Emitted when there is a partial function-call arguments delta.
+    #[serde(rename = "response.function_call_arguments.delta")]
+    ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent),
+    /// Emitted when function-call arguments are finalized.
+    #[serde(rename = "response.function_call_arguments.done")]
+    ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent),
+    /// Emitted when a file search call is initiated.
+    #[serde(rename = "response.file_search_call.in_progress")]
+    ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgressEvent),
+    /// Emitted when a file search is currently searching.
+    #[serde(rename = "response.file_search_call.searching")]
+    ResponseFileSearchCallSearching(ResponseFileSearchCallSearchingEvent),
+    /// Emitted when a file search call is completed (results found).
+    #[serde(rename = "response.file_search_call.completed")]
+    ResponseFileSearchCallCompleted(ResponseFileSearchCallCompletedEvent),
+    /// Emitted when a web search call is initiated.
+    #[serde(rename = "response.web_search_call.in_progress")]
+    ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgressEvent),
+    /// Emitted when a web search call is executing.
+    #[serde(rename = "response.web_search_call.searching")]
+    ResponseWebSearchCallSearching(ResponseWebSearchCallSearchingEvent),
+    /// Emitted when a web search call is completed.
+    #[serde(rename = "response.web_search_call.completed")]
+    ResponseWebSearchCallCompleted(ResponseWebSearchCallCompletedEvent),
+    /// Emitted when a new reasoning summary part is added.
+    #[serde(rename = "response.reasoning_summary_part.added")]
+    ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAddedEvent),
+    /// Emitted when a reasoning summary part is completed.
+    #[serde(rename = "response.reasoning_summary_part.done")]
+    ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDoneEvent),
+    /// Emitted when a delta is added to a reasoning summary text.
+    #[serde(rename = "response.reasoning_summary_text.delta")]
+    ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDeltaEvent),
+    /// Emitted when a reasoning summary text is completed.
+    #[serde(rename = "response.reasoning_summary_text.done")]
+    ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDoneEvent),
+    /// Emitted when a delta is added to a reasoning text.
+    #[serde(rename = "response.reasoning_text.delta")]
+    ResponseReasoningTextDelta(ResponseReasoningTextDeltaEvent),
+    /// Emitted when a reasoning text is completed.
+    #[serde(rename = "response.reasoning_text.done")]
+    ResponseReasoningTextDone(ResponseReasoningTextDoneEvent),
+    /// Emitted when an image generation tool call has completed and the final image is available.
+    #[serde(rename = "response.image_generation_call.completed")]
+    ResponseImageGenerationCallCompleted(ResponseImageGenCallCompletedEvent),
+    /// Emitted when an image generation tool call is actively generating an image (intermediate state).
+    #[serde(rename = "response.image_generation_call.generating")]
+    ResponseImageGenerationCallGenerating(ResponseImageGenCallGeneratingEvent),
+    /// Emitted when an image generation tool call is in progress.
+    #[serde(rename = "response.image_generation_call.in_progress")]
+    ResponseImageGenerationCallInProgress(ResponseImageGenCallInProgressEvent),
+    /// Emitted when a partial image is available during image generation streaming.
+    #[serde(rename = "response.image_generation_call.partial_image")]
+    ResponseImageGenerationCallPartialImage(ResponseImageGenCallPartialImageEvent),
+    /// Emitted when there is a delta (partial update) to the arguments of an MCP tool call.
+    #[serde(rename = "response.mcp_call_arguments.delta")]
+    ResponseMCPCallArgumentsDelta(ResponseMCPCallArgumentsDeltaEvent),
+    /// Emitted when the arguments for an MCP tool call are finalized.
+    #[serde(rename = "response.mcp_call_arguments.done")]
+    ResponseMCPCallArgumentsDone(ResponseMCPCallArgumentsDoneEvent),
+    /// Emitted when an MCP tool call has completed successfully.
+    #[serde(rename = "response.mcp_call.completed")]
+    ResponseMCPCallCompleted(ResponseMCPCallCompletedEvent),
+    /// Emitted when an MCP tool call has failed.
+    #[serde(rename = "response.mcp_call.failed")]
+    ResponseMCPCallFailed(ResponseMCPCallFailedEvent),
+    /// Emitted when an MCP tool call is in progress.
+    #[serde(rename = "response.mcp_call.in_progress")]
+    ResponseMCPCallInProgress(ResponseMCPCallInProgressEvent),
+    /// Emitted when the list of available MCP tools has been successfully retrieved.
+    #[serde(rename = "response.mcp_list_tools.completed")]
+    ResponseMCPListToolsCompleted(ResponseMCPListToolsCompletedEvent),
+    /// Emitted when the attempt to list available MCP tools has failed.
+    #[serde(rename = "response.mcp_list_tools.failed")]
+    ResponseMCPListToolsFailed(ResponseMCPListToolsFailedEvent),
+    /// Emitted when the system is in the process of retrieving the list of available MCP tools.
+    #[serde(rename = "response.mcp_list_tools.in_progress")]
+    ResponseMCPListToolsInProgress(ResponseMCPListToolsInProgressEvent),
+    /// Emitted when a code interpreter call is in progress.
+    #[serde(rename = "response.code_interpreter_call.in_progress")]
+    ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgressEvent),
+    /// Emitted when the code interpreter is actively interpreting the code snippet.
+    #[serde(rename = "response.code_interpreter_call.interpreting")]
+    ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpretingEvent),
+    /// Emitted when the code interpreter call is completed.
+    #[serde(rename = "response.code_interpreter_call.completed")]
+    ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompletedEvent),
+    /// Emitted when a partial code snippet is streamed by the code interpreter.
+    #[serde(rename = "response.code_interpreter_call_code.delta")]
+    ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDeltaEvent),
+    /// Emitted when the code snippet is finalized by the code interpreter.
+    #[serde(rename = "response.code_interpreter_call_code.done")]
+    ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDoneEvent),
+    /// Emitted when an annotation is added to output text content.
+    #[serde(rename = "response.output_text.annotation.added")]
+    ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAddedEvent),
+    /// Emitted when a response is queued and waiting to be processed.
+    #[serde(rename = "response.queued")]
+    ResponseQueued(ResponseQueuedEvent),
+    /// Event representing a delta (partial update) to the input of a custom tool call.
+    #[serde(rename = "response.custom_tool_call_input.delta")]
+    ResponseCustomToolCallInputDelta(ResponseCustomToolCallInputDeltaEvent),
+    /// Event indicating that input for a custom tool call is complete.
+    #[serde(rename = "response.custom_tool_call_input.done")]
+    ResponseCustomToolCallInputDone(ResponseCustomToolCallInputDoneEvent),
+    /// Emitted when an error occurs.
+    #[serde(rename = "error")]
+    ResponseError(ResponseErrorEvent),
+}
+
+/// Payload of the `response.created` stream event.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCreatedEvent {
+    /// Sequence number of this event (presumably its position in the stream — TODO confirm).
+    pub sequence_number: u64,
+    /// The response object carried by this event.
+    pub response: Response,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseInProgressEvent { // payload for the "response in progress" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub response: Response, // the `Response` value carried by the event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCompletedEvent { // payload for the "response completed" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub response: Response, // the `Response` value carried by the event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseFailedEvent { // payload for the "response failed" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub response: Response, // the `Response` value carried by the event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseIncompleteEvent { // payload for the "response incomplete" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub response: Response, // the `Response` value carried by the event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseOutputItemAddedEvent { // payload for an "output item added" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item: OutputItem, // the output item itself
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseOutputItemDoneEvent { // payload for an "output item done" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item: OutputItem, // the output item itself
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseContentPartAddedEvent { // payload for a "content part added" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub part: OutputContent, // the content part itself
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseContentPartDoneEvent { // payload for a "content part done" stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub part: OutputContent, // the content part itself
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseTextDeltaEvent { // payload for an output-text delta stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub delta: String, // text fragment carried by this event
+    pub logprobs: Option<Vec<ResponseLogProb>>, // optional log-probability entries; `None` when absent
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseTextDoneEvent { // payload for an output-text done stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub text: String, // text carried by the event — presumably the complete text; confirm
+    pub logprobs: Option<Vec<ResponseLogProb>>, // optional log-probability entries; `None` when absent
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseRefusalDeltaEvent { // payload for a refusal delta stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub delta: String, // refusal text fragment carried by this event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseRefusalDoneEvent { // payload for a refusal done stream event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub refusal: String, // refusal text carried by the event — presumably the complete text; confirm
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseFunctionCallArgumentsDeltaEvent { // payload for a function-call arguments delta event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub delta: String, // fragment of the arguments string
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseFunctionCallArgumentsDoneEvent { // payload for a function-call arguments done event, judging by the name
+    /// <https://github.com/64bit/async-openai/issues/472>
+    pub name: Option<String>, // optional here; the linked upstream issue is the recorded rationale
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub arguments: String, // the arguments string — presumably JSON-encoded; confirm
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseFileSearchCallInProgressEvent { // payload for a file-search call "in progress" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseFileSearchCallSearchingEvent { // payload for a file-search call "searching" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseFileSearchCallCompletedEvent { // payload for a file-search call "completed" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseWebSearchCallInProgressEvent { // payload for a web-search call "in progress" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseWebSearchCallSearchingEvent { // payload for a web-search call "searching" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseWebSearchCallCompletedEvent { // payload for a web-search call "completed" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseReasoningSummaryPartAddedEvent { // payload for a reasoning-summary "part added" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub summary_index: u32, // index of the summary part involved
+    pub part: SummaryPart, // the summary part itself
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseReasoningSummaryPartDoneEvent { // payload for a reasoning-summary "part done" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub summary_index: u32, // index of the summary part involved
+    pub part: SummaryPart, // the summary part itself
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseReasoningSummaryTextDeltaEvent { // payload for a reasoning-summary text delta event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub summary_index: u32, // index of the summary part involved
+    pub delta: String, // summary text fragment carried by this event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseReasoningSummaryTextDoneEvent { // payload for a reasoning-summary text done event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub summary_index: u32, // index of the summary part involved
+    pub text: String, // summary text carried by the event — presumably the complete text; confirm
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseReasoningTextDeltaEvent { // payload for a reasoning-text delta event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub delta: String, // reasoning text fragment carried by this event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseReasoningTextDoneEvent { // payload for a reasoning-text done event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub item_id: String, // id of the item involved
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub text: String, // reasoning text carried by the event — presumably the complete text; confirm
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseImageGenCallCompletedEvent { // payload for an image-generation call "completed" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseImageGenCallGeneratingEvent { // payload for an image-generation call "generating" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseImageGenCallInProgressEvent { // payload for an image-generation call "in progress" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseImageGenCallPartialImageEvent { // payload for an image-generation "partial image" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+    pub partial_image_index: u32, // index of this partial image
+    pub partial_image_b64: String, // partial image data — base64 text going by the `_b64` suffix; confirm
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPCallArgumentsDeltaEvent { // payload for an MCP call arguments delta event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+    pub delta: String, // fragment of the arguments string
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPCallArgumentsDoneEvent { // payload for an MCP call arguments done event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+    pub arguments: String, // the arguments string — presumably JSON-encoded; confirm
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPCallCompletedEvent { // payload for an MCP call "completed" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPCallFailedEvent { // payload for an MCP call "failed" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPCallInProgressEvent { // payload for an MCP call "in progress" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPListToolsCompletedEvent { // payload for an MCP list-tools "completed" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPListToolsFailedEvent { // payload for an MCP list-tools "failed" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseMCPListToolsInProgressEvent { // payload for an MCP list-tools "in progress" event, judging by the name
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCodeInterpreterCallInProgressEvent { // payload wrapped by the `ResponseCodeInterpreterCallInProgress` stream-event variant
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCodeInterpreterCallInterpretingEvent { // payload of "response.code_interpreter_call.interpreting": the interpreter is actively interpreting the snippet
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCodeInterpreterCallCompletedEvent { // payload of "response.code_interpreter_call.completed": the code interpreter call is completed
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCodeInterpreterCallCodeDeltaEvent { // payload of "response.code_interpreter_call_code.delta": a partial code snippet is streamed
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+    pub delta: String, // the partial code snippet
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCodeInterpreterCallCodeDoneEvent { // payload of "response.code_interpreter_call_code.done": the code snippet is finalized
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+    pub code: String, // the finalized code snippet
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseOutputTextAnnotationAddedEvent { // payload of "response.output_text.annotation.added": an annotation is added to output text content
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub content_index: u32, // index of the content part involved
+    pub annotation_index: u32, // index of the annotation involved
+    pub item_id: String, // id of the item involved
+    pub annotation: serde_json::Value, // the annotation, kept as untyped JSON (no dedicated Rust type here)
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseQueuedEvent { // payload of "response.queued": a response is queued and waiting to be processed
+    pub sequence_number: u64, // sequence number of this event
+    pub response: Response, // the `Response` value carried by the event
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCustomToolCallInputDeltaEvent { // payload of "response.custom_tool_call_input.delta": a partial update to a custom tool call's input
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+    pub delta: String, // the partial input update
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseCustomToolCallInputDoneEvent { // payload of "response.custom_tool_call_input.done": input for a custom tool call is complete
+    pub sequence_number: u64, // sequence number of this event
+    pub output_index: u32, // index of the output item involved
+    pub item_id: String, // id of the item involved
+    pub input: String, // the tool call input carried by the event — presumably the complete input; confirm
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
+pub struct ResponseErrorEvent { // payload of the "error" stream event, emitted when an error occurs
+    pub sequence_number: u64, // sequence number of this event
+    pub code: Option<String>, // optional error code
+    pub message: String, // error message text (always present)
+    pub param: Option<String>, // optional; presumably names the request parameter at fault — confirm
+}
--- a/lib/async-openai/src/types/shared/custom_grammar_format_param.rs
+++ b/lib/async-openai/src/types/shared/custom_grammar_format_param.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use crate::error::OpenAIError;
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Default)]
+#[serde(rename_all = "lowercase")] // variants (de)serialize as "lark" / "regex"
+pub enum GrammarSyntax { // syntax in which a custom grammar definition is written
+    Lark,
+    #[default] // `GrammarSyntax::default()` yields `Regex`
+    Regex,
+}
+
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(build_fn(error = "OpenAIError"))] // the derived builder's `build()` reports failures as `OpenAIError`
+pub struct CustomGrammarFormatParam { // a custom grammar: its definition text plus the syntax it is written in
+    /// The grammar definition.
+    pub definition: String,
+    /// The syntax of the grammar definition. One of `lark` or `regex`.
+    pub syntax: GrammarSyntax, // derived `Default` gives `GrammarSyntax::Regex` here
+}
--- a/lib/async-openai/src/types/shared/filter.rs
+++ b/lib/async-openai/src/types/shared/filter.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+/// Filters for file search.
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)] // no wrapper tag: deserialization tries the variants in declaration order
+pub enum Filter {
+    /// A filter used to compare a specified attribute key to a given value using a defined
+    /// comparison operation.
+    Comparison(ComparisonFilter),
+    /// Combine multiple filters using `and` or `or`.
+    Compound(CompoundFilter),
+}
+
+/// Single comparison filter.
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct ComparisonFilter {
+    /// Specifies the comparison operator
+    #[serde(rename = "type")] // the operator travels under the JSON key `type`
+    pub op: ComparisonType,
+    /// The key to compare against the value.
+    pub key: String,
+    /// The value to compare against the attribute key; supports string, number, or boolean types.
+    pub value: serde_json::Value, // untyped JSON: this struct does not itself reject other JSON value kinds
+}
+
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+pub enum ComparisonType { // comparison operators; each variant (de)serializes as the short code in its `rename`
+    #[serde(rename = "eq")]
+    Equals,
+    #[serde(rename = "ne")]
+    NotEquals,
+    #[serde(rename = "gt")]
+    GreaterThan,
+    #[serde(rename = "gte")]
+    GreaterThanOrEqualTo,
+    #[serde(rename = "lt")]
+    LessThan,
+    #[serde(rename = "lte")]
+    LessThanOrEqualTo,
+}
+
+/// Combine multiple filters.
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct CompoundFilter {
+    /// Type of operation
+    #[serde(rename = "type")] // the operation travels under the JSON key `type`
+    pub op: CompoundType,
+    /// Array of filters to combine. Items can be ComparisonFilter or CompoundFilter.
+    #[schema(no_recursion)] // `Filter` can contain `CompoundFilter` again; stops the schema derive from recursing on this cycle
+    pub filters: Vec<Filter>,
+}
+
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")] // variants (de)serialize as "and" / "or"
+pub enum CompoundType { // how a `CompoundFilter` combines its child filters
+    And,
+    Or,
+}
--- a/lib/async-openai/src/types/shared/mod.rs
+++ b/lib/async-openai/src/types/shared/mod.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+mod custom_grammar_format_param;
+mod filter;
+mod response_usage;
+
+pub use custom_grammar_format_param::*;
+pub use filter::*;
+pub use response_usage::*;
+
+// Re-export types that already exist in the crate
+pub use crate::types::CompletionTokensDetails;
+pub use crate::types::ImageDetail;
+pub use crate::types::PromptTokensDetails;
+pub use crate::types::ReasoningEffort;
+pub use crate::types::ResponseFormatJsonSchema;
--- a/lib/async-openai/src/types/shared/response_usage.rs
+++ b/lib/async-openai/src/types/shared/response_usage.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
+
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct InputTokenDetails { // input-token breakdown used by `ResponseUsage::input_tokens_details`
+    /// The number of tokens that were retrieved from the cache.
+    pub cached_tokens: u32, // required on deserialization (not `Option`, no serde default)
+}
+
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct OutputTokenDetails { // output-token breakdown used by `ResponseUsage::output_tokens_details`
+    /// The number of reasoning tokens.
+    pub reasoning_tokens: u32, // required on deserialization (not `Option`, no serde default)
+}
+
+/// Usage statistics for a response.
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct ResponseUsage { // every field is required: none is `Option` and none has a serde default
+    /// The number of input tokens.
+    pub input_tokens: u32,
+    /// A detailed breakdown of the input tokens.
+    pub input_tokens_details: InputTokenDetails,
+    /// The number of output tokens.
+    pub output_tokens: u32,
+    /// A detailed breakdown of the output tokens.
+    pub output_tokens_details: OutputTokenDetails,
+    /// The total number of tokens used.
+    pub total_tokens: u32, // stored as given; nothing in this type checks it against the input/output counts
+}
--- a/lib/llm/src/http/service/openai.rs
+++ b/lib/llm/src/http/service/openai.rs
@@ -50,7 +50,7 @@ use crate::protocols::openai::{
    completions::{NvCreateCompletionRequest, NvCreateCompletionResponse},
    embeddings::{NvCreateEmbeddingRequest, NvCreateEmbeddingResponse},
    images::{NvCreateImageRequest, NvImagesResponse},
-    responses::{NvCreateResponse, NvResponse},
+    responses::{NvCreateResponse, NvResponse, ResponseParams, chat_completion_to_response},
 };
 use crate::request_template::RequestTemplate;
 use crate::types::Annotated;
@@ -876,7 +876,7 @@ async fn chat_completions(
    // Handle unsupported fields - if Some(resp) is returned by
    // validate_chat_completion_unsupported_fields,
    // then a field was used that is unsupported. We will log an error message
-    // and early return a 501 NOT_IMPLEMENTED status code. Otherwise, proceeed.
+    // and early return a 501 NOT_IMPLEMENTED status code. Otherwise, proceed.
    validate_chat_completion_unsupported_fields(&request)?;

    // Handle required fields like messages shouldn't be empty.
@@ -1151,19 +1151,20 @@ async fn handler_responses(
    request.nvext = apply_header_routing_overrides(request.nvext.take(), &headers);

    // create the context for the request
-    let request_id = get_or_create_request_id(request.inner.user.as_deref(), &headers);
+    let request_id = get_or_create_request_id(None, &headers);
    let request = Context::with_id(request, request_id);
    let context = request.context();

    // create the connection handles
-    let (mut connection_handle, _stream_handle) =
+    let (mut connection_handle, stream_handle) =
        create_connection_monitor(context.clone(), Some(state.metrics_clone())).await;

-    let response = tokio::spawn(responses(state, template, request).in_current_span())
+    let response =
+        tokio::spawn(responses(state, template, request, stream_handle).in_current_span())
            .await
            .map_err(|e| {
                ErrorMessage::internal_server_error(&format!(
-                "Failed to await chat completions task: {:?}",
+                    "Failed to await responses task: {:?}",
                    e,
                ))
            })?;
@@ -1180,48 +1181,62 @@ async fn responses(
    state: Arc<service_v2::State>,
    template: Option<RequestTemplate>,
    mut request: Context<NvCreateResponse>,
+    mut stream_handle: ConnectionHandle,
 ) -> Result<Response, ErrorResponse> {
    // return a 503 if the service is not ready
    check_ready(&state)?;

    // Create http_queue_guard early - tracks time waiting to be processed
-    let model = request.inner.model.clone();
+    // Option<String> upstream -> String ("" if unset). NOTE(review): read before the template fills in a missing model below.
+    let model = request.inner.model.clone().unwrap_or_default();
    let http_queue_guard = state.metrics_clone().create_http_queue_guard(&model);

    // Handle unsupported fields - if Some(resp) is returned by validate_unsupported_fields,
    // then a field was used that is unsupported. We will log an error message
-    // and early return a 501 NOT_IMPLEMENTED status code. Otherwise, proceeed.
+    // and early return a 501 NOT_IMPLEMENTED status code. Otherwise, proceed.
    if let Some(resp) = validate_response_unsupported_fields(&request) {
        return Ok(resp.into_response());
    }

-    // Handle non-text (image, audio, file) inputs - if Some(resp) is returned by
-    // validate_input_is_text_only, then we are handling something other than Input::Text(_).
-    // We will log an error message and early return a 501 NOT_IMPLEMENTED status code.
-    // Otherwise, proceeed.
-    if let Some(resp) = validate_response_input_is_text_only(&request) {
-        return Ok(resp.into_response());
-    }
+    // Apply template values if present, with sensible defaults for the Responses API.
+    // Unlike chat completions where backends may have their own defaults, the Responses API
+    // should provide a generous default to avoid truncated responses (especially with
+    // reasoning models that emit <think> tokens).
+    const DEFAULT_MAX_OUTPUT_TOKENS: u32 = 4096;

-    // Apply template values if present
    if let Some(template) = template {
-        if request.inner.model.is_empty() {
-            request.inner.model = template.model.clone();
+        if request.inner.model.as_deref().unwrap_or("").is_empty() {
+            request.inner.model = Some(template.model.clone());
        }
-        if request.inner.temperature.unwrap_or(0.0) == 0.0 {
+        if request.inner.temperature.is_none() {
            request.inner.temperature = Some(template.temperature);
        }
-        if request.inner.max_output_tokens.unwrap_or(0) == 0 {
+        if request.inner.max_output_tokens.is_none() {
            request.inner.max_output_tokens = Some(template.max_completion_tokens);
        }
-    }
-    tracing::trace!("Received chat completions request: {:?}", request.inner);
+    } else if request.inner.max_output_tokens.is_none() {
+        request.inner.max_output_tokens = Some(DEFAULT_MAX_OUTPUT_TOKENS);
+    }
+    tracing::trace!("Received responses request: {:?}", request.inner);
+
+    // Extract request parameters before into_parts() consumes the request.
+    // These are echoed back in the Response object per the OpenAI spec.
+    let response_params = ResponseParams {
+        temperature: request.inner.temperature,
+        top_p: request.inner.top_p,
+        max_output_tokens: request.inner.max_output_tokens,
+        store: request.inner.store,
+        tools: request.inner.tools.clone(),
+        tool_choice: request.inner.tool_choice.clone(),
+        instructions: request.inner.instructions.clone(),
+    };

+    let streaming = request.inner.stream.unwrap_or(false);
    let request_id = request.id().to_string();
-    let (request, context) = request.into_parts();
+    let (orig_request, context) = request.into_parts();

-    let mut request: NvCreateChatCompletionRequest =
-        request.try_into().map_err(|e: anyhow::Error| {
+    let mut chat_request: NvCreateChatCompletionRequest =
+        orig_request.try_into().map_err(|e: anyhow::Error| {
            tracing::error!(
                request_id,
                error = %e,
@@ -1229,15 +1244,18 @@ async fn responses(
            );
            ErrorMessage::not_implemented_error(
                VALIDATION_PREFIX.to_string()
-                    + "Only Input::Text(_) is currently supported: "
+                    + "Failed to convert responses request: "
                    + &e.to_string(),
            )
        })?;

-    let request = context.map(|mut _req| {
-        request.inner.stream = Some(false);
-        request
-    });
+    // Non-streaming Responses requests are still served by streaming from the engine and
+    // aggregating the chunks below, so force the chat completion `stream` flag on here.
+    if !streaming {
+        chat_request.inner.stream = Some(true); // Internal streaming for aggregation
+    }
+
+    let request = context.map(|mut _req| chat_request);

    tracing::trace!("Getting chat completions engine for model: {}", model);

@@ -1250,24 +1268,121 @@ async fn responses(

    let mut response_collector = state.metrics_clone().create_response_collector(&model);

-    tracing::trace!("Issuing generate call for chat completions");
+    tracing::trace!("Issuing generate call for responses");

    // issue the generate call on the engine
-    let stream = engine
+    let engine_stream = engine
        .generate(request)
        .await
        .map_err(|e| ErrorMessage::from_anyhow(e, "Failed to generate completions"))?;

+    // Capture the context to cancel the stream if the client disconnects
+    let ctx = engine_stream.context();
+
    // Create inflight_guard now that actual processing has begun
    let mut inflight_guard =
        state
            .metrics_clone()
-            .create_inflight_guard(&model, Endpoint::Responses, false);
+            .create_inflight_guard(&model, Endpoint::Responses, streaming);
+
+    if streaming {
+        // For streaming responses, we return HTTP 200 immediately without checking for errors.
+        // Once HTTP 200 OK is sent, we cannot change the status code, so any backend errors
+        // must be delivered as SSE events in the stream. This is standard SSE behavior.
+        stream_handle.arm(); // allows the system to detect client disconnects and cancel the LLM generation
+
+        // Streaming path: convert chat completion stream chunks to Responses API SSE events.
+        // The engine yields Annotated<NvCreateChatCompletionStreamResponse>. We extract the
+        // inner stream response data and convert it to Responses API events.
+        use crate::protocols::openai::responses::stream_converter::ResponseStreamConverter;
+        use std::sync::atomic::{AtomicBool, Ordering};
+
+        let mut converter = ResponseStreamConverter::new(model.clone(), response_params);
+        let start_events = converter.emit_start_events();
+
+        // Use std::sync::Mutex (not tokio) since process_chunk/emit_end_events are
+        // synchronous -- no .await while lock is held. Avoids async lock overhead per token.
+        let converter = std::sync::Arc::new(std::sync::Mutex::new(converter));
+        let converter_end = converter.clone();
+
+        // Track whether the backend sent an error event during the stream.
+        // Shared between event_stream (writer) and done_stream (reader).
+        let saw_error = std::sync::Arc::new(AtomicBool::new(false));
+        let saw_error_end = saw_error.clone();

-    // Process stream to collect metrics and drop http_queue_guard on first token
        let mut http_queue_guard = Some(http_queue_guard);
-    let stream = stream.inspect(move |response| {
-        // Calls observe_response() on each token - drops http_queue_guard on first token
+
+        // Process each annotated chunk: extract the stream response data, convert to events
+        let event_stream = engine_stream
+            .inspect(move |response| {
+                process_response_and_observe_metrics(
+                    response,
+                    &mut response_collector,
+                    &mut http_queue_guard,
+                );
+            })
+            .filter_map(move |annotated_chunk| {
+                let converter = converter.clone();
+                let saw_error = saw_error.clone();
+                async move {
+                    // Check for backend error before extracting data.
+                    // Error events have data: None and event: Some("error").
+                    if annotated_chunk.data.is_none() {
+                        if annotated_chunk.event.as_deref() == Some("error") {
+                            saw_error.store(true, Ordering::Release);
+                        }
+                        return None;
+                    }
+                    let stream_resp = annotated_chunk.data?;
+                    let mut conv = converter.lock().expect("converter lock poisoned");
+                    let events = conv.process_chunk(&stream_resp);
+                    Some(stream::iter(events))
+                }
+            })
+            .flatten();
+
+        // Chain: start_events -> chunk_events -> end_events
+        let start_stream = stream::iter(start_events);
+
+        let done_stream = stream::once(async move {
+            let mut conv = converter_end.lock().expect("converter lock poisoned");
+            let end_events = if saw_error_end.load(Ordering::Acquire) {
+                conv.emit_error_events()
+            } else {
+                conv.emit_end_events()
+            };
+            stream::iter(end_events)
+        })
+        .flatten();
+
+        let full_stream = start_stream.chain(event_stream).chain(done_stream);
+
+        let full_stream = full_stream.map(|result| result.map_err(axum::Error::new));
+
+        // Wrap with disconnect monitoring: detects client disconnects, cancels generation,
+        // and defers inflight_guard.mark_ok() until the stream completes.
+        let stream = monitor_for_disconnects(full_stream, ctx, inflight_guard, stream_handle);
+
+        let mut sse_stream = Sse::new(stream);
+        if let Some(keep_alive) = state.sse_keep_alive() {
+            sse_stream = sse_stream.keep_alive(KeepAlive::default().interval(keep_alive));
+        }
+
+        Ok(sse_stream.into_response())
+    } else {
+        // Non-streaming path: aggregate stream into single response
+
+        // Check first event for backend errors before aggregating (non-streaming only)
+        let stream_with_check =
+            check_for_backend_error(engine_stream)
+                .await
+                .map_err(|error_response| {
+                    tracing::error!(request_id, "Backend error detected: {:?}", error_response);
+                    error_response
+                })?;
+
+        let mut http_queue_guard = Some(http_queue_guard);
+        let stream = stream_with_check.inspect(move |response| {
            process_response_and_observe_metrics(
                response,
                &mut response_collector,
@@ -1275,24 +1390,20 @@ async fn responses(
            );
        });

-    // TODO: handle streaming, currently just unary
        let response =
            NvCreateChatCompletionResponse::from_annotated_stream(stream, parsing_options.clone())
                .await
                .map_err(|e| {
-                tracing::error!(
-                    request_id,
-                    "Failed to fold chat completions stream for: {:?}",
-                    e
-                );
+                    tracing::error!(request_id, "Failed to fold responses stream: {:?}", e);
                    ErrorMessage::internal_server_error(&format!(
-                    "Failed to fold chat completions stream: {}",
+                        "Failed to fold responses stream: {}",
                        e
                    ))
                })?;

        // Convert NvCreateChatCompletionResponse --> NvResponse
-    let response: NvResponse = response.try_into().map_err(|e| {
+        let response: NvResponse = chat_completion_to_response(response, &response_params)
+            .map_err(|e| {
                tracing::error!(
                    request_id,
                    "Failed to convert NvCreateChatCompletionResponse to NvResponse: {:?}",
@@ -1304,17 +1415,6 @@ async fn responses(
        inflight_guard.mark_ok();

        Ok(Json(response).into_response())
-}
-
-pub fn validate_response_input_is_text_only(
-    request: &NvCreateResponse,
-) -> Option<impl IntoResponse> {
-    match &request.inner.input {
-        dynamo_async_openai::types::responses::Input::Text(_) => None,
-        _ => Some(ErrorMessage::not_implemented_error(
-            VALIDATION_PREFIX.to_string()
-                + "Only `Input::Text` is supported. Structured, multimedia, or custom input types are not yet implemented.",
-        )),
    }
 }

@@ -1335,16 +1435,6 @@ pub fn validate_response_unsupported_fields(
            VALIDATION_PREFIX.to_string() + "`include` is not supported.",
        ));
    }
-    if inner.instructions.is_some() {
-        return Some(ErrorMessage::not_implemented_error(
-            VALIDATION_PREFIX.to_string() + "`instructions` is not supported.",
-        ));
-    }
-    if inner.max_tool_calls.is_some() {
-        return Some(ErrorMessage::not_implemented_error(
-            VALIDATION_PREFIX.to_string() + "`max_tool_calls` is not supported.",
-        ));
-    }
    if inner.previous_response_id.is_some() {
        return Some(ErrorMessage::not_implemented_error(
            VALIDATION_PREFIX.to_string() + "`previous_response_id` is not supported.",
@@ -1370,37 +1460,16 @@ pub fn validate_response_unsupported_fields(
            VALIDATION_PREFIX.to_string() + "`store: true` is not supported.",
        ));
    }
-    if inner.stream == Some(true) {
-        return Some(ErrorMessage::not_implemented_error(
-            VALIDATION_PREFIX.to_string() + "`stream: true` is not supported.",
-        ));
-    }
    if inner.text.is_some() {
        return Some(ErrorMessage::not_implemented_error(
            VALIDATION_PREFIX.to_string() + "`text` is not supported.",
        ));
    }
-    if inner.tool_choice.is_some() {
-        return Some(ErrorMessage::not_implemented_error(
-            VALIDATION_PREFIX.to_string() + "`tool_choice` is not supported.",
-        ));
-    }
-    if inner.tools.is_some() {
-        return Some(ErrorMessage::not_implemented_error(
-            VALIDATION_PREFIX.to_string() + "`tools` is not supported.",
-        ));
-    }
    if inner.truncation.is_some() {
        return Some(ErrorMessage::not_implemented_error(
            VALIDATION_PREFIX.to_string() + "`truncation` is not supported.",
        ));
    }
-    if inner.user.is_some() {
-        return Some(ErrorMessage::not_implemented_error(
-            VALIDATION_PREFIX.to_string() + "`user` is not supported.",
-        ));
-    }
-
    None
 }

@@ -1545,6 +1614,7 @@ pub fn responses_router(
    let router = Router::new()
        .route(&path, post(handler_responses))
        .layer(middleware::from_fn(smart_json_error_middleware))
+        .layer(axum::extract::DefaultBodyLimit::max(get_body_limit()))
        .with_state((state, template));
    (vec![doc], router)
 }
@@ -1652,9 +1722,8 @@ mod tests {
    use crate::protocols::openai::completions::NvCreateCompletionRequest;
    use crate::protocols::openai::responses::NvCreateResponse;
    use dynamo_async_openai::types::responses::{
-        CreateResponse, Input, InputContent, InputItem, InputMessage, PromptConfig,
-        Role as ResponseRole, ServiceTier, TextConfig, TextResponseFormat, ToolChoice,
-        ToolChoiceMode, Truncation,
+        CreateResponse, IncludeEnum, Input, PromptConfig, ServiceTier, TextConfig,
+        TextResponseFormat, Truncation,
    };
    use dynamo_async_openai::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
@@ -1679,28 +1748,8 @@ mod tests {
        NvCreateResponse {
            inner: CreateResponse {
                input: Input::Text("hello".into()),
-                model: "test-model".into(),
-                background: None,
-                include: None,
-                instructions: None,
-                max_output_tokens: None,
-                max_tool_calls: None,
-                metadata: None,
-                parallel_tool_calls: None,
-                previous_response_id: None,
-                prompt: None,
-                reasoning: None,
-                service_tier: None,
-                store: None,
-                stream: None,
-                text: None,
-                tool_choice: None,
-                tools: None,
-                truncation: None,
-                user: None,
-                temperature: None,
-                top_logprobs: None,
-                top_p: None,
+                model: Some("test-model".into()),
+                ..Default::default()
            },
            nvext: None,
        }
@@ -1763,25 +1812,6 @@ mod tests {
        );
    }

-    #[test]
-    fn test_validate_input_is_text_only_accepts_text() {
-        let request = make_base_request();
-        let result = validate_response_input_is_text_only(&request);
-        assert!(result.is_none());
-    }
-
-    #[test]
-    fn test_validate_input_is_text_only_rejects_items() {
-        let mut request = make_base_request();
-        request.inner.input = Input::Items(vec![InputItem::Message(InputMessage {
-            kind: Default::default(),
-            role: ResponseRole::User,
-            content: InputContent::TextInput("structured".into()),
-        })]);
-        let result = validate_response_input_is_text_only(&request);
-        assert!(result.is_some());
-    }
-
    #[test]
    fn test_validate_unsupported_fields_accepts_clean_request() {
        let request = make_base_request();
@@ -1804,13 +1834,8 @@ mod tests {
            ("background", Box::new(|r| r.background = Some(true))),
            (
                "include",
-                Box::new(|r| r.include = Some(vec!["file_search_call.results".into()])),
+                Box::new(|r| r.include = Some(vec![IncludeEnum::FileSearchCallResults])),
            ),
-            (
-                "instructions",
-                Box::new(|r| r.instructions = Some("System prompt".into())),
-            ),
-            ("max_tool_calls", Box::new(|r| r.max_tool_calls = Some(3))),
            (
                "previous_response_id",
                Box::new(|r| r.previous_response_id = Some("prev-id".into())),
@@ -1834,25 +1859,19 @@ mod tests {
                Box::new(|r| r.service_tier = Some(ServiceTier::Auto)),
            ),
            ("store", Box::new(|r| r.store = Some(true))),
-            ("stream", Box::new(|r| r.stream = Some(true))),
            (
                "text",
                Box::new(|r| {
                    r.text = Some(TextConfig {
                        format: TextResponseFormat::Text,
+                        verbosity: None,
                    })
                }),
            ),
-            (
-                "tool_choice",
-                Box::new(|r| r.tool_choice = Some(ToolChoice::Mode(ToolChoiceMode::Required))),
-            ),
-            ("tools", Box::new(|r| r.tools = Some(vec![]))),
            (
                "truncation",
                Box::new(|r| r.truncation = Some(Truncation::Auto)),
            ),
-            ("user", Box::new(|r| r.user = Some("user-id".into()))),
        ];

        for (field, set_field) in unsupported_cases {

--- a/lib/llm/src/protocols/openai/responses.rs
+++ b/lib/llm/src/protocols/openai/responses.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-use dynamo_async_openai::types::responses::{
-    Content, Input, OutputContent, OutputMessage, OutputStatus, OutputText, Response,
-    Role as ResponseRole, Status,
-};
-use dynamo_async_openai::types::{
-    ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
-    ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
-};
-use dynamo_runtime::protocols::annotated::AnnotationsProvider;
-use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
-use uuid::Uuid;
-use validator::Validate;
-
-use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionResponse};
-use super::nvext::{NvExt, NvExtProvider};
-use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
-
-#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
-pub struct NvCreateResponse {
-    /// Flattened CreateResponse fields (model, input, temperature, etc.)
-    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::responses::CreateResponse,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub nvext: Option<NvExt>,
-}
-
-#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
-pub struct NvResponse {
-    /// Flattened Response fields.
-    #[serde(flatten)]
-    pub inner: dynamo_async_openai::types::responses::Response,
-
-    /// NVIDIA extension field for response metadata (worker IDs, etc.)
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub nvext: Option<serde_json::Value>,
-}
-
-/// Implements `NvExtProvider` for `NvCreateResponse`,
-/// providing access to NVIDIA-specific extensions.
-impl NvExtProvider for NvCreateResponse {
-    /// Returns a reference to the optional `NvExt` extension, if available.
-    fn nvext(&self) -> Option<&NvExt> {
-        self.nvext.as_ref()
-    }
-
-    /// Returns `None`, as raw prompt extraction is not implemented.
-    fn raw_prompt(&self) -> Option<String> {
-        None
-    }
-}
-
-/// Implements `AnnotationsProvider` for `NvCreateResponse`,
-/// enabling retrieval and management of request annotations.
-impl AnnotationsProvider for NvCreateResponse {
-    /// Retrieves the list of annotations from `NvExt`, if present.
-    fn annotations(&self) -> Option<Vec<String>> {
-        self.nvext
-            .as_ref()
-            .and_then(|nvext| nvext.annotations.clone())
-    }
-
-    /// Checks whether a specific annotation exists in the request.
-    ///
-    /// # Arguments
-    /// * `annotation` - A string slice representing the annotation to check.
-    ///
-    /// # Returns
-    /// `true` if the annotation exists, `false` otherwise.
-    fn has_annotation(&self, annotation: &str) -> bool {
-        self.nvext
-            .as_ref()
-            .and_then(|nvext| nvext.annotations.as_ref())
-            .map(|annotations| annotations.contains(&annotation.to_string()))
-            .unwrap_or(false)
-    }
-}
-
-/// Implements `OpenAISamplingOptionsProvider` for `NvCreateResponse`,
-/// exposing OpenAI's sampling parameters for chat completion.
-impl OpenAISamplingOptionsProvider for NvCreateResponse {
-    /// Retrieves the temperature parameter for sampling, if set.
-    fn get_temperature(&self) -> Option<f32> {
-        self.inner.temperature
-    }
-
-    /// Retrieves the top-p (nucleus sampling) parameter, if set.
-    fn get_top_p(&self) -> Option<f32> {
-        self.inner.top_p
-    }
-
-    /// Retrieves the frequency penalty parameter, if set.
-    fn get_frequency_penalty(&self) -> Option<f32> {
-        None // TODO setting as None for now
-    }
-
-    /// Retrieves the presence penalty parameter, if set.
-    fn get_presence_penalty(&self) -> Option<f32> {
-        None // TODO setting as None for now
-    }
-
-    /// Returns a reference to the optional `NvExt` extension, if available.
-    fn nvext(&self) -> Option<&NvExt> {
-        self.nvext.as_ref()
-    }
-
-    fn get_seed(&self) -> Option<i64> {
-        None // TODO setting as None for now
-    }
-
-    fn get_n(&self) -> Option<u8> {
-        None // TODO setting as None for now
-    }
-
-    fn get_best_of(&self) -> Option<u8> {
-        None // TODO setting as None for now
-    }
-}
-
-/// Implements `OpenAIStopConditionsProvider` for `NvCreateResponse`,
-/// providing access to stop conditions that control chat completion behavior.
-impl OpenAIStopConditionsProvider for NvCreateResponse {
-    /// Retrieves the maximum number of tokens allowed in the response.
-    #[allow(deprecated)]
-    fn get_max_tokens(&self) -> Option<u32> {
-        self.inner.max_output_tokens
-    }
-
-    /// Retrieves the minimum number of tokens required in the response.
-    ///
-    /// # Note
-    /// This method is currently a placeholder and always returns `None`
-    /// since `min_tokens` is not an OpenAI-supported parameter.
-    fn get_min_tokens(&self) -> Option<u32> {
-        None
-    }
-
-    /// Retrieves the stop conditions that terminate the chat completion response.
-    ///
-    /// Converts OpenAI's `Stop` enum to a `Vec<String>`, normalizing the representation.
-    ///
-    /// # Returns
-    /// * `Some(Vec<String>)` if stop conditions are set.
-    /// * `None` if no stop conditions are defined.
-    fn get_stop(&self) -> Option<Vec<String>> {
-        None // TODO returning None for now
-    }
-
-    /// Returns a reference to the optional `NvExt` extension, if available.
-    fn nvext(&self) -> Option<&NvExt> {
-        self.nvext.as_ref()
-    }
-}
-
-impl TryFrom<NvCreateResponse> for NvCreateChatCompletionRequest {
-    type Error = anyhow::Error;
-
-    fn try_from(resp: NvCreateResponse) -> Result<Self, Self::Error> {
-        // Create messages from input
-        let input_text = match resp.inner.input {
-            Input::Text(text) => text,
-            Input::Items(_) => {
-                return Err(anyhow::anyhow!(
-                    "Input::Items not supported in conversion to NvCreateChatCompletionRequest"
-                ));
-            }
-        };
-
-        let messages = vec![ChatCompletionRequestMessage::User(
-            ChatCompletionRequestUserMessage {
-                content: ChatCompletionRequestUserMessageContent::Text(input_text),
-                name: None,
-            },
-        )];
-
-        // TODO: See this PR for details: https://github.com/64bit/async-openai/pull/398
-        let top_logprobs = convert_top_logprobs(resp.inner.top_logprobs);
-
-        // The below should encompass all of the allowed configurable parameters
-        Ok(NvCreateChatCompletionRequest {
-            inner: CreateChatCompletionRequest {
-                messages,
-                model: resp.inner.model,
-                temperature: resp.inner.temperature,
-                top_p: resp.inner.top_p,
-                max_completion_tokens: resp.inner.max_output_tokens,
-                top_logprobs,
-                metadata: resp.inner.metadata,
-                stream: Some(true), // Set this to Some(True) by default to aggregate stream
-                ..Default::default()
-            },
-            common: Default::default(),
-            nvext: resp.nvext,
-            chat_template_args: None,
-            media_io_kwargs: None,
-            unsupported_fields: Default::default(),
-        })
-    }
-}
-
-fn convert_top_logprobs(input: Option<u32>) -> Option<u8> {
-    input.map(|x| x.min(20) as u8)
-}
-
-impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
-    type Error = anyhow::Error;
-
-    fn try_from(nv_resp: NvCreateChatCompletionResponse) -> Result<Self, Self::Error> {
-        let chat_resp = nv_resp;
-
-        // Preserve nvext field from chat completion response
-        let nvext = chat_resp.nvext.clone();
-
-        let content_text = chat_resp
-            .choices
-            .into_iter()
-            .next()
-            .and_then(|choice| choice.message.content)
-            .unwrap_or_else(|| {
-                tracing::warn!("No choices in chat completion response, using empty content");
-                dynamo_async_openai::types::ChatCompletionMessageContent::Text(String::new())
-            });
-
-        // Extract text from content (only handle text for responses API)
-        let text_content = match content_text {
-            dynamo_async_openai::types::ChatCompletionMessageContent::Text(text) => text,
-            dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_) => {
-                tracing::warn!(
-                    "Multimodal content in responses API not yet supported, using placeholder"
-                );
-                "[multimodal content]".to_string()
-            }
-        };
-
-        let message_id = format!("msg_{}", Uuid::new_v4().simple());
-        let response_id = format!("resp_{}", Uuid::new_v4().simple());
-
-        let output = vec![OutputContent::Message(OutputMessage {
-            id: message_id,
-            role: ResponseRole::Assistant,
-            status: OutputStatus::Completed,
-            content: vec![Content::OutputText(OutputText {
-                text: text_content,
-                annotations: vec![],
-            })],
-        })];
-
-        let response = Response {
-            id: response_id,
-            object: "response".to_string(),
-            created_at: chat_resp.created as u64,
-            model: chat_resp.model,
-            status: Status::Completed,
-            output,
-            output_text: None,
-            parallel_tool_calls: None,
-            reasoning: None,
-            service_tier: None,
-            store: None,
-            truncation: None,
-            temperature: None,
-            top_p: None,
-            tools: None,
-            metadata: None,
-            previous_response_id: None,
-            error: None,
-            incomplete_details: None,
-            instructions: None,
-            max_output_tokens: None,
-            text: None,
-            tool_choice: None,
-            usage: None,
-            user: None,
-        };
-
-        Ok(NvResponse {
-            inner: response,
-            nvext,
-        })
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use dynamo_async_openai::types::responses::{CreateResponse, Input};
-    use dynamo_async_openai::types::{
-        ChatCompletionRequestMessage, ChatCompletionRequestUserMessageContent,
-    };
-
-    use super::*;
-    use crate::types::openai::chat_completions::NvCreateChatCompletionResponse;
-
-    fn make_response_with_input(text: &str) -> NvCreateResponse {
-        NvCreateResponse {
-            inner: CreateResponse {
-                input: Input::Text(text.into()),
-                model: "test-model".into(),
-                max_output_tokens: Some(1024),
-                temperature: Some(0.5),
-                top_p: Some(0.9),
-                top_logprobs: Some(15),
-                ..Default::default()
-            },
-            nvext: Some(NvExt {
-                annotations: Some(vec!["debug".into(), "trace".into()]),
-                ..Default::default()
-            }),
-        }
-    }
-
-    #[test]
-    fn test_annotations_trait_behavior() {
-        let req = make_response_with_input("hello");
-        assert_eq!(
-            req.annotations(),
-            Some(vec!["debug".to_string(), "trace".to_string()])
-        );
-        assert!(req.has_annotation("debug"));
-        assert!(req.has_annotation("trace"));
-        assert!(!req.has_annotation("missing"));
-    }
-
-    #[test]
-    fn test_openai_sampling_trait_behavior() {
-        let req = make_response_with_input("hello");
-        assert_eq!(req.get_temperature(), Some(0.5));
-        assert_eq!(req.get_top_p(), Some(0.9));
-        assert_eq!(req.get_frequency_penalty(), None);
-        assert_eq!(req.get_presence_penalty(), None);
-    }
-
-    #[test]
-    fn test_openai_stop_conditions_trait_behavior() {
-        let req = make_response_with_input("hello");
-        assert_eq!(req.get_max_tokens(), Some(1024));
-        assert_eq!(req.get_min_tokens(), None);
-        assert_eq!(req.get_stop(), None);
-    }
-
-    #[test]
-    fn test_into_nvcreate_chat_completion_request() {
-        let nv_req: NvCreateChatCompletionRequest =
-            make_response_with_input("hi there").try_into().unwrap();
-
-        assert_eq!(nv_req.inner.model, "test-model");
-        assert_eq!(nv_req.inner.temperature, Some(0.5));
-        assert_eq!(nv_req.inner.top_p, Some(0.9));
-        assert_eq!(nv_req.inner.max_completion_tokens, Some(1024));
-        assert_eq!(nv_req.inner.top_logprobs, Some(15));
-        assert_eq!(nv_req.inner.stream, Some(true));
-
-        let messages = &nv_req.inner.messages;
-        assert_eq!(messages.len(), 1);
-        match &messages[0] {
-            ChatCompletionRequestMessage::User(user_msg) => match &user_msg.content {
-                ChatCompletionRequestUserMessageContent::Text(t) => {
-                    assert_eq!(t, "hi there");
-                }
-                _ => panic!("unexpected user content type"),
-            },
-            _ => panic!("expected user message"),
-        }
-    }
-
-    #[allow(deprecated)]
-    #[test]
-    fn test_into_nvresponse_from_chat_response() {
-        let now = 1_726_000_000;
-        let chat_resp = NvCreateChatCompletionResponse {
-            id: "chatcmpl-xyz".into(),
-            choices: vec![dynamo_async_openai::types::ChatChoice {
-                index: 0,
-                message: dynamo_async_openai::types::ChatCompletionResponseMessage {
-                    content: Some(
-                        dynamo_async_openai::types::ChatCompletionMessageContent::Text(
-                            "This is a reply".to_string(),
-                        ),
-                    ),
-                    refusal: None,
-                    tool_calls: None,
-                    role: dynamo_async_openai::types::Role::Assistant,
-                    function_call: None,
-                    audio: None,
-                    reasoning_content: None,
-                },
-                finish_reason: None,
-                stop_reason: None,
-                logprobs: None,
-            }],
-            created: now,
-            model: "llama-3.1-8b-instruct".into(),
-            service_tier: None,
-            system_fingerprint: None,
-            object: "chat.completion".to_string(),
-            usage: None,
-            nvext: None,
-        };
-
-        let wrapped: NvResponse = chat_resp.try_into().unwrap();
-
-        assert_eq!(wrapped.inner.model, "llama-3.1-8b-instruct");
-        assert_eq!(wrapped.inner.status, Status::Completed);
-        assert_eq!(wrapped.inner.object, "response");
-        assert!(wrapped.inner.id.starts_with("resp_"));
-
-        let msg = match &wrapped.inner.output[0] {
-            OutputContent::Message(m) => m,
-            _ => panic!("Expected Message variant"),
-        };
-        assert_eq!(msg.role, ResponseRole::Assistant);
-
-        match &msg.content[0] {
-            Content::OutputText(txt) => {
-                assert_eq!(txt.text, "This is a reply");
-            }
-            _ => panic!("Expected OutputText content"),
-        }
-    }
-
-    #[test]
-    fn test_convert_top_logprobs_clamped() {
-        assert_eq!(convert_top_logprobs(Some(5)), Some(5));
-        assert_eq!(convert_top_logprobs(Some(21)), Some(20));
-        assert_eq!(convert_top_logprobs(Some(1000)), Some(20));
-        assert_eq!(convert_top_logprobs(None), None);
-    }
-}