chore: Bring async-openai into repo as request starter (#2520)

Co-authored-by: Graham King <grahamk@nvidia.com>

chore: Bring async-openai into repo as request starter (#2520)
Co-authored-by: Graham King <grahamk@nvidia.com>
199b9a30 · nachiketb-nvidia · GitHub · 26d9f159 · 199b9a30 · 199b9a30
Unverified Commit 199b9a30 authored Aug 19, 2025 by nachiketb-nvidia Committed by GitHub Aug 19, 2025
20 changed files
--- a/lib/async-openai/src/types/realtime/error.rs
+++ b/lib/async-openai/src/types/realtime/error.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::{Deserialize, Serialize};
+
+/// Error details reported by the Realtime API.
+///
+/// Used as the `error` payload of `ErrorEvent` and
+/// `ConversationItemInputAudioTranscriptionFailedEvent`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RealtimeAPIError {
+    /// The type of error (e.g., "invalid_request_error", "server_error").
+    /// Spelled `r#type` because `type` is a Rust keyword; the JSON key is `type`.
+    pub r#type: String,
+
+    /// Error code, if any.
+    pub code: Option<String>,
+
+    /// A human-readable error message.
+    pub message: String,
+
+    /// Parameter related to the error, if any.
+    pub param: Option<String>,
+
+    /// The event_id of the client event that caused the error, if applicable.
+    pub event_id: Option<String>,
+}
--- a/lib/async-openai/src/types/realtime/item.rs
+++ b/lib/async-openai/src/types/realtime/item.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::{Deserialize, Serialize};
+
+/// The kind of a conversation [`Item`].
+///
+/// Variants are (de)serialized in snake_case: "message", "function_call",
+/// "function_call_output".
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum ItemType {
+    Message,
+    FunctionCall,
+    FunctionCallOutput,
+}
+
+/// The status of a conversation [`Item`].
+///
+/// Variants are (de)serialized in snake_case: "completed", "in_progress",
+/// "incomplete".
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum ItemStatus {
+    Completed,
+    InProgress,
+    Incomplete,
+}
+
+/// The role of the sender of a message [`Item`].
+///
+/// Variants are (de)serialized in lowercase: "user", "assistant", "system".
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum ItemRole {
+    User,
+    Assistant,
+    System,
+}
+
+/// The type of an [`ItemContent`] part.
+///
+/// Variants are (de)serialized in snake_case: "input_text", "input_audio",
+/// "text", "audio".
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum ItemContentType {
+    InputText,
+    InputAudio,
+    Text,
+    Audio,
+}
+
+/// One content part of a message [`Item`].
+///
+/// Only `type` is always present; every optional field is left out of the
+/// serialized output when it is `None`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ItemContent {
+    /// The content type ("input_text", "input_audio", "text", "audio").
+    pub r#type: ItemContentType,
+
+    /// The text content.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<String>,
+
+    /// Base64-encoded audio bytes.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub audio: Option<String>,
+
+    /// The transcript of the audio.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub transcript: Option<String>,
+}
+
+/// A conversation item: a message, a function call, or a function call output.
+///
+/// Every field is optional and is left out of the serialized output when it is
+/// `None`, so a single struct covers all item types. Which fields are
+/// meaningful depends on `type` (see the per-field notes).
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct Item {
+    /// The unique ID of the item.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub id: Option<String>,
+
+    /// The type of the item ("message", "function_call", "function_call_output").
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub r#type: Option<ItemType>,
+
+    /// The status of the item ("completed", "in_progress", "incomplete").
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<ItemStatus>,
+
+    /// The role of the message sender ("user", "assistant", "system").
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub role: Option<ItemRole>,
+
+    /// The content of the message.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub content: Option<Vec<ItemContent>>,
+
+    /// The ID of the function call (for "function_call" items).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub call_id: Option<String>,
+
+    /// The name of the function being called (for "function_call" items).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>,
+
+    /// The arguments of the function call (for "function_call" items).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub arguments: Option<String>,
+
+    /// The output of the function call (for "function_call_output" items).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output: Option<String>,
+}
+
+impl TryFrom<serde_json::Value> for Item {
+    type Error = serde_json::Error;
+
+    /// Builds an [`Item`] from an already-parsed JSON value.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `serde_json::Error` produced when `json` does not match the
+    /// shape of `Item`.
+    fn try_from(json: serde_json::Value) -> Result<Self, Self::Error> {
+        // `serde_json::Value` is itself a `Deserializer`, so the derived
+        // `Deserialize` impl can consume it directly.
+        Self::deserialize(json)
+    }
+}
--- a/lib/async-openai/src/types/realtime/mod.rs
+++ b/lib/async-openai/src/types/realtime/mod.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+// Private submodules; their contents are only reachable through the
+// re-exports below.
+mod client_event;
+mod content_part;
+mod conversation;
+mod error;
+mod item;
+mod rate_limit;
+mod response_resource;
+mod server_event;
+mod session_resource;
+
+// Flatten every submodule into this module's namespace so callers can name
+// the types without knowing which file defines them.
+pub use client_event::*;
+pub use content_part::*;
+pub use conversation::*;
+pub use error::*;
+pub use item::*;
+pub use rate_limit::*;
+pub use response_resource::*;
+pub use server_event::*;
+pub use session_resource::*;
--- a/lib/async-openai/src/types/realtime/rate_limit.rs
+++ b/lib/async-openai/src/types/realtime/rate_limit.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::{Deserialize, Serialize};
+
+/// A single rate-limit entry, as listed in `RateLimitsUpdatedEvent::rate_limits`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RateLimit {
+    /// The name of the rate limit ("requests", "tokens", "input_tokens", "output_tokens").
+    pub name: String,
+    /// The maximum allowed value for the rate limit.
+    pub limit: u32,
+    /// The remaining value before the limit is reached.
+    pub remaining: u32,
+    /// Seconds until the rate limit resets.
+    pub reset_seconds: f32,
+}
--- a/lib/async-openai/src/types/realtime/response_resource.rs
+++ b/lib/async-openai/src/types/realtime/response_resource.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::{Deserialize, Serialize};
+
+use super::item::Item;
+
+/// Usage statistics for a response (see `ResponseResource::usage`), expressed
+/// as token counts.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct Usage {
+    /// Total token count. NOTE(review): presumably input + output; confirm against the API docs.
+    pub total_tokens: u32,
+    /// Token count attributed to input.
+    pub input_tokens: u32,
+    /// Token count attributed to output.
+    pub output_tokens: u32,
+}
+
+/// The status of a response (see `ResponseResource::status`).
+///
+/// Variants are (de)serialized in snake_case, e.g. `InProgress` is "in_progress".
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum ResponseStatus {
+    InProgress,
+    Completed,
+    Cancelled,
+    Failed,
+    Incomplete,
+}
+
+/// Error payload carried by `ResponseStatusDetail::Failed`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct FailedError {
+    /// The error code.
+    pub code: String,
+    /// The error message.
+    pub message: String,
+}
+
+/// Reason carried by `ResponseStatusDetail::Incomplete`.
+///
+/// Variants are (de)serialized in snake_case: "interruption",
+/// "max_output_tokens", "content_filter".
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "snake_case")]
+pub enum IncompleteReason {
+    Interruption,
+    MaxOutputTokens,
+    ContentFilter,
+}
+
+/// Additional details about a response's status (see
+/// `ResponseResource::status_details`).
+///
+/// Internally tagged: the JSON object's `type` field ("incomplete", "failed"
+/// or "cancelled") selects the variant, and the variant's fields sit next to it.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(tag = "type")]
+pub enum ResponseStatusDetail {
+    /// `type = "incomplete"`, with the reason.
+    #[serde(rename = "incomplete")]
+    Incomplete { reason: IncompleteReason },
+    /// `type = "failed"`, with an optional error description.
+    #[serde(rename = "failed")]
+    Failed { error: Option<FailedError> },
+    /// `type = "cancelled"`, with the reason as a free-form string.
+    #[serde(rename = "cancelled")]
+    Cancelled { reason: String },
+}
+
+/// A Realtime API response resource, as carried by `ResponseCreatedEvent` and
+/// `ResponseDoneEvent`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseResource {
+    /// The unique ID of the response.
+    pub id: String,
+    /// The object type, must be "realtime.response".
+    pub object: String,
+    /// The status of the response
+    pub status: ResponseStatus,
+    /// Additional details about the status.
+    pub status_details: Option<ResponseStatusDetail>,
+    /// The list of output items generated by the response.
+    pub output: Vec<Item>,
+    /// Usage statistics for the response.
+    pub usage: Option<Usage>,
+}
--- a/lib/async-openai/src/types/realtime/server_event.rs
+++ b/lib/async-openai/src/types/realtime/server_event.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::{Deserialize, Serialize};
+
+use super::{
+    content_part::ContentPart, conversation::Conversation, error::RealtimeAPIError, item::Item,
+    rate_limit::RateLimit, response_resource::ResponseResource, session_resource::SessionResource,
+};
+
+/// Payload of the `error` server event, returned when an error occurs.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ErrorEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// Details of the error.
+    pub error: RealtimeAPIError,
+}
+
+/// Payload of the `session.created` server event, returned when a session is
+/// created.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct SessionCreatedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The session resource.
+    pub session: SessionResource,
+}
+
+/// Payload of the `session.updated` server event, returned when a session is
+/// updated.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct SessionUpdatedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The updated session resource.
+    pub session: SessionResource,
+}
+
+/// Payload of the `conversation.created` server event, returned when a
+/// conversation is created.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationCreatedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The conversation resource.
+    pub conversation: Conversation,
+}
+
+/// Payload of the `input_audio_buffer.committed` server event, returned when
+/// an input audio buffer is committed.
+///
+/// Note that the type name is spelled `Commited` (single `t`) while the wire
+/// event name is `committed`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct InputAudioBufferCommitedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the preceding item after which the new item will be inserted.
+    pub previous_item_id: String,
+    /// The ID of the user message item that will be created.
+    pub item_id: String,
+}
+
+/// Payload of the `input_audio_buffer.cleared` server event, returned when the
+/// input audio buffer is cleared by the client.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct InputAudioBufferClearedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+}
+
+/// Payload of the `input_audio_buffer.speech_started` server event, returned
+/// in server turn detection mode when speech is detected.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct InputAudioBufferSpeechStartedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// Milliseconds since the session started when speech was detected.
+    pub audio_start_ms: u32,
+    /// The ID of the user message item that will be created when speech stops.
+    pub item_id: String,
+}
+
+/// Payload of the `input_audio_buffer.speech_stopped` server event, returned
+/// in server turn detection mode when speech stops.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct InputAudioBufferSpeechStoppedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// Milliseconds since the session started when speech stopped.
+    pub audio_end_ms: u32,
+    /// The ID of the user message item that will be created.
+    pub item_id: String,
+}
+
+/// Payload of the `conversation.item.created` server event, returned when a
+/// conversation item is created.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationItemCreatedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the preceding item.
+    pub previous_item_id: Option<String>,
+    /// The item that was created.
+    pub item: Item,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone)]
+/// Log probability information for a transcribed token.
+///
+/// Carried in the optional `logprobs` list of the input audio transcription
+/// `completed` and `delta` events.
+pub struct LogProb {
+    /// Raw UTF-8 bytes for the token.
+    pub bytes: Vec<u8>,
+    /// The log probability of the token.
+    pub logprob: f64,
+    /// The token string.
+    pub token: String,
+}
+
+/// Payload of the `conversation.item.input_audio_transcription.completed`
+/// server event, returned when input audio transcription is enabled and a
+/// transcription succeeds.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationItemInputAudioTranscriptionCompletedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the user message item.
+    pub item_id: String,
+    /// The index of the content part containing the audio.
+    pub content_index: u32,
+    /// The transcribed text.
+    pub transcript: String,
+    /// Optional per-token log probability data.
+    pub logprobs: Option<Vec<LogProb>>,
+}
+
+/// Payload of the `conversation.item.input_audio_transcription.delta` server
+/// event, carrying an incremental piece of transcribed text.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationItemInputAudioTranscriptionDeltaEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the user message item.
+    pub item_id: String,
+    /// The index of the content part containing the audio.
+    pub content_index: u32,
+    /// The text delta.
+    pub delta: String,
+    /// Optional per-token log probability data.
+    pub logprobs: Option<Vec<LogProb>>,
+}
+
+/// Payload of the `conversation.item.input_audio_transcription.failed` server
+/// event, returned when input audio transcription is configured and a
+/// transcription request for a user message failed.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationItemInputAudioTranscriptionFailedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the user message item.
+    pub item_id: String,
+    /// The index of the content part containing the audio.
+    pub content_index: u32,
+    /// Details of the transcription error.
+    pub error: RealtimeAPIError,
+}
+
+/// Payload of the `conversation.item.truncated` server event, returned when an
+/// earlier assistant audio message item is truncated by the client.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationItemTruncatedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the assistant message item that was truncated.
+    pub item_id: String,
+    /// The index of the content part that was truncated.
+    pub content_index: u32,
+    /// The duration up to which the audio was truncated, in milliseconds.
+    pub audio_end_ms: u32,
+}
+
+/// Payload of the `conversation.item.deleted` server event, returned when an
+/// item in the conversation is deleted.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ConversationItemDeletedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the item that was deleted.
+    pub item_id: String,
+}
+
+/// Payload of the `response.created` server event, returned when a new
+/// response is created.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseCreatedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The response resource.
+    pub response: ResponseResource,
+}
+
+/// Payload of the `response.done` server event, returned when a response is
+/// done streaming.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseDoneEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The response resource.
+    pub response: ResponseResource,
+}
+
+/// Payload of the `response.output_item.added` server event, returned when a
+/// new item is created during response generation.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseOutputItemAddedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response to which the item belongs.
+    pub response_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The item that was added.
+    pub item: Item,
+}
+
+/// Payload of the `response.output_item.done` server event, returned when an
+/// item is done streaming.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseOutputItemDoneEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response to which the item belongs.
+    pub response_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The completed item.
+    pub item: Item,
+}
+
+/// Payload of the `response.content_part.added` server event, returned when a
+/// new content part is added to an assistant message item during response
+/// generation.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseContentPartAddedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item to which the content part was added.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+    /// The content part that was added.
+    pub part: ContentPart,
+}
+
+/// Payload of the `response.content_part.done` server event, returned when a
+/// content part is done streaming in an assistant message item.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseContentPartDoneEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item to which the content part was added.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+    /// The content part that is done.
+    pub part: ContentPart,
+}
+
+/// Payload of the `response.text.delta` server event, returned when the text
+/// value of a "text" content part is updated.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseTextDeltaEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+    /// The text delta.
+    pub delta: String,
+}
+
+/// Payload of the `response.text.done` server event, returned when the text
+/// value of a "text" content part is done streaming.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseTextDoneEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+    /// The final text content.
+    pub text: String,
+}
+
+/// Payload of the `response.audio_transcript.delta` server event, returned
+/// when the model-generated transcription of audio output is updated.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseAudioTranscriptDeltaEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+    /// The text delta.
+    pub delta: String,
+}
+
+/// Payload of the `response.audio_transcript.done` server event, returned when
+/// the model-generated transcription of audio output is done streaming.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseAudioTranscriptDoneEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+    /// The final transcript of the audio.
+    pub transcript: String,
+}
+
+/// Payload of the `response.audio.delta` server event, returned when the
+/// model-generated audio is updated.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseAudioDeltaEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+    /// Base64-encoded audio data delta.
+    pub delta: String,
+}
+
+/// Payload of the `response.audio.done` server event, returned when the
+/// model-generated audio is done. Carries no audio data, only identifiers.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseAudioDoneEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The index of the content part in the item's content array.
+    pub content_index: u32,
+}
+
+/// Payload of the `response.function_call_arguments.delta` server event,
+/// returned when the model-generated function call arguments are updated.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseFunctionCallArgumentsDeltaEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the function call item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The ID of the function call.
+    pub call_id: String,
+    /// The arguments delta as a JSON string.
+    pub delta: String,
+}
+
+/// Payload of the `response.function_call_arguments.done` server event,
+/// returned when the model-generated function call arguments are done
+/// streaming.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct ResponseFunctionCallArgumentsDoneEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The ID of the response.
+    pub response_id: String,
+    /// The ID of the function call item.
+    pub item_id: String,
+    /// The index of the output item in the response.
+    pub output_index: u32,
+    /// The ID of the function call.
+    pub call_id: String,
+    /// The final arguments as a JSON string.
+    pub arguments: String,
+}
+
+/// Payload of the `rate_limits.updated` server event, emitted to indicate the
+/// updated rate limits.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub struct RateLimitsUpdatedEvent {
+    /// The unique ID of the server event.
+    pub event_id: String,
+    /// The list of rate limit entries.
+    pub rate_limits: Vec<RateLimit>,
+}
+
+/// These are events emitted from the OpenAI Realtime WebSocket server to the client.
+///
+/// Internally tagged: the JSON object's `type` field (e.g. "session.created")
+/// selects the variant, and the remaining fields of the same object are
+/// (de)serialized into that variant's payload struct.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(tag = "type")]
+pub enum ServerEvent {
+    /// Returned when an error occurs.
+    #[serde(rename = "error")]
+    Error(ErrorEvent),
+
+    /// Returned when a session is created. Emitted automatically when a new connection is established.
+    #[serde(rename = "session.created")]
+    SessionCreated(SessionCreatedEvent),
+
+    /// Returned when a session is updated.
+    #[serde(rename = "session.updated")]
+    SessionUpdated(SessionUpdatedEvent),
+
+    /// Returned when a conversation is created. Emitted right after session creation.
+    #[serde(rename = "conversation.created")]
+    ConversationCreated(ConversationCreatedEvent),
+
+    /// Returned when an input audio buffer is committed, either by the client or automatically in server VAD mode.
+    #[serde(rename = "input_audio_buffer.committed")]
+    InputAudioBufferCommited(InputAudioBufferCommitedEvent),
+
+    /// Returned when the input audio buffer is cleared by the client.
+    #[serde(rename = "input_audio_buffer.cleared")]
+    InputAudioBufferCleared(InputAudioBufferClearedEvent),
+
+    /// Returned in server turn detection mode when speech is detected.
+    #[serde(rename = "input_audio_buffer.speech_started")]
+    InputAudioBufferSpeechStarted(InputAudioBufferSpeechStartedEvent),
+
+    /// Returned in server turn detection mode when speech stops.
+    #[serde(rename = "input_audio_buffer.speech_stopped")]
+    InputAudioBufferSpeechStopped(InputAudioBufferSpeechStoppedEvent),
+
+    /// Returned when a conversation item is created.
+    #[serde(rename = "conversation.item.created")]
+    ConversationItemCreated(ConversationItemCreatedEvent),
+
+    /// Returned when input audio transcription is enabled and a transcription succeeds.
+    #[serde(rename = "conversation.item.input_audio_transcription.completed")]
+    ConversationItemInputAudioTranscriptionCompleted(
+        ConversationItemInputAudioTranscriptionCompletedEvent,
+    ),
+
+    /// Carries an incremental text delta for an input audio transcription.
+    #[serde(rename = "conversation.item.input_audio_transcription.delta")]
+    ConversationItemInputAudioTranscriptionDelta(ConversationItemInputAudioTranscriptionDeltaEvent),
+
+    /// Returned when input audio transcription is configured, and a transcription request for a user message failed.
+    #[serde(rename = "conversation.item.input_audio_transcription.failed")]
+    ConversationItemInputAudioTranscriptionFailed(
+        ConversationItemInputAudioTranscriptionFailedEvent,
+    ),
+
+    /// Returned when an earlier assistant audio message item is truncated by the client.
+    #[serde(rename = "conversation.item.truncated")]
+    ConversationItemTruncated(ConversationItemTruncatedEvent),
+
+    /// Returned when an item in the conversation is deleted.
+    #[serde(rename = "conversation.item.deleted")]
+    ConversationItemDeleted(ConversationItemDeletedEvent),
+
+    /// Returned when a new Response is created. The first event of response creation, where the response is in an initial state of "in_progress".
+    #[serde(rename = "response.created")]
+    ResponseCreated(ResponseCreatedEvent),
+
+    /// Returned when a Response is done streaming. Always emitted, no matter the final state.
+    #[serde(rename = "response.done")]
+    ResponseDone(ResponseDoneEvent),
+
+    /// Returned when a new Item is created during response generation.
+    #[serde(rename = "response.output_item.added")]
+    ResponseOutputItemAdded(ResponseOutputItemAddedEvent),
+
+    /// Returned when an Item is done streaming. Also emitted when a Response is interrupted, incomplete, or cancelled.
+    #[serde(rename = "response.output_item.done")]
+    ResponseOutputItemDone(ResponseOutputItemDoneEvent),
+
+    /// Returned when a new content part is added to an assistant message item during response generation.
+    #[serde(rename = "response.content_part.added")]
+    ResponseContentPartAdded(ResponseContentPartAddedEvent),
+
+    /// Returned when a content part is done streaming in an assistant message item.
+    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
+    #[serde(rename = "response.content_part.done")]
+    ResponseContentPartDone(ResponseContentPartDoneEvent),
+
+    /// Returned when the text value of a "text" content part is updated.
+    #[serde(rename = "response.text.delta")]
+    ResponseTextDelta(ResponseTextDeltaEvent),
+
+    /// Returned when the text value of a "text" content part is done streaming.
+    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
+    #[serde(rename = "response.text.done")]
+    ResponseTextDone(ResponseTextDoneEvent),
+
+    /// Returned when the model-generated transcription of audio output is updated.
+    #[serde(rename = "response.audio_transcript.delta")]
+    ResponseAudioTranscriptDelta(ResponseAudioTranscriptDeltaEvent),
+
+    /// Returned when the model-generated transcription of audio output is done streaming.
+    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
+    #[serde(rename = "response.audio_transcript.done")]
+    ResponseAudioTranscriptDone(ResponseAudioTranscriptDoneEvent),
+
+    /// Returned when the model-generated audio is updated.
+    #[serde(rename = "response.audio.delta")]
+    ResponseAudioDelta(ResponseAudioDeltaEvent),
+
+    /// Returned when the model-generated audio is done.
+    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
+    #[serde(rename = "response.audio.done")]
+    ResponseAudioDone(ResponseAudioDoneEvent),
+
+    /// Returned when the model-generated function call arguments are updated.
+    #[serde(rename = "response.function_call_arguments.delta")]
+    ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDeltaEvent),
+
+    /// Returned when the model-generated function call arguments are done streaming.
+    /// Also emitted when a Response is interrupted, incomplete, or cancelled.
+    #[serde(rename = "response.function_call_arguments.done")]
+    ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDoneEvent),
+
+    /// Emitted after every "response.done" event to indicate the updated rate limits.
+    #[serde(rename = "rate_limits.updated")]
+    RateLimitsUpdated(RateLimitsUpdatedEvent),
+}
--- a/lib/async-openai/src/types/realtime/session_resource.rs
+++ b/lib/async-openai/src/types/realtime/session_resource.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::{Deserialize, Serialize};
+
+/// Audio encodings selectable for a session's input and output audio.
+///
+/// The wire values are "pcm16", "g711_ulaw" and "g711_alaw".
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub enum AudioFormat {
+    /// 16-bit PCM audio, serialized as "pcm16".
+    #[serde(rename = "pcm16")]
+    PCM16,
+    /// G.711 u-law audio, serialized as "g711_ulaw".
+    ///
+    /// This variant used to be serialized as the misspelled "g711_law"; that
+    /// spelling is still accepted on input so previously stored payloads keep
+    /// deserializing.
+    #[serde(rename = "g711_ulaw", alias = "g711_law")]
+    G711ULAW,
+    /// G.711 A-law audio, serialized as "g711_alaw".
+    #[serde(rename = "g711_alaw")]
+    G711ALAW,
+}
+
+/// Configuration for input audio transcription.
+///
+/// Every field is optional and is left out of the serialized output when it is
+/// `None`; `Default` yields a value with all fields unset.
+#[derive(Debug, Default, Serialize, Deserialize, Clone)]
+pub struct AudioTranscription {
+    /// The language of the input audio. Supplying the input language in ISO-639-1 (e.g. en) format will improve accuracy and latency.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub language: Option<String>,
+    /// The model to use for transcription, current options are gpt-4o-transcribe, gpt-4o-mini-transcribe, and whisper-1.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    /// An optional text to guide the model's style or continue a previous audio segment.
+    /// For whisper-1, the prompt is a list of keywords. For gpt-4o-transcribe models,
+    /// the prompt is a free text string, for example "expect words related to technology".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt: Option<String>,
+}
+
+/// Turn detection configuration.
+///
+/// Internally tagged: the JSON object's `type` field ("server_vad" or
+/// "semantic_vad") selects the variant.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(tag = "type")]
+pub enum TurnDetection {
+    /// Server VAD turn detection (`type = "server_vad"`).
+    #[serde(rename = "server_vad")]
+    ServerVAD {
+        /// Activation threshold for VAD (0.0 to 1.0).
+        threshold: f32,
+        /// Amount of audio to include before speech starts (in milliseconds).
+        prefix_padding_ms: u32,
+        /// Duration of silence to detect speech stop (in milliseconds).
+        silence_duration_ms: u32,
+
+        /// Whether or not to automatically generate a response when a VAD stop event occurs.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        create_response: Option<bool>,
+
+        /// Whether or not to automatically interrupt any ongoing response with output to
+        /// the default conversation (i.e. conversation of auto) when a VAD start event occurs.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        interrupt_response: Option<bool>,
+    },
+
+    /// Semantic VAD turn detection (`type = "semantic_vad"`).
+    #[serde(rename = "semantic_vad")]
+    SemanticVAD {
+        /// The eagerness of the model to respond.
+        /// `low` will wait longer for the user to continue speaking,
+        /// `high` will respond more quickly. `auto` is the default and is equivalent to `medium`.
+        eagerness: String,
+
+        /// Whether or not to automatically generate a response when a VAD stop event occurs.
+        #[serde(skip_serializing_if = "Option::is_none", default)]
+        create_response: Option<bool>,
+
+        /// Whether or not to automatically interrupt any ongoing response with output to
+        /// the default conversation (i.e. conversation of auto) when a VAD start event occurs.
+        #[serde(skip_serializing_if = "Option::is_none", default)]
+        interrupt_response: Option<bool>,
+    },
+}
+
+/// Upper bound on response output tokens: either the literal "inf" or a number.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+pub enum MaxResponseOutputTokens {
+    /// (De)serialized as the string "inf".
+    #[serde(rename = "inf")]
+    Inf,
+    /// Untagged, so it is (de)serialized as a bare number.
+    #[serde(untagged)]
+    Num(u16),
+}
+
+/// Definition of a tool.
+///
+/// Internally tagged by the JSON `type` field; "function" is the only variant.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(tag = "type")]
+pub enum ToolDefinition {
+    /// A function tool (`type = "function"`).
+    #[serde(rename = "function")]
+    Function {
+        /// The name of the function.
+        name: String,
+        /// The description of the function.
+        description: String,
+        /// Parameters of the function in JSON Schema.
+        parameters: serde_json::Value,
+    },
+}
+
+/// Single-valued discriminator that (de)serializes as the string `"function"`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum FunctionType {
+    /// Written as `"function"`.
+    Function,
+}
+
+/// How the model chooses tools in a realtime session.
+///
+/// The unit variants are written as lowercase strings (`"auto"`, `"none"`, `"required"`);
+/// `Function` is untagged and is written as an object with `type` and `name` keys.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum ToolChoice {
+    /// Written as `"auto"`.
+    Auto,
+    /// Written as `"none"`.
+    None,
+    /// Written as `"required"`.
+    Required,
+    /// A specific function, identified by name.
+    #[serde(untagged)]
+    Function {
+        /// Discriminator; always `function`.
+        r#type: FunctionType,
+        /// Name of the function.
+        name: String,
+    },
+}
+
+/// Voice identifiers for realtime audio output.
+///
+/// Each variant (de)serializes as its lowercase name, e.g. `Alloy` <-> `"alloy"`.
+#[derive(Debug, Serialize, Deserialize, Clone)]
+#[serde(rename_all = "lowercase")]
+pub enum RealtimeVoice {
+    Alloy,
+    Ash,
+    Ballad,
+    Coral,
+    Echo,
+    Fable,
+    Onyx,
+    Nova,
+    Shimmer,
+    Verse,
+}
+
+/// Realtime session configuration.
+///
+/// Every field is optional; a field that is `None` is omitted from the serialized output
+/// (see the `skip_serializing_if` attribute on each field).
+#[derive(Debug, Serialize, Deserialize, Clone, Default)]
+pub struct SessionResource {
+    /// The default model used for this session.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+
+    /// The set of modalities the model can respond with. To disable audio, set this to ["text"].
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub modalities: Option<Vec<String>>,
+
+    /// The default system instructions prepended to model calls.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// The voice the model uses to respond. Cannot be changed once the model has responded with audio at least once.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub voice: Option<RealtimeVoice>,
+
+    /// The format of input audio. Options are "pcm16", "g711_ulaw", or "g711_alaw".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_audio_format: Option<AudioFormat>,
+
+    /// The format of output audio. Options are "pcm16", "g711_ulaw", or "g711_alaw".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_audio_format: Option<AudioFormat>,
+
+    /// Configuration for input audio transcription. Can be set to null to turn off.
+    ///
+    /// NOTE(review): `None` is skipped during serialization, so an explicit `null` cannot be
+    /// sent with this type as written.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_audio_transcription: Option<AudioTranscription>,
+
+    /// Configuration for turn detection. Can be set to null to turn off.
+    ///
+    /// NOTE(review): `None` is skipped during serialization, so an explicit `null` cannot be
+    /// sent with this type as written.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub turn_detection: Option<TurnDetection>,
+
+    /// Tools (functions) available to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<ToolDefinition>>,
+
+    /// How the model chooses tools.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<ToolChoice>,
+
+    /// Sampling temperature for the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// Maximum number of output tokens for a single assistant response, inclusive of tool calls.
+    /// Provide an integer between 1 and 4096 to limit output tokens, or "inf" for the maximum available tokens for a given model.
+    /// Defaults to "inf".
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_response_output_tokens: Option<MaxResponseOutputTokens>,
+}
--- a/lib/async-openai/src/types/responses.rs
+++ b/lib/async-openai/src/types/responses.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use crate::error::OpenAIError;
+pub use crate::types::{
+    CompletionTokensDetails, ImageDetail, PromptTokensDetails, ReasoningEffort,
+    ResponseFormatJsonSchema,
+};
+use derive_builder::Builder;
+use futures::Stream;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use std::collections::HashMap;
+use std::pin::Pin;
+
+/// Role of messages in the API.
+///
+/// Each variant (de)serializes as its lowercase name, e.g. `User` <-> `"user"`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum Role {
+    User,
+    Assistant,
+    System,
+    Developer,
+}
+
+/// Status of input/output items.
+///
+/// Variants (de)serialize in snake_case, e.g. `InProgress` <-> `"in_progress"`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum OutputStatus {
+    InProgress,
+    Completed,
+    Incomplete,
+}
+
+/// Input payload: raw text or structured context items.
+///
+/// Untagged: a JSON string maps to `Text`, a JSON array maps to `Items`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum Input {
+    /// A text input to the model, equivalent to a text input with the user role.
+    Text(String),
+    /// A list of one or many input items to the model, containing different content types.
+    Items(Vec<InputItem>),
+}
+
+/// A context item: either a typed message or an arbitrary JSON value.
+///
+/// Untagged: when deserializing, `Message` is tried first and anything that does not match
+/// falls back to `Custom`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged, rename_all = "snake_case")]
+pub enum InputItem {
+    /// A structured message with a role and content.
+    Message(InputMessage),
+    /// Any other item, kept as raw JSON.
+    Custom(serde_json::Value),
+}
+
+/// A message to prime the model.
+///
+/// A builder named `InputMessageArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "InputMessageArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct InputMessage {
+    /// The item type, (de)serialized under the key `type`; falls back to the default when the
+    /// key is absent on deserialization.
+    #[serde(default, rename = "type")]
+    pub kind: InputMessageType,
+    /// The role of the message input.
+    pub role: Role,
+    /// Text, image, or audio input to the model, used to generate a response. Can also contain
+    /// previous assistant responses.
+    pub content: InputContent,
+}
+
+/// Type discriminator for [`InputMessage`]; the single variant (de)serializes as `"message"`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum InputMessageType {
+    /// The default (and only) value.
+    #[default]
+    Message,
+}
+
+/// Content of an input message.
+///
+/// Untagged: a JSON string maps to `TextInput`, a JSON array maps to `InputItemContentList`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum InputContent {
+    /// A text input to the model.
+    TextInput(String),
+    /// A list of one or many input items to the model, containing different content types.
+    InputItemContentList(Vec<ContentType>),
+}
+
+/// Parts of a message: text, image, or file.
+///
+/// Internally tagged by `type` using snake_case variant names (`input_text`, `input_image`,
+/// `input_file`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ContentType {
+    /// A text input to the model.
+    InputText(InputText),
+    /// An image input to the model.
+    InputImage(InputImage),
+    /// A file input to the model.
+    InputFile(InputFile),
+}
+
+/// A plain-text content part of an input message.
+///
+/// The field is public so the value can be constructed and read outside this module; this
+/// type has no generated builder.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct InputText {
+    /// The text input to the model.
+    pub text: String,
+}
+
+/// An image content part of an input message.
+///
+/// A builder named `InputImageArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`. Fields are public, matching
+/// the other request types in this file.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "InputImageArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct InputImage {
+    /// The detail level of the image to be sent to the model.
+    pub detail: ImageDetail,
+    /// The ID of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+    /// The URL of the image to be sent to the model. A fully qualified URL or base64 encoded image
+    /// in a data URL.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub image_url: Option<String>,
+}
+
+/// A file content part of an input message.
+///
+/// A builder named `InputFileArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`. Fields are public, matching
+/// the other request types in this file. `None` fields are omitted when serializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "InputFileArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct InputFile {
+    /// The content of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_data: Option<String>,
+    /// The ID of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+    /// The name of the file to be sent to the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub filename: Option<String>,
+}
+
+/// Request body for creating a response with the Responses API.
+///
+/// A builder named `CreateResponseArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`. Every optional field is
+/// omitted from the serialized request when it is `None`.
+#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)]
+#[builder(
+    name = "CreateResponseArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateResponse {
+    /// Text, image, or file inputs to the model, used to generate a response.
+    pub input: Input,
+
+    /// Model ID used to generate the response, like `gpt-4o`.
+    /// OpenAI offers a wide range of models with different capabilities,
+    /// performance characteristics, and price points.
+    pub model: String,
+
+    /// Whether to run the model response in the background.
+    /// boolean or null.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub background: Option<bool>,
+
+    /// Specify additional output data to include in the model response.
+    ///
+    /// Supported values:
+    /// - `file_search_call.results`
+    ///   Include the search results of the file search tool call.
+    /// - `message.input_image.image_url`
+    ///   Include image URLs from the input message.
+    /// - `computer_call_output.output.image_url`
+    ///   Include image URLs from the computer call output.
+    /// - `reasoning.encrypted_content`
+    ///   Include an encrypted version of reasoning tokens in reasoning item outputs.
+    ///   This enables reasoning items to be used in multi-turn conversations when
+    ///   using the Responses API statelessly (for example, when the `store` parameter
+    ///   is set to `false`, or when an organization is enrolled in the zero-data-
+    ///   retention program).
+    ///
+    /// If `None`, no additional data is returned.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub include: Option<Vec<String>>,
+
+    /// Inserts a system (or developer) message as the first item in the model's context.
+    ///
+    /// When using along with previous_response_id, the instructions from a previous response will
+    /// not be carried over to the next response. This makes it simple to swap out system
+    /// (or developer) messages in new responses.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// An upper bound for the number of tokens that can be generated for a
+    /// response, including visible output tokens and reasoning tokens.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<u32>,
+
+    /// The maximum number of total calls to built-in tools that can be processed in a response.
+    /// This maximum number applies across all built-in tool calls, not per individual tool.
+    /// Any further attempts to call a tool by the model will be ignored.
+    // Skipped when `None`, like every other optional field, so the request never carries an
+    // explicit `"max_tool_calls": null`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tool_calls: Option<u32>,
+
+    /// Set of 16 key-value pairs that can be attached to an object. This can be
+    /// useful for storing additional information about the object in a structured
+    /// format, and querying for objects via API or the dashboard.
+    ///
+    /// Keys are strings with a maximum length of 64 characters. Values are
+    /// strings with a maximum length of 512 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+
+    /// Whether to allow the model to run tool calls in parallel.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+
+    /// The unique ID of the previous response to the model. Use this to create
+    /// multi-turn conversations.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+
+    /// Reference to a prompt template and its variables.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt: Option<PromptConfig>,
+
+    /// **o-series models only**: Configuration options for reasoning models.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
+
+    /// Specifies the latency tier to use for processing the request.
+    ///
+    /// This parameter is relevant for customers subscribed to the Scale tier service.
+    ///
+    /// Supported values:
+    /// - `auto`
+    ///   - If the Project is Scale tier enabled, the system will utilize Scale tier credits until
+    ///     they are exhausted.
+    ///   - If the Project is not Scale tier enabled, the request will be processed using the
+    ///     default service tier with a lower uptime SLA and no latency guarantee.
+    /// - `default`
+    ///   The request will be processed using the default service tier with a lower uptime SLA and
+    ///   no latency guarantee.
+    /// - `flex`
+    ///   The request will be processed with the Flex Processing service tier. Learn more.
+    ///
+    /// When not set, the default behavior is `auto`.
+    ///
+    /// When this parameter is set, the response body will include the `service_tier` utilized.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub service_tier: Option<ServiceTier>,
+
+    /// Whether to store the generated model response for later retrieval via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub store: Option<bool>,
+
+    /// If set to true, the model response data will be streamed to the client as it is
+    /// generated using server-sent events.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8
+    /// will make the output more random, while lower values like 0.2 will make it
+    /// more focused and deterministic. We generally recommend altering this or
+    /// `top_p` but not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// Configuration options for a text response from the model. Can be plain text
+    /// or structured JSON data.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<TextConfig>,
+
+    /// How the model should select which tool (or tools) to use when generating
+    /// a response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<ToolChoice>,
+
+    /// An array of tools the model may call while generating a response.
+    /// Can include built-in tools (file_search, web_search_preview,
+    /// computer_use_preview) or custom function definitions.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<ToolDefinition>>,
+
+    /// An integer between 0 and 20 specifying the number of most likely tokens to return
+    /// at each token position, each with an associated log probability.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_logprobs: Option<u32>, // TODO add validation of range
+
+    /// An alternative to sampling with temperature, called nucleus sampling,
+    /// where the model considers the results of the tokens with top_p probability
+    /// mass. So 0.1 means only the tokens comprising the top 10% probability mass
+    /// are considered. We generally recommend altering this or `temperature` but
+    /// not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+
+    /// The truncation strategy to use for the model response:
+    /// - `auto`: drop items in the middle to fit context window.
+    /// - `disabled`: error if exceeding context window.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation: Option<Truncation>,
+
+    /// A unique identifier representing your end-user, which can help OpenAI to
+    /// monitor and detect abuse.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub user: Option<String>,
+}
+
+/// Reference to a prompt template and the values to substitute into it.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct PromptConfig {
+    /// The unique identifier of the prompt template to use.
+    pub id: String,
+
+    /// Optional version of the prompt template.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub version: Option<String>,
+
+    /// Optional map of values to substitute in for variables in your prompt. The substitution
+    /// values can either be strings, or other Response input types like images or files.
+    /// For now only supporting Strings.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub variables: Option<HashMap<String, String>>,
+}
+
+/// Service tier request options.
+///
+/// Each variant (de)serializes as its lowercase name (`auto`, `default`, `flex`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ServiceTier {
+    Auto,
+    Default,
+    Flex,
+}
+
+/// Truncation strategies.
+///
+/// Each variant (de)serializes as its lowercase name (`auto`, `disabled`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum Truncation {
+    Auto,
+    Disabled,
+}
+
+/// o-series reasoning settings.
+///
+/// A builder named `ReasoningConfigArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`. `None` fields are omitted
+/// when serializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "ReasoningConfigArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ReasoningConfig {
+    /// Constrain effort on reasoning.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub effort: Option<ReasoningEffort>,
+    /// Summary mode for reasoning.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub summary: Option<ReasoningSummary>,
+}
+
+/// Summary mode for reasoning output.
+///
+/// Each variant (de)serializes as its lowercase name (`auto`, `concise`, `detailed`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ReasoningSummary {
+    Auto,
+    Concise,
+    Detailed,
+}
+
+/// Configuration for text response format.
+///
+/// Wraps a single required `format` field.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct TextConfig {
+    /// Defines the format: plain text, JSON object, or JSON schema.
+    pub format: TextResponseFormat,
+}
+
+/// Response format selector.
+///
+/// Internally tagged by `type` using snake_case variant names (`text`, `json_object`,
+/// `json_schema`); the `json_schema` variant additionally carries the schema fields.
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum TextResponseFormat {
+    /// The type of response format being defined: `text`
+    Text,
+    /// The type of response format being defined: `json_object`
+    JsonObject,
+    /// The type of response format being defined: `json_schema`
+    JsonSchema(ResponseFormatJsonSchema),
+}
+
+/// Definitions for model-callable tools.
+///
+/// Internally tagged by `type` using snake_case variant names (e.g. `file_search`,
+/// `web_search_preview`, `local_shell`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ToolDefinition {
+    /// File search tool.
+    FileSearch(FileSearch),
+    /// Custom function call.
+    Function(Function),
+    /// Web search preview tool.
+    WebSearchPreview(WebSearchPreview),
+    /// Virtual computer control tool.
+    ComputerUsePreview(ComputerUsePreview),
+    /// Remote Model Context Protocol server.
+    Mcp(Mcp),
+    /// Python code interpreter tool.
+    CodeInterpreter(CodeInterpreter),
+    /// Image generation tool.
+    ImageGeneration(ImageGeneration),
+    /// Local shell command execution tool. Carries no extra fields.
+    LocalShell,
+}
+
+/// File search tool configuration.
+///
+/// A builder named `FileSearchArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "FileSearchArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct FileSearch {
+    /// The IDs of the vector stores to search.
+    pub vector_store_ids: Vec<String>,
+    /// The maximum number of results to return. This number should be between 1 and 50 inclusive.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_num_results: Option<u32>,
+    /// A filter to apply.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub filters: Option<Filter>,
+    /// Ranking options for search.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ranking_options: Option<RankingOptions>,
+}
+
+/// Custom function tool definition.
+///
+/// A builder named `FunctionArgs` is generated for this type; unset fields take their
+/// `Default` value.
+///
+/// NOTE(review): unlike most builders in this file, no `build_fn(error = "OpenAIError")` is
+/// configured here, so `build` returns derive_builder's generated error type instead.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "FunctionArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+pub struct Function {
+    /// The name of the function to call.
+    pub name: String,
+    /// A JSON schema object describing the parameters of the function.
+    pub parameters: serde_json::Value,
+    /// Whether to enforce strict parameter validation.
+    pub strict: bool,
+    /// A description of the function. Used by the model to determine whether or not to call the
+    /// function.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+}
+
+/// Web search preview tool configuration.
+///
+/// A builder named `WebSearchPreviewArgs` is generated for this type; unset fields take their
+/// `Default` value.
+///
+/// NOTE(review): unlike most builders in this file, no `build_fn(error = "OpenAIError")` is
+/// configured here, so `build` returns derive_builder's generated error type instead.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "WebSearchPreviewArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+pub struct WebSearchPreview {
+    /// The user's location.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub user_location: Option<Location>,
+    /// High level guidance for the amount of context window space to use for the search.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub search_context_size: Option<WebSearchContextSize>,
+}
+
+/// Amount of context window space to use for web search.
+///
+/// Each variant (de)serializes as its lowercase name (`low`, `medium`, `high`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
+#[serde(rename_all = "lowercase")]
+pub enum WebSearchContextSize {
+    Low,
+    Medium,
+    High,
+}
+
+/// Computer-use preview tool configuration.
+///
+/// A builder named `ComputerUsePreviewArgs` is generated for this type; unset fields take
+/// their `Default` value. Fields are public, matching the other tool types in this file.
+///
+/// NOTE(review): unlike most builders in this file, no `build_fn(error = "OpenAIError")` is
+/// configured here, so `build` returns derive_builder's generated error type instead.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "ComputerUsePreviewArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+pub struct ComputerUsePreview {
+    /// The type of computer environment to control.
+    pub environment: String,
+    /// The width of the computer display.
+    pub display_width: u32,
+    /// The height of the computer display.
+    pub display_height: u32,
+}
+
+/// Options for search result ranking.
+///
+/// `ranker` is required; `score_threshold` is omitted from the output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct RankingOptions {
+    /// The ranker to use for the file search.
+    pub ranker: String,
+    /// The score threshold for the file search, a number between 0 and 1. Numbers closer to 1 will
+    /// attempt to return only the most relevant results, but may return fewer results.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub score_threshold: Option<f32>,
+}
+
+/// Filters for file search.
+///
+/// Untagged: when deserializing, `Comparison` is tried first, then `Compound`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum Filter {
+    /// A filter used to compare a specified attribute key to a given value using a defined
+    /// comparison operation.
+    Comparison(ComparisonFilter),
+    /// Combine multiple filters using `and` or `or`.
+    Compound(CompoundFilter),
+}
+
+/// Single comparison filter.
+///
+/// The operator is stored in `op` but (de)serialized under the key `type`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct ComparisonFilter {
+    /// Specifies the comparison operator
+    #[serde(rename = "type")]
+    pub op: ComparisonType,
+    /// The key to compare against the value.
+    pub key: String,
+    /// The value to compare against the attribute key; supports string, number, or boolean types.
+    pub value: serde_json::Value,
+}
+
+/// Comparison operators, each (de)serialized as a short code given by its `rename`.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+pub enum ComparisonType {
+    /// Written as `eq`.
+    #[serde(rename = "eq")]
+    Equals,
+    /// Written as `ne`.
+    #[serde(rename = "ne")]
+    NotEquals,
+    /// Written as `gt`.
+    #[serde(rename = "gt")]
+    GreaterThan,
+    /// Written as `gte`.
+    #[serde(rename = "gte")]
+    GreaterThanOrEqualTo,
+    /// Written as `lt`.
+    #[serde(rename = "lt")]
+    LessThan,
+    /// Written as `lte`.
+    #[serde(rename = "lte")]
+    LessThanOrEqualTo,
+}
+
+/// Combine multiple filters.
+///
+/// The logical operator is stored in `op` but (de)serialized under the key `type`. It is a
+/// [`CompoundType`] (`and` / `or`); a comparison operator cannot express how sub-filters are
+/// combined.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct CompoundFilter {
+    /// Type of operation: `and` or `or`.
+    #[serde(rename = "type")]
+    pub op: CompoundType,
+    /// Array of filters to combine. Items can be ComparisonFilter or CompoundFilter.
+    pub filters: Vec<Filter>,
+}
+
+/// Logical operators for combining filters.
+///
+/// Each variant (de)serializes as its lowercase name (`and`, `or`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum CompoundType {
+    And,
+    Or,
+}
+
+/// Approximate user location for web search.
+///
+/// A builder named `LocationArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "LocationArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct Location {
+    /// The type of location approximation. Always approximate.
+    ///
+    /// (De)serialized under the key `type`. NOTE(review): the derived `Default` leaves this as
+    /// an empty string, so callers presumably need to set it explicitly — confirm.
+    #[serde(rename = "type")]
+    pub kind: String,
+    /// Free text input for the city of the user, e.g. San Francisco.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub city: Option<String>,
+    /// The two-letter ISO country code of the user, e.g. US.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub country: Option<String>,
+    /// Free text input for the region of the user, e.g. California.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub region: Option<String>,
+    /// The IANA timezone of the user, e.g. America/Los_Angeles.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub timezone: Option<String>,
+}
+
+/// MCP (Model Context Protocol) tool configuration.
+///
+/// A builder named `McpArgs` is generated for this type; unset fields take their `Default`
+/// value and `build` reports failures as `OpenAIError`. `None` fields are omitted when
+/// serializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "McpArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct Mcp {
+    /// A label for this MCP server.
+    pub server_label: String,
+    /// The URL for the MCP server.
+    pub server_url: String,
+    /// List of allowed tool names or filter object.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub allowed_tools: Option<AllowedTools>,
+    /// Optional HTTP headers for the MCP server, kept as raw JSON.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub headers: Option<Value>,
+    /// Approval policy or filter for tools.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub require_approval: Option<RequireApproval>,
+}
+
+/// Allowed tools configuration for MCP.
+///
+/// Untagged: a JSON array maps to `List`, a JSON object maps to `Filter`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum AllowedTools {
+    /// A flat list of allowed tool names.
+    List(Vec<String>),
+    /// A filter object specifying allowed tools.
+    Filter(McpAllowedToolsFilter),
+}
+
+/// Filter object for MCP allowed tools.
+///
+/// The single field is optional and is omitted from the output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct McpAllowedToolsFilter {
+    /// Names of tools in the filter
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_names: Option<Vec<String>>,
+}
+
+/// Approval policy or filter for MCP tools.
+///
+/// Untagged: a JSON string maps to `Policy`, a JSON object maps to `Filter`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum RequireApproval {
+    /// A blanket policy: "always" or "never".
+    Policy(RequireApprovalPolicy),
+    /// A filter object specifying which tools require approval.
+    Filter(McpApprovalFilter),
+}
+
+/// Blanket approval policy.
+///
+/// Each variant (de)serializes as its lowercase name (`always`, `never`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum RequireApprovalPolicy {
+    Always,
+    Never,
+}
+
+/// Filter object for MCP tool approval.
+///
+/// Both fields are optional and are omitted from the output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct McpApprovalFilter {
+    /// A list of tools that always require approval.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub always: Option<McpAllowedToolsFilter>,
+    /// A list of tools that never require approval.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub never: Option<McpAllowedToolsFilter>,
+}
+
+/// Container configuration for a code interpreter.
+///
+/// Untagged: a JSON string maps to `Id`, a JSON object maps to `Container`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum CodeInterpreterContainer {
+    /// A simple container ID.
+    Id(String),
+    /// Auto-configured container with optional files.
+    Container(CodeInterpreterContainerKind),
+}
+
+/// Auto configuration for code interpreter container.
+///
+/// Internally tagged by `type`; the single variant is written with `"type": "auto"`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum CodeInterpreterContainerKind {
+    /// Automatically configured container.
+    Auto {
+        /// Optional list of uploaded file IDs.
+        #[serde(skip_serializing_if = "Option::is_none")]
+        file_ids: Option<Vec<String>>,
+    },
+}
+
+/// Code interpreter tool definition.
+///
+/// A builder named `CodeInterpreterArgs` is generated for this type; an unset field takes its
+/// `Default` value and `build` reports failures as `OpenAIError`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "CodeInterpreterArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CodeInterpreter {
+    /// Container configuration for running code.
+    pub container: CodeInterpreterContainer,
+}
+
+/// Mask image input for image generation.
+///
+/// Both fields are optional and are omitted from the output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct InputImageMask {
+    /// Base64-encoded mask image.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub image_url: Option<String>,
+    /// File ID for the mask image.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_id: Option<String>,
+}
+
+/// Image generation tool definition.
+///
+/// A builder named `ImageGenerationArgs` is generated for this type; unset fields take their
+/// `Default` value and `build` reports failures as `OpenAIError`. Every field is optional and
+/// is omitted from the output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[builder(
+    name = "ImageGenerationArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default
+)]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ImageGeneration {
+    /// Background type: transparent, opaque, or auto.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub background: Option<ImageGenerationBackground>,
+    /// Optional mask for inpainting.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input_image_mask: Option<InputImageMask>,
+    /// Model to use (default: gpt-image-1).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    /// Moderation level (default: auto).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub moderation: Option<String>,
+    /// Compression level (0-100).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_compression: Option<u8>,
+    /// Output format: png, webp, or jpeg.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_format: Option<ImageGenerationOutputFormat>,
+    /// Number of partial images (0-3).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub partial_images: Option<u8>,
+    /// Quality: low, medium, high, or auto.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub quality: Option<ImageGenerationQuality>,
+    /// Size: e.g. "1024x1024" or auto.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub size: Option<ImageGenerationSize>,
+}
+
+/// Background type for generated images.
+///
+/// Each variant (de)serializes as its lowercase name (`transparent`, `opaque`, `auto`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenerationBackground {
+    Transparent,
+    Opaque,
+    Auto,
+}
+
+/// Output file format for generated images.
+///
+/// Each variant (de)serializes as its lowercase name (`png`, `webp`, `jpeg`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenerationOutputFormat {
+    Png,
+    Webp,
+    Jpeg,
+}
+
+/// Quality setting for generated images.
+///
+/// Each variant (de)serializes as its lowercase name (`low`, `medium`, `high`, `auto`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenerationQuality {
+    Low,
+    Medium,
+    High,
+    Auto,
+}
+
+/// Size of generated images.
+///
+/// `Auto` is written as `auto`; the fixed sizes are written as `WIDTHxHEIGHT` strings via
+/// their explicit `rename` attributes.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ImageGenerationSize {
+    Auto,
+    /// Written as `1024x1024`.
+    #[serde(rename = "1024x1024")]
+    Size1024x1024,
+    /// Written as `1024x1536`.
+    #[serde(rename = "1024x1536")]
+    Size1024x1536,
+    /// Written as `1536x1024`.
+    #[serde(rename = "1536x1024")]
+    Size1536x1024,
+}
+
+/// Control how the model picks or is forced to pick a tool.
+///
+/// Untagged: when deserializing, variants are tried in declaration order (`Mode`, then
+/// `Hosted`, then `Function`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum ToolChoice {
+    /// Controls which (if any) tool is called by the model.
+    Mode(ToolChoiceMode),
+    /// Indicates that the model should use a built-in tool to generate a response.
+    Hosted {
+        /// The type of hosted tool the model should use. (De)serialized under the key `type`.
+        #[serde(rename = "type")]
+        kind: HostedToolType,
+    },
+    /// Use this option to force the model to call a specific function.
+    ///
+    /// NOTE(review): this variant serializes as an object containing only `name`, with no
+    /// `type` key; confirm whether the API expects `"type": "function"` alongside it.
+    Function {
+        /// The name of the function to call.
+        name: String,
+    },
+}
+
+/// Simple tool-choice modes.
+///
+/// Each variant (de)serializes as its lowercase name (`none`, `auto`, `required`).
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ToolChoiceMode {
+    /// The model will not call any tool and instead generates a message.
+    None,
+    /// The model can pick between generating a message or calling one or more tools.
+    Auto,
+    /// The model must call one or more tools.
+    Required,
+}
+
+/// Hosted tool type identifiers.
+///
+/// Each variant is (de)serialized as its snake_case name.
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum HostedToolType {
+    /// Wire value `"file_search"`.
+    FileSearch,
+    /// Wire value `"web_search_preview"`.
+    WebSearchPreview,
+    /// Wire value `"computer_use_preview"`.
+    ComputerUsePreview,
+}
+
+/// Error returned by the API when a request fails.
+///
+/// Both fields are required when deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct ErrorObject {
+    /// The error code for the response.
+    pub code: String,
+    /// A human-readable description of the error.
+    pub message: String,
+}
+
+/// Details about an incomplete response.
+///
+/// Used as the type of `Response::incomplete_details`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct IncompleteDetails {
+    /// The reason why the response is incomplete, kept as a free-form string.
+    pub reason: String,
+}
+
+/// A simple text output from the model.
+///
+/// Used as the payload of [`Content::OutputText`]. Both fields are required when
+/// deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct OutputText {
+    /// The annotations of the text output.
+    pub annotations: Vec<Annotation>,
+    /// The text output from the model.
+    pub text: String,
+}
+
+/// An annotation attached to an [`OutputText`].
+///
+/// Internally tagged on the `type` field; the tag is the snake_case variant name
+/// (`"file_citation"`, `"url_citation"`, `"file_path"`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum Annotation {
+    /// A citation to a file.
+    FileCitation(FileCitation),
+    /// A citation for a web resource used to generate a model response.
+    UrlCitation(UrlCitation),
+    /// A path to a file.
+    FilePath(FilePath),
+}
+
+/// A citation to a file; the payload of [`Annotation::FileCitation`].
+///
+/// Fields are public so that code outside this module can read a deserialized citation
+/// and construct one directly.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct FileCitation {
+    /// The ID of the file.
+    pub file_id: String,
+    /// The index of the file in the list of files.
+    pub index: u32,
+}
+
+/// A citation for a web resource; the payload of [`Annotation::UrlCitation`].
+///
+/// Fields are public so that code outside this module can read a deserialized citation
+/// and construct one directly.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct UrlCitation {
+    /// The index of the last character of the URL citation in the message.
+    pub end_index: u32,
+    /// The index of the first character of the URL citation in the message.
+    pub start_index: u32,
+    /// The title of the web resource.
+    pub title: String,
+    /// The URL of the web resource.
+    pub url: String,
+}
+
+/// A path to a file; the payload of [`Annotation::FilePath`].
+///
+/// Fields are public so that code outside this module can read a deserialized value
+/// and construct one directly.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct FilePath {
+    /// The ID of the file.
+    pub file_id: String,
+    /// The index of the file in the list of files.
+    pub index: u32,
+}
+
+/// A refusal explanation from the model.
+///
+/// Used as the payload of [`Content::Refusal`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct Refusal {
+    /// The refusal explanation from the model.
+    pub refusal: String,
+}
+
+/// A message generated by the model.
+///
+/// Used as the payload of [`OutputContent::Message`]. All fields are required when
+/// deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct OutputMessage {
+    /// The content of the output message (text and/or refusal parts).
+    pub content: Vec<Content>,
+    /// The unique ID of the output message.
+    pub id: String,
+    /// The role of the output message. Always assistant.
+    pub role: Role,
+    /// The status of the message input.
+    pub status: OutputStatus,
+}
+
+/// A single content part of an [`OutputMessage`].
+///
+/// Internally tagged on the `type` field; the tag is the snake_case variant name
+/// (`"output_text"`, `"refusal"`).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum Content {
+    /// A text output from the model.
+    OutputText(OutputText),
+    /// A refusal from the model.
+    Refusal(Refusal),
+}
+
+/// An item of the `output` array of a [`Response`].
+///
+/// Internally tagged on the `type` field; the tag is the snake_case variant name, e.g.
+/// `"message"`, `"file_search_call"`, `"function_call"`, `"web_search_call"`,
+/// `"computer_call"`, `"reasoning"`, `"image_generation_call"`, `"code_interpreter_call"`,
+/// `"local_shell_call"`, `"mcp_call"`, `"mcp_list_tools"`, `"mcp_approval_request"`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum OutputContent {
+    /// An output message from the model.
+    Message(OutputMessage),
+    /// The results of a file search tool call.
+    FileSearchCall(FileSearchCallOutput),
+    /// A tool call to run a function.
+    FunctionCall(FunctionCall),
+    /// The results of a web search tool call.
+    WebSearchCall(WebSearchCallOutput),
+    /// A tool call to a computer use tool.
+    ComputerCall(ComputerCallOutput),
+    /// A description of the chain of thought used by a reasoning model while generating a response.
+    /// Be sure to include these items in your input to the Responses API for subsequent turns of a
+    /// conversation if you are manually managing context.
+    Reasoning(ReasoningItem),
+    /// Image generation tool call output.
+    ImageGenerationCall(ImageGenerationCallOutput),
+    /// Code interpreter tool call output.
+    CodeInterpreterCall(CodeInterpreterCallOutput),
+    /// Local shell tool call output.
+    LocalShellCall(LocalShellCallOutput),
+    /// MCP tool invocation output.
+    McpCall(McpCallOutput),
+    /// MCP list-tools output.
+    McpListTools(McpListToolsOutput),
+    /// MCP approval request output.
+    McpApprovalRequest(McpApprovalRequestOutput),
+}
+
+/// A reasoning item representing the model's chain of thought, including summary paragraphs.
+///
+/// `id` and `summary` are required when deserializing; the optional fields are omitted from
+/// serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct ReasoningItem {
+    /// Unique identifier of the reasoning content.
+    pub id: String,
+    /// The summarized chain-of-thought paragraphs.
+    pub summary: Vec<SummaryText>,
+    /// The encrypted content of the reasoning item - populated when a response is generated with
+    /// `reasoning.encrypted_content` in the `include` parameter.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub encrypted_content: Option<String>,
+    /// The status of the reasoning item.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<OutputStatus>,
+}
+
+/// A single summary text fragment from reasoning.
+///
+/// Element type of [`ReasoningItem::summary`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct SummaryText {
+    /// A short summary of the reasoning used by the model.
+    pub text: String,
+}
+
+/// File search tool call output.
+///
+/// Used as the payload of [`OutputContent::FileSearchCall`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct FileSearchCallOutput {
+    /// The unique ID of the file search tool call.
+    pub id: String,
+    /// The queries used to search for files.
+    pub queries: Vec<String>,
+    /// The status of the file search tool call.
+    pub status: FileSearchCallOutputStatus,
+    /// The results of the file search tool call. Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub results: Option<Vec<FileSearchResult>>,
+}
+
+/// Status of a file search tool call.
+///
+/// Each variant is (de)serialized as its snake_case name.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum FileSearchCallOutputStatus {
+    /// Wire value `"in_progress"`.
+    InProgress,
+    /// Wire value `"searching"`.
+    Searching,
+    /// Wire value `"incomplete"`.
+    Incomplete,
+    /// Wire value `"failed"`.
+    Failed,
+    /// Wire value `"completed"`.
+    Completed,
+}
+
+/// A single result from a file search.
+///
+/// All fields, including `attributes`, are required when deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct FileSearchResult {
+    /// The unique ID of the file.
+    pub file_id: String,
+    /// The name of the file.
+    pub filename: String,
+    /// The relevance score of the file - a value between 0 and 1.
+    pub score: f32,
+    /// The text that was retrieved from the file.
+    pub text: String,
+    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing
+    /// additional information about the object in a structured format, and querying for objects
+    /// via API or the dashboard. Keys are strings with a maximum length of 64 characters.
+    /// Values are strings with a maximum length of 512 characters, booleans, or numbers.
+    pub attributes: HashMap<String, serde_json::Value>,
+}
+
+/// A pending safety check attached to a computer call.
+///
+/// Element type of [`ComputerCallOutput::pending_safety_checks`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct SafetyCheck {
+    /// The ID of the safety check.
+    pub id: String,
+    /// The type/code of the pending safety check.
+    pub code: String,
+    /// Details about the pending safety check.
+    pub message: String,
+}
+
+/// Web search tool call output.
+///
+/// Used as the payload of [`OutputContent::WebSearchCall`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct WebSearchCallOutput {
+    /// The unique ID of the web search tool call.
+    pub id: String,
+    /// The status of the web search tool call, kept as a free-form string rather than an enum.
+    pub status: String,
+}
+
+/// Output from a computer tool call.
+///
+/// Used as the payload of [`OutputContent::ComputerCall`]. All fields are required when
+/// deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct ComputerCallOutput {
+    /// The action carried by this computer call (see [`ComputerCallAction`]).
+    pub action: ComputerCallAction,
+    /// An identifier used when responding to the tool call with output.
+    pub call_id: String,
+    /// The unique ID of the computer call.
+    pub id: String,
+    /// The pending safety checks for the computer call.
+    pub pending_safety_checks: Vec<SafetyCheck>,
+    /// The status of the item.
+    pub status: OutputStatus,
+}
+
+/// A point in 2D space.
+///
+/// Element type of [`Drag::path`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Point {
+    /// X-coordinate of the point.
+    pub x: i32,
+    /// Y-coordinate of the point.
+    pub y: i32,
+}
+
+/// Represents all user-triggered actions.
+///
+/// Internally tagged on the `type` field. The tag is the snake_case variant name
+/// (`"click"`, `"double_click"`, `"drag"`, `"move"`, `"screenshot"`, `"scroll"`, `"type"`,
+/// `"wait"`), except for [`ComputerCallAction::KeyPress`], which is renamed explicitly.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum ComputerCallAction {
+    /// A click action.
+    Click(Click),
+
+    /// A double-click action.
+    DoubleClick(DoubleClick),
+
+    /// A drag action.
+    Drag(Drag),
+
+    /// A keypress action.
+    ///
+    /// The API spells this tag `"keypress"`, whereas the snake_case rule would produce
+    /// `"key_press"`. The old spelling is still accepted when deserializing so previously
+    /// serialized values keep loading.
+    #[serde(rename = "keypress", alias = "key_press")]
+    KeyPress(KeyPress),
+
+    /// A mouse move action.
+    Move(MoveAction),
+
+    /// A screenshot action.
+    Screenshot,
+
+    /// A scroll action.
+    Scroll(Scroll),
+
+    /// A type (text entry) action.
+    Type(TypeAction),
+
+    /// A wait (no-op) action.
+    Wait,
+}
+
+/// Mouse button identifier used by [`Click`].
+///
+/// Each variant is (de)serialized as its snake_case name.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ButtonPress {
+    /// Wire value `"left"`.
+    Left,
+    /// Wire value `"right"`.
+    Right,
+    /// Wire value `"wheel"`.
+    Wheel,
+    /// Wire value `"back"`.
+    Back,
+    /// Wire value `"forward"`.
+    Forward,
+}
+
+/// A click action; the payload of [`ComputerCallAction::Click`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Click {
+    /// Which mouse button was pressed.
+    pub button: ButtonPress,
+    /// X-coordinate of the click.
+    pub x: i32,
+    /// Y-coordinate of the click.
+    pub y: i32,
+}
+
+/// A double click action; the payload of [`ComputerCallAction::DoubleClick`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DoubleClick {
+    /// X-coordinate of the double click.
+    pub x: i32,
+    /// Y-coordinate of the double click.
+    pub y: i32,
+}
+
+/// A drag action; the payload of [`ComputerCallAction::Drag`].
+///
+/// All three fields are required when deserializing.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Drag {
+    /// The path of points the cursor drags through.
+    pub path: Vec<Point>,
+    /// X-coordinate at the end of the drag.
+    pub x: i32,
+    /// Y-coordinate at the end of the drag.
+    pub y: i32,
+}
+
+/// A keypress action; the payload of [`ComputerCallAction::KeyPress`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct KeyPress {
+    /// The list of keys to press (e.g. `["Control", "C"]`).
+    pub keys: Vec<String>,
+}
+
+/// A mouse move action; the payload of [`ComputerCallAction::Move`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct MoveAction {
+    /// X-coordinate to move to.
+    pub x: i32,
+    /// Y-coordinate to move to.
+    pub y: i32,
+}
+
+/// A scroll action; the payload of [`ComputerCallAction::Scroll`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct Scroll {
+    /// Horizontal scroll distance.
+    pub scroll_x: i32,
+    /// Vertical scroll distance.
+    pub scroll_y: i32,
+    /// X-coordinate where the scroll began.
+    pub x: i32,
+    /// Y-coordinate where the scroll began.
+    pub y: i32,
+}
+
+/// A typing (text entry) action; the payload of [`ComputerCallAction::Type`].
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct TypeAction {
+    /// The text to type.
+    pub text: String,
+}
+
+/// Metadata for a function call request.
+///
+/// Used as the payload of [`OutputContent::FunctionCall`]. All fields are required when
+/// deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct FunctionCall {
+    /// The unique ID of the function tool call.
+    pub id: String,
+    /// The unique ID of the function tool call generated by the model.
+    pub call_id: String,
+    /// The name of the function to run.
+    pub name: String,
+    /// A JSON string of the arguments to pass to the function. It is stored verbatim and not
+    /// parsed by this type.
+    pub arguments: String,
+    /// The status of the item.
+    pub status: OutputStatus,
+}
+
+/// Output of an image generation request.
+///
+/// Used as the payload of [`OutputContent::ImageGenerationCall`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct ImageGenerationCallOutput {
+    /// Unique ID of the image generation call.
+    pub id: String,
+    /// Base64-encoded generated image, or null. Serialized as `null` when `None`, since no
+    /// `skip_serializing_if` is applied.
+    pub result: Option<String>,
+    /// Status of the image generation call, kept as a free-form string.
+    pub status: String,
+}
+
+/// Output of a code interpreter request.
+///
+/// Used as the payload of [`OutputContent::CodeInterpreterCall`]. All fields are required
+/// when deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct CodeInterpreterCallOutput {
+    /// The code that was executed.
+    pub code: String,
+    /// Unique ID of the call.
+    pub id: String,
+    /// Status of the tool call, kept as a free-form string.
+    pub status: String,
+    /// ID of the container used to run the code.
+    pub container_id: String,
+    /// The results of the execution: logs or files.
+    pub results: Vec<CodeInterpreterResult>,
+}
+
+/// Individual result from a code interpreter: either logs or files.
+///
+/// Internally tagged on the `type` field; the tag is `"logs"` or `"files"`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum CodeInterpreterResult {
+    /// Text logs from the execution.
+    Logs(CodeInterpreterTextOutput),
+    /// File outputs from the execution.
+    Files(CodeInterpreterFileOutput),
+}
+
+/// The output containing execution logs; the payload of [`CodeInterpreterResult::Logs`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct CodeInterpreterTextOutput {
+    /// The logs of the code interpreter tool call.
+    pub logs: String,
+}
+
+/// The output containing file references; the payload of [`CodeInterpreterResult::Files`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct CodeInterpreterFileOutput {
+    /// The files produced, each described by a [`CodeInterpreterFile`].
+    pub files: Vec<CodeInterpreterFile>,
+}
+
+/// A file produced by the code interpreter; element type of
+/// [`CodeInterpreterFileOutput::files`].
+///
+/// Fields are public so that code outside this module can read a deserialized value
+/// and construct one directly.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct CodeInterpreterFile {
+    /// The ID of the file.
+    pub file_id: String,
+    /// The MIME type of the file.
+    pub mime_type: String,
+}
+
+/// Output of a local shell command request.
+///
+/// Used as the payload of [`OutputContent::LocalShellCall`]. All fields are required when
+/// deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct LocalShellCallOutput {
+    /// Details of the exec action.
+    pub action: LocalShellAction,
+    /// Unique call identifier for responding to the tool call.
+    pub call_id: String,
+    /// Unique ID of the local shell call.
+    pub id: String,
+    /// Status of the local shell call, kept as a free-form string.
+    pub status: String,
+}
+
+/// Define the shape of a local shell action (exec).
+///
+/// `command` and `env` are required when deserializing. The `Option` fields deserialize to
+/// `None` when absent and serialize as `null` when `None`, since no `skip_serializing_if`
+/// is applied.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct LocalShellAction {
+    /// The command to run.
+    pub command: Vec<String>,
+    /// Environment variables to set for the command.
+    pub env: HashMap<String, String>,
+    /// Optional timeout for the command (ms).
+    pub timeout_ms: Option<u64>,
+    /// Optional user to run the command as.
+    pub user: Option<String>,
+    /// Optional working directory for the command.
+    pub working_directory: Option<String>,
+}
+
+/// Output of an MCP server tool invocation.
+///
+/// Used as the payload of [`OutputContent::McpCall`]. `error` and `output` serialize as
+/// `null` when `None`, since no `skip_serializing_if` is applied.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct McpCallOutput {
+    /// JSON string of the arguments passed.
+    pub arguments: String,
+    /// Unique ID of the MCP call.
+    pub id: String,
+    /// Name of the tool invoked.
+    pub name: String,
+    /// Label of the MCP server.
+    pub server_label: String,
+    /// Error message from the call, if any.
+    pub error: Option<String>,
+    /// Output from the call, if any.
+    pub output: Option<String>,
+}
+
+/// Output listing tools available on an MCP server.
+///
+/// Used as the payload of [`OutputContent::McpListTools`].
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct McpListToolsOutput {
+    /// Unique ID of the list request.
+    pub id: String,
+    /// Label of the MCP server.
+    pub server_label: String,
+    /// Tools available on the server with metadata.
+    pub tools: Vec<McpToolInfo>,
+    /// Error message if listing failed. Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<String>,
+}
+
+/// Information about a single tool on an MCP server.
+///
+/// `name` and `input_schema` are required when deserializing; the optional fields are
+/// omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct McpToolInfo {
+    /// The name of the tool.
+    pub name: String,
+    /// The JSON schema describing the tool's input, kept as an untyped JSON value.
+    pub input_schema: Value,
+    /// Additional annotations about the tool, kept as an untyped JSON value.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub annotations: Option<Value>,
+    /// The description of the tool.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+}
+
+/// Output representing a human approval request for an MCP tool.
+///
+/// Used as the payload of [`OutputContent::McpApprovalRequest`]. All fields are required
+/// when deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct McpApprovalRequestOutput {
+    /// JSON string of arguments for the tool.
+    pub arguments: String,
+    /// Unique ID of the approval request.
+    pub id: String,
+    /// Name of the tool requiring approval.
+    pub name: String,
+    /// Label of the MCP server making the request.
+    pub server_label: String,
+}
+
+/// Usage statistics for a response.
+///
+/// All fields, including both detail breakdowns, are required when deserializing.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct Usage {
+    /// The number of input tokens.
+    pub input_tokens: u32,
+    /// A detailed breakdown of the input tokens.
+    pub input_tokens_details: PromptTokensDetails,
+    /// The number of output tokens.
+    pub output_tokens: u32,
+    /// A detailed breakdown of the output tokens.
+    pub output_tokens_details: CompletionTokensDetails,
+    /// The total number of tokens used.
+    pub total_tokens: u32,
+}
+
+/// The complete response returned by the Responses API.
+///
+/// `created_at`, `id`, `model`, `object`, `output` and `status` are required when
+/// deserializing. Every `Option` field deserializes to `None` when absent and is omitted
+/// from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct Response {
+    /// Unix timestamp (in seconds) when this Response was created.
+    pub created_at: u64,
+
+    /// Error object if the API failed to generate a response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<ErrorObject>,
+
+    /// Unique identifier for this response.
+    pub id: String,
+
+    /// Details about why the response is incomplete, if any.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub incomplete_details: Option<IncompleteDetails>,
+
+    /// Instructions that were inserted as the first item in context.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// The value of `max_output_tokens` that was honored.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<u32>,
+
+    /// Metadata tags/values that were attached to this response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+
+    /// Model ID used to generate the response.
+    pub model: String,
+
+    /// The object type - always `response`.
+    pub object: String,
+
+    /// The array of content items generated by the model.
+    pub output: Vec<OutputContent>,
+
+    /// SDK-only convenience property that contains the aggregated text output from all
+    /// `output_text` items in the `output` array, if any are present.
+    /// Supported in the Python and JavaScript SDKs.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output_text: Option<String>,
+
+    /// Whether parallel tool calls were enabled.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+
+    /// Previous response ID, if creating part of a multi-turn conversation.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+
+    /// Reasoning configuration echoed back (effort, summary settings).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
+
+    /// Whether to store the generated model response for later retrieval via API.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub store: Option<bool>,
+
+    /// The service tier that actually processed this response.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub service_tier: Option<ServiceTier>,
+
+    /// The status of the response generation.
+    pub status: Status,
+
+    /// Sampling temperature that was used.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// Text format configuration echoed back (plain, json_object, json_schema).
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<TextConfig>,
+
+    /// How the model chose or was forced to choose a tool.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<ToolChoice>,
+
+    /// Tool definitions that were provided.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<ToolDefinition>>,
+
+    /// Nucleus sampling cutoff that was used.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+
+    /// Truncation strategy that was applied.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation: Option<Truncation>,
+
+    /// Token usage statistics for this request.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub usage: Option<Usage>,
+
+    /// End-user ID for which this response was generated.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub user: Option<String>,
+}
+
+/// Status of a [`Response`].
+///
+/// Each variant is (de)serialized as its snake_case name.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum Status {
+    /// Wire value `"completed"`.
+    Completed,
+    /// Wire value `"failed"`.
+    Failed,
+    /// Wire value `"in_progress"`.
+    InProgress,
+    /// Wire value `"incomplete"`.
+    Incomplete,
+}
+
+/// Event types for streaming responses from the Responses API
+///
+/// Internally tagged on the `type` field: each variant carries an explicit `rename` giving
+/// the exact event name on the wire. The final `Unknown` variant is untagged and acts as a
+/// fallback that keeps the raw JSON when none of the tagged variants match.
+///
+/// The enum is `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type")]
+#[non_exhaustive] // Future-proof against breaking changes
+pub enum ResponseEvent {
+    /// Response creation started
+    #[serde(rename = "response.created")]
+    ResponseCreated(ResponseCreated),
+    /// Processing in progress
+    #[serde(rename = "response.in_progress")]
+    ResponseInProgress(ResponseInProgress),
+    /// Response completed (different from done)
+    #[serde(rename = "response.completed")]
+    ResponseCompleted(ResponseCompleted),
+    /// Response failed
+    #[serde(rename = "response.failed")]
+    ResponseFailed(ResponseFailed),
+    /// Response incomplete
+    #[serde(rename = "response.incomplete")]
+    ResponseIncomplete(ResponseIncomplete),
+    /// Response queued
+    #[serde(rename = "response.queued")]
+    ResponseQueued(ResponseQueued),
+    /// Output item added
+    #[serde(rename = "response.output_item.added")]
+    ResponseOutputItemAdded(ResponseOutputItemAdded),
+    /// Content part added
+    #[serde(rename = "response.content_part.added")]
+    ResponseContentPartAdded(ResponseContentPartAdded),
+    /// Text delta update
+    #[serde(rename = "response.output_text.delta")]
+    ResponseOutputTextDelta(ResponseOutputTextDelta),
+    /// Text output completed
+    #[serde(rename = "response.output_text.done")]
+    ResponseOutputTextDone(ResponseOutputTextDone),
+    /// Refusal delta update
+    #[serde(rename = "response.refusal.delta")]
+    ResponseRefusalDelta(ResponseRefusalDelta),
+    /// Refusal completed
+    #[serde(rename = "response.refusal.done")]
+    ResponseRefusalDone(ResponseRefusalDone),
+    /// Content part completed
+    #[serde(rename = "response.content_part.done")]
+    ResponseContentPartDone(ResponseContentPartDone),
+    /// Output item completed
+    #[serde(rename = "response.output_item.done")]
+    ResponseOutputItemDone(ResponseOutputItemDone),
+    /// Function call arguments delta
+    #[serde(rename = "response.function_call_arguments.delta")]
+    ResponseFunctionCallArgumentsDelta(ResponseFunctionCallArgumentsDelta),
+    /// Function call arguments completed
+    #[serde(rename = "response.function_call_arguments.done")]
+    ResponseFunctionCallArgumentsDone(ResponseFunctionCallArgumentsDone),
+    /// File search call in progress
+    #[serde(rename = "response.file_search_call.in_progress")]
+    ResponseFileSearchCallInProgress(ResponseFileSearchCallInProgress),
+    /// File search call searching
+    #[serde(rename = "response.file_search_call.searching")]
+    ResponseFileSearchCallSearching(ResponseFileSearchCallSearching),
+    /// File search call completed
+    #[serde(rename = "response.file_search_call.completed")]
+    ResponseFileSearchCallCompleted(ResponseFileSearchCallCompleted),
+    /// Web search call in progress
+    #[serde(rename = "response.web_search_call.in_progress")]
+    ResponseWebSearchCallInProgress(ResponseWebSearchCallInProgress),
+    /// Web search call searching
+    #[serde(rename = "response.web_search_call.searching")]
+    ResponseWebSearchCallSearching(ResponseWebSearchCallSearching),
+    /// Web search call completed
+    #[serde(rename = "response.web_search_call.completed")]
+    ResponseWebSearchCallCompleted(ResponseWebSearchCallCompleted),
+    /// Reasoning summary part added
+    #[serde(rename = "response.reasoning_summary_part.added")]
+    ResponseReasoningSummaryPartAdded(ResponseReasoningSummaryPartAdded),
+    /// Reasoning summary part done
+    #[serde(rename = "response.reasoning_summary_part.done")]
+    ResponseReasoningSummaryPartDone(ResponseReasoningSummaryPartDone),
+    /// Reasoning summary text delta
+    #[serde(rename = "response.reasoning_summary_text.delta")]
+    ResponseReasoningSummaryTextDelta(ResponseReasoningSummaryTextDelta),
+    /// Reasoning summary text done
+    #[serde(rename = "response.reasoning_summary_text.done")]
+    ResponseReasoningSummaryTextDone(ResponseReasoningSummaryTextDone),
+    /// Reasoning summary delta
+    #[serde(rename = "response.reasoning_summary.delta")]
+    ResponseReasoningSummaryDelta(ResponseReasoningSummaryDelta),
+    /// Reasoning summary done
+    #[serde(rename = "response.reasoning_summary.done")]
+    ResponseReasoningSummaryDone(ResponseReasoningSummaryDone),
+    /// Image generation call in progress
+    #[serde(rename = "response.image_generation_call.in_progress")]
+    ResponseImageGenerationCallInProgress(ResponseImageGenerationCallInProgress),
+    /// Image generation call generating
+    #[serde(rename = "response.image_generation_call.generating")]
+    ResponseImageGenerationCallGenerating(ResponseImageGenerationCallGenerating),
+    /// Image generation call partial image
+    #[serde(rename = "response.image_generation_call.partial_image")]
+    ResponseImageGenerationCallPartialImage(ResponseImageGenerationCallPartialImage),
+    /// Image generation call completed
+    #[serde(rename = "response.image_generation_call.completed")]
+    ResponseImageGenerationCallCompleted(ResponseImageGenerationCallCompleted),
+    /// MCP call arguments delta
+    #[serde(rename = "response.mcp_call_arguments.delta")]
+    ResponseMcpCallArgumentsDelta(ResponseMcpCallArgumentsDelta),
+    /// MCP call arguments done
+    #[serde(rename = "response.mcp_call_arguments.done")]
+    ResponseMcpCallArgumentsDone(ResponseMcpCallArgumentsDone),
+    /// MCP call completed
+    #[serde(rename = "response.mcp_call.completed")]
+    ResponseMcpCallCompleted(ResponseMcpCallCompleted),
+    /// MCP call failed
+    #[serde(rename = "response.mcp_call.failed")]
+    ResponseMcpCallFailed(ResponseMcpCallFailed),
+    /// MCP call in progress
+    #[serde(rename = "response.mcp_call.in_progress")]
+    ResponseMcpCallInProgress(ResponseMcpCallInProgress),
+    /// MCP list tools completed
+    #[serde(rename = "response.mcp_list_tools.completed")]
+    ResponseMcpListToolsCompleted(ResponseMcpListToolsCompleted),
+    /// MCP list tools failed
+    #[serde(rename = "response.mcp_list_tools.failed")]
+    ResponseMcpListToolsFailed(ResponseMcpListToolsFailed),
+    /// MCP list tools in progress
+    #[serde(rename = "response.mcp_list_tools.in_progress")]
+    ResponseMcpListToolsInProgress(ResponseMcpListToolsInProgress),
+    /// Code interpreter call in progress
+    #[serde(rename = "response.code_interpreter_call.in_progress")]
+    ResponseCodeInterpreterCallInProgress(ResponseCodeInterpreterCallInProgress),
+    /// Code interpreter call interpreting
+    #[serde(rename = "response.code_interpreter_call.interpreting")]
+    ResponseCodeInterpreterCallInterpreting(ResponseCodeInterpreterCallInterpreting),
+    /// Code interpreter call completed
+    #[serde(rename = "response.code_interpreter_call.completed")]
+    ResponseCodeInterpreterCallCompleted(ResponseCodeInterpreterCallCompleted),
+    /// Code interpreter call code delta
+    #[serde(rename = "response.code_interpreter_call_code.delta")]
+    ResponseCodeInterpreterCallCodeDelta(ResponseCodeInterpreterCallCodeDelta),
+    /// Code interpreter call code done
+    #[serde(rename = "response.code_interpreter_call_code.done")]
+    ResponseCodeInterpreterCallCodeDone(ResponseCodeInterpreterCallCodeDone),
+    /// Output text annotation added
+    #[serde(rename = "response.output_text.annotation.added")]
+    ResponseOutputTextAnnotationAdded(ResponseOutputTextAnnotationAdded),
+    /// Error occurred
+    #[serde(rename = "error")]
+    ResponseError(ResponseError),
+
+    /// Unknown event type
+    ///
+    /// Untagged fallback: holds the whole event as raw JSON when no tagged variant applies.
+    #[serde(untagged)]
+    Unknown(serde_json::Value),
+}
+
+/// Stream of response events
+///
+/// A pinned, boxed, `Send` trait object whose items are `Result<ResponseEvent, OpenAIError>`.
+pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<ResponseEvent, OpenAIError>> + Send>>;
+
+/// Payload of the `response.created` stream event.
+///
+/// `#[non_exhaustive]`: cannot be built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseCreated {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Response metadata attached to the event.
+    pub response: ResponseMetadata,
+}
+
+/// Payload of the `response.in_progress` stream event.
+///
+/// `#[non_exhaustive]`: cannot be built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseInProgress {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Response metadata attached to the event.
+    pub response: ResponseMetadata,
+}
+
+/// Payload of the `response.output_item.added` stream event.
+///
+/// `#[non_exhaustive]`: cannot be built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseOutputItemAdded {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Index of the output item.
+    pub output_index: u32,
+    /// The output item that was added.
+    pub item: OutputItem,
+}
+
+/// Payload of the `response.content_part.added` stream event.
+///
+/// `#[non_exhaustive]`: cannot be built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseContentPartAdded {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// ID of the output item the part belongs to.
+    pub item_id: String,
+    /// Index of the output item.
+    pub output_index: u32,
+    /// Index of the content part.
+    pub content_index: u32,
+    /// The content part that was added.
+    pub part: ContentPart,
+}
+
+/// Payload of the `response.output_text.delta` stream event.
+///
+/// `#[non_exhaustive]`: cannot be built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseOutputTextDelta {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// ID of the output item the delta belongs to.
+    pub item_id: String,
+    /// Index of the output item.
+    pub output_index: u32,
+    /// Index of the content part.
+    pub content_index: u32,
+    /// The text fragment carried by this event.
+    pub delta: String,
+    /// Log-probability data as untyped JSON. Defaults to `None` when absent and is omitted
+    /// from serialized output when `None`.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub logprobs: Option<serde_json::Value>,
+}
+
+/// Payload of the `response.content_part.done` stream event.
+///
+/// `#[non_exhaustive]`: cannot be built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseContentPartDone {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// ID of the output item the part belongs to.
+    pub item_id: String,
+    /// Index of the output item.
+    pub output_index: u32,
+    /// Index of the content part.
+    pub content_index: u32,
+    /// The completed content part.
+    pub part: ContentPart,
+}
+
+/// Payload of the `response.output_item.done` stream event.
+///
+/// `#[non_exhaustive]`: cannot be built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseOutputItemDone {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Index of the output item.
+    pub output_index: u32,
+    /// The completed output item.
+    pub item: OutputItem,
+}
+
+/// Response completed event
+///
+/// Payload of the `response.completed` stream event. `#[non_exhaustive]`: cannot be built
+/// with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseCompleted {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Response metadata attached to the event.
+    pub response: ResponseMetadata,
+}
+
+/// Response failed event
+///
+/// Payload of the `response.failed` stream event. `#[non_exhaustive]`: cannot be built
+/// with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseFailed {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Response metadata attached to the event.
+    pub response: ResponseMetadata,
+}
+
+/// Response incomplete event
+///
+/// Payload of the `response.incomplete` stream event. `#[non_exhaustive]`: cannot be built
+/// with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseIncomplete {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Response metadata attached to the event.
+    pub response: ResponseMetadata,
+}
+
+/// Response queued event
+///
+/// Payload of the `response.queued` stream event. `#[non_exhaustive]`: cannot be built
+/// with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseQueued {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// Response metadata attached to the event.
+    pub response: ResponseMetadata,
+}
+
+/// Text output completed event
+///
+/// Payload of the `response.output_text.done` stream event. `#[non_exhaustive]`: cannot be
+/// built with a struct literal outside this crate.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseOutputTextDone {
+    /// Sequence number carried by the event.
+    pub sequence_number: u64,
+    /// ID of the output item the text belongs to.
+    pub item_id: String,
+    /// Index of the output item.
+    pub output_index: u32,
+    /// Index of the content part.
+    pub content_index: u32,
+    /// The finalized text.
+    pub text: String,
+    /// Log-probability entries as untyped JSON. Deserializes to `None` when absent and
+    /// serializes as `null` when `None`, since no `skip_serializing_if` is applied.
+    pub logprobs: Option<Vec<serde_json::Value>>,
+}
+
+/// Refusal delta event
+///
+/// The refusal text fragment is in `delta`, addressed by `item_id`,
+/// `output_index` and `content_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseRefusalDelta {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub content_index: u32,
+    pub delta: String,
+}
+
+/// Refusal done event
+///
+/// The refusal text is in `refusal`, addressed by `item_id`, `output_index`
+/// and `content_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseRefusalDone {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub content_index: u32,
+    pub refusal: String,
+}
+
+/// Function call arguments delta event
+///
+/// The arguments fragment is in `delta` as a plain `String`; it is not parsed here.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseFunctionCallArgumentsDelta {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub delta: String,
+}
+
+/// Function call arguments done event
+///
+/// The arguments are in `arguments` as a plain `String`; it is not parsed here
+/// (presumably JSON text — confirm against the API).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseFunctionCallArgumentsDone {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub arguments: String,
+}
+
+/// Error event
+///
+/// `message` is always present; `code` and `param` are optional. Neither
+/// optional field has `skip_serializing_if`, so `None` serializes as `null`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseError {
+    pub sequence_number: u64,
+    pub code: Option<String>,
+    pub message: String,
+    pub param: Option<String>,
+}
+
+/// File search call in progress event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseFileSearchCallInProgress {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// File search call searching event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseFileSearchCallSearching {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// File search call completed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseFileSearchCallCompleted {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Web search call in progress event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseWebSearchCallInProgress {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Web search call searching event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseWebSearchCallSearching {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Web search call completed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseWebSearchCallCompleted {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Reasoning summary part added event
+///
+/// The part is addressed by `item_id`, `output_index` and `summary_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseReasoningSummaryPartAdded {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub summary_index: u32,
+    /// Deliberately untyped JSON; no schema is enforced on deserialization.
+    pub part: serde_json::Value, // Could be more specific but using Value for flexibility
+}
+
+/// Reasoning summary part done event
+///
+/// The part is addressed by `item_id`, `output_index` and `summary_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseReasoningSummaryPartDone {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub summary_index: u32,
+    /// Untyped JSON; no schema is enforced on deserialization.
+    pub part: serde_json::Value,
+}
+
+/// Reasoning summary text delta event
+///
+/// The text fragment is in `delta`, addressed by `item_id`, `output_index`
+/// and `summary_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseReasoningSummaryTextDelta {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub summary_index: u32,
+    pub delta: String,
+}
+
+/// Reasoning summary text done event
+///
+/// The text is in `text`, addressed by `item_id`, `output_index` and
+/// `summary_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseReasoningSummaryTextDone {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub summary_index: u32,
+    pub text: String,
+}
+
+/// Reasoning summary delta event
+///
+/// Unlike `ResponseReasoningSummaryTextDelta`, whose `delta` is a `String`,
+/// `delta` here is untyped JSON.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseReasoningSummaryDelta {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub summary_index: u32,
+    /// Untyped JSON; no schema is enforced on deserialization.
+    pub delta: serde_json::Value,
+}
+
+/// Reasoning summary done event
+///
+/// The summary text is in `text`, addressed by `item_id`, `output_index` and
+/// `summary_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseReasoningSummaryDone {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub summary_index: u32,
+    pub text: String,
+}
+
+/// Image generation call in progress event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseImageGenerationCallInProgress {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Image generation call generating event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseImageGenerationCallGenerating {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Image generation call partial image event
+///
+/// The partial image is identified by `partial_image_index` and delivered in
+/// `partial_image_b64`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseImageGenerationCallPartialImage {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+    pub partial_image_index: u32,
+    /// Stored as a plain `String`; presumably base64-encoded image bytes
+    /// (going by the `_b64` suffix) — not decoded or validated here.
+    pub partial_image_b64: String,
+}
+
+/// Image generation call completed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseImageGenerationCallCompleted {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// MCP call arguments delta event
+///
+/// The arguments fragment is in `delta` as a plain `String`; it is not parsed here.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpCallArgumentsDelta {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+    pub delta: String,
+}
+
+/// MCP call arguments done event
+///
+/// The arguments are in `arguments` as a plain `String`; it is not parsed here.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpCallArgumentsDone {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+    pub arguments: String,
+}
+
+/// MCP call completed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpCallCompleted {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// MCP call failed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id`
+/// fields; no failure reason is modelled on this struct.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpCallFailed {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// MCP call in progress event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpCallInProgress {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// MCP list tools completed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpListToolsCompleted {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// MCP list tools failed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id`
+/// fields; no failure reason is modelled on this struct.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpListToolsFailed {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// MCP list tools in progress event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMcpListToolsInProgress {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Code interpreter call in progress event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseCodeInterpreterCallInProgress {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Code interpreter call interpreting event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseCodeInterpreterCallInterpreting {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Code interpreter call completed event
+///
+/// Carries only the event `sequence_number` plus the `output_index` and `item_id` fields.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseCodeInterpreterCallCompleted {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+}
+
+/// Code interpreter call code delta event
+///
+/// The code fragment is in `delta` as a plain `String`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseCodeInterpreterCallCodeDelta {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+    pub delta: String,
+}
+
+/// Code interpreter call code done event
+///
+/// The code is in `code` as a plain `String`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseCodeInterpreterCallCodeDone {
+    pub sequence_number: u64,
+    pub output_index: u32,
+    pub item_id: String,
+    pub code: String,
+}
+
+/// Response metadata
+///
+/// Only `id`, `created_at` and `status` are required. Every other field is an
+/// `Option` that is omitted from serialized output when `None`
+/// (`skip_serializing_if = "Option::is_none"`); serde's derive treats a missing
+/// `Option` field as `None` on deserialization.
+///
+/// Marked `#[non_exhaustive]`, so code outside this crate cannot construct it
+/// with a struct literal.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseMetadata {
+    pub id: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub object: Option<String>,
+    /// NOTE(review): presumably a Unix timestamp in seconds — confirm against the API.
+    pub created_at: u64,
+    pub status: Status,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub usage: Option<Usage>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub error: Option<ErrorObject>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub incomplete_details: Option<IncompleteDetails>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub input: Option<Input>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_output_tokens: Option<u32>,
+    /// Whether the model was run in background mode
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub background: Option<bool>,
+    /// The service tier that was actually used
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub service_tier: Option<ServiceTier>,
+    /// The effective value of top_logprobs parameter
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_logprobs: Option<u32>,
+    /// The effective value of max_tool_calls parameter
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_tool_calls: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub output: Option<Vec<OutputItem>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub previous_response_id: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning: Option<ReasoningConfig>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub store: Option<bool>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub text: Option<TextConfig>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<ToolChoice>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<ToolDefinition>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation: Option<Truncation>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub user: Option<String>,
+    /// String-to-string map; non-string values fail deserialization.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    /// Prompt cache key for improved performance
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub prompt_cache_key: Option<String>,
+    /// Safety identifier for content filtering
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub safety_identifier: Option<String>,
+}
+
+/// Output item
+///
+/// Only `id` and the item type are required; the remaining fields are optional
+/// and omitted from serialized output when `None`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct OutputItem {
+    pub id: String,
+    /// Serialized under the JSON key `type`; kept as a free-form `String`
+    /// rather than an enum.
+    #[serde(rename = "type")]
+    pub item_type: String,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub status: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub content: Option<Vec<ContentPart>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub role: Option<String>,
+    /// For reasoning items - summary paragraphs
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub summary: Option<Vec<serde_json::Value>>,
+}
+
+/// Content part
+///
+/// Marked `#[non_exhaustive]`, so code outside this crate cannot construct it
+/// with a struct literal.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ContentPart {
+    /// Serialized under the JSON key `type`; kept as a free-form `String`.
+    #[serde(rename = "type")]
+    pub part_type: String,
+    /// No `skip_serializing_if` here, so `None` serializes as `null`.
+    pub text: Option<String>,
+    /// Untyped JSON values; defaults to `None` when absent, omitted when `None`.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub annotations: Option<Vec<serde_json::Value>>,
+    /// Untyped JSON values; defaults to `None` when absent, omitted when `None`.
+    #[serde(default, skip_serializing_if = "Option::is_none")]
+    pub logprobs: Option<Vec<serde_json::Value>>,
+}
+
+// ===== RESPONSE COLLECTOR =====
+
+/// Output text annotation added event
+///
+/// Carries a [`TextAnnotation`] in `annotation`, addressed by `item_id`,
+/// `output_index`, `content_index` and `annotation_index`.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct ResponseOutputTextAnnotationAdded {
+    pub sequence_number: u64,
+    pub item_id: String,
+    pub output_index: u32,
+    pub content_index: u32,
+    pub annotation_index: u32,
+    pub annotation: TextAnnotation,
+}
+
+/// Text annotation object for output text
+///
+/// All four fields are required on deserialization.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[non_exhaustive]
+pub struct TextAnnotation {
+    /// Serialized under the JSON key `type`; kept as a free-form `String`.
+    #[serde(rename = "type")]
+    pub annotation_type: String,
+    pub text: String,
+    /// NOTE(review): presumably an offset into the annotated text; the unit
+    /// (bytes vs. characters) and inclusivity are not visible here — confirm.
+    pub start: u32,
+    /// NOTE(review): see `start`; whether this bound is inclusive is not visible here.
+    pub end: u32,
+}
--- a/lib/async-openai/src/types/run.rs
+++ b/lib/async-openai/src/types/run.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use std::collections::HashMap;
+
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+
+use crate::{error::OpenAIError, types::FunctionCall};
+
+use super::{
+    AssistantTools, AssistantsApiResponseFormatOption, AssistantsApiToolChoiceOption,
+    CreateMessageRequest,
+};
+
+/// Represents an execution run on a [thread](https://platform.openai.com/docs/api-reference/threads).
+///
+/// NOTE(review): all timestamps on this type are `i32` Unix seconds, which
+/// cannot represent instants after 2038-01-19; widening would be a breaking
+/// change to the public fields, so it is only flagged here.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunObject {
+    /// The identifier, which can be referenced in API endpoints.
+    pub id: String,
+    /// The object type, which is always `thread.run`.
+    pub object: String,
+    /// The Unix timestamp (in seconds) for when the run was created.
+    pub created_at: i32,
+    /// The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) that was executed on as a part of this run.
+    pub thread_id: String,
+
+    /// The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for execution of this run.
+    pub assistant_id: Option<String>,
+
+    /// The status of the run, which can be either `queued`, `in_progress`, `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, `incomplete`, or `expired`.
+    pub status: RunStatus,
+
+    /// Details on the action required to continue the run. Will be `null` if no action is required.
+    pub required_action: Option<RequiredAction>,
+
+    /// The last error associated with this run. Will be `null` if there are no errors.
+    pub last_error: Option<LastError>,
+
+    /// The Unix timestamp (in seconds) for when the run will expire.
+    pub expires_at: Option<i32>,
+    /// The Unix timestamp (in seconds) for when the run was started.
+    pub started_at: Option<i32>,
+    /// The Unix timestamp (in seconds) for when the run was cancelled.
+    pub cancelled_at: Option<i32>,
+    /// The Unix timestamp (in seconds) for when the run failed.
+    pub failed_at: Option<i32>,
+    /// The Unix timestamp (in seconds) for when the run was completed.
+    pub completed_at: Option<i32>,
+
+    /// Details on why the run is incomplete. Will be `null` if the run is not incomplete.
+    pub incomplete_details: Option<RunObjectIncompleteDetails>,
+
+    /// The model that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run.
+    pub model: String,
+
+    /// The instructions that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run.
+    pub instructions: String,
+
+    /// The list of tools that the [assistant](https://platform.openai.com/docs/api-reference/assistants) used for this run.
+    pub tools: Vec<AssistantTools>,
+
+    /// Free-form key/value metadata; values are arbitrary JSON.
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+
+    /// Usage statistics related to the run. This value will be `null` if the run is not in a terminal state (i.e. `in_progress`, `queued`, etc.).
+    pub usage: Option<RunCompletionUsage>,
+
+    /// The sampling temperature used for this run. If not set, defaults to 1.
+    pub temperature: Option<f32>,
+
+    /// The nucleus sampling value used for this run. If not set, defaults to 1.
+    pub top_p: Option<f32>,
+
+    /// The maximum number of prompt tokens specified to have been used over the course of the run.
+    pub max_prompt_tokens: Option<u32>,
+
+    /// The maximum number of completion tokens specified to have been used over the course of the run.
+    pub max_completion_tokens: Option<u32>,
+
+    /// Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
+    pub truncation_strategy: Option<TruncationObject>,
+
+    /// Tool choice recorded for this run, if any.
+    pub tool_choice: Option<AssistantsApiToolChoiceOption>,
+
+    /// Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use.
+    pub parallel_tool_calls: bool,
+
+    /// Response format recorded for this run, if any.
+    pub response_format: Option<AssistantsApiResponseFormatOption>,
+}
+
+/// Truncation strategy selector used by [`TruncationObject`].
+///
+/// Serialized as snake_case strings (`auto`, `last_messages`); `Default`
+/// yields `Auto`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum TruncationObjectType {
+    #[default]
+    Auto,
+    LastMessages,
+}
+
+/// Thread Truncation Controls
+///
+/// `last_messages` has no `skip_serializing_if`, so `None` serializes as `null`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct TruncationObject {
+    /// The truncation strategy to use for the thread. The default is `auto`. If set to `last_messages`, the thread will be truncated to the n most recent messages in the thread. When set to `auto`, messages in the middle of the thread will be dropped to fit the context length of the model, `max_prompt_tokens`.
+    pub r#type: TruncationObjectType,
+    /// The number of most recent messages from the thread when constructing the context for the run.
+    pub last_messages: Option<u32>,
+}
+
+/// Wrapper around the reason a run ended as incomplete; used by
+/// `RunObject::incomplete_details`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunObjectIncompleteDetails {
+    /// The reason why the run is incomplete. This will point to which specific token limit was reached over the course of the run.
+    pub reason: RunObjectIncompleteDetailsReason,
+}
+
+/// Token limit that caused a run to end as incomplete.
+///
+/// Serialized as snake_case strings (`max_completion_tokens`,
+/// `max_prompt_tokens`). Any other string fails deserialization because there
+/// is no catch-all variant.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum RunObjectIncompleteDetailsReason {
+    MaxCompletionTokens,
+    MaxPromptTokens,
+}
+
+/// Status of a run (and, via `RunStepObject::status`, of a run step).
+///
+/// Serialized as snake_case strings: `queued`, `in_progress`,
+/// `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`,
+/// `incomplete`, `expired`. Any other string fails deserialization because
+/// there is no catch-all variant.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum RunStatus {
+    Queued,
+    InProgress,
+    RequiresAction,
+    Cancelling,
+    Cancelled,
+    Failed,
+    Completed,
+    Incomplete,
+    Expired,
+}
+
+/// Action required to continue a run; used by `RunObject::required_action`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RequiredAction {
+    /// For now, this is always `submit_tool_outputs`.
+    pub r#type: String,
+
+    /// The tool calls whose outputs are being requested.
+    pub submit_tool_outputs: SubmitToolOutputs,
+}
+
+/// Payload of [`RequiredAction`]: the list of tool calls awaiting outputs.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct SubmitToolOutputs {
+    /// One entry per tool call.
+    pub tool_calls: Vec<RunToolCallObject>,
+}
+
+/// A single tool call listed in [`SubmitToolOutputs`].
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunToolCallObject {
+    /// The ID of the tool call. This ID must be referenced when you submit the tool outputs in using the [Submit tool outputs to run](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs) endpoint.
+    pub id: String,
+    /// The type of tool call the output is required for. For now, this is always `function`.
+    pub r#type: String,
+    /// The function definition.
+    pub function: FunctionCall,
+}
+
+/// Last error associated with a run or run step.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct LastError {
+    /// One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`.
+    pub code: LastErrorCode,
+    /// A human-readable description of the error.
+    pub message: String,
+}
+
+/// Error code carried by [`LastError`].
+///
+/// Serialized as snake_case strings (`server_error`, `rate_limit_exceeded`,
+/// `invalid_prompt`). Any other string fails deserialization because there is
+/// no catch-all variant.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum LastErrorCode {
+    ServerError,
+    RateLimitExceeded,
+    InvalidPrompt,
+}
+
+/// Token usage for a run; used by `RunObject::usage`.
+///
+/// The three counters are stored as received; nothing here checks that
+/// `total_tokens` equals the sum of the other two.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunCompletionUsage {
+    /// Number of completion tokens used over the course of the run.
+    pub completion_tokens: u32,
+    /// Number of prompt tokens used over the course of the run.
+    pub prompt_tokens: u32,
+    /// Total number of tokens used (prompt + completion).
+    pub total_tokens: u32,
+}
+
+/// Request body for creating a run.
+///
+/// A `derive_builder` builder named `CreateRunRequestArgs` is generated for
+/// this type: it uses the mutable (`&mut self`) setter pattern, setters accept
+/// `impl Into<T>` and take the inner value of `Option` fields
+/// (`strip_option`), unset fields fall back to their `Default`, and `build()`
+/// reports failures as `OpenAIError`.
+///
+/// Every optional field is omitted from the serialized request when `None`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
+#[builder(name = "CreateRunRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateRunRequest {
+    /// The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to execute this run.
+    pub assistant_id: String,
+
+    /// The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+
+    /// Overrides the [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant) of the assistant. This is useful for modifying the behavior on a per-run basis.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// Appends additional instructions at the end of the instructions for the run. This is useful for modifying the behavior on a per-run basis without overriding other instructions.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub additional_instructions: Option<String>,
+
+    /// Adds additional messages to the thread before creating the run.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub additional_messages: Option<Vec<CreateMessageRequest>>,
+
+    /// Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<AssistantTools>>,
+
+    /// Free-form key/value metadata; values are arbitrary JSON.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+
+    /// The sampling temperature used for this run. If not set, defaults to 1.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    ///
+    /// We generally recommend altering this or temperature but not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+
+    /// If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+
+    /// The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_prompt_tokens: Option<u32>,
+
+    /// The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_completion_tokens: Option<u32>,
+
+    /// Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation_strategy: Option<TruncationObject>,
+
+    /// Tool choice override for this run, if any.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<AssistantsApiToolChoiceOption>,
+
+    /// Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+
+    /// Response format override for this run, if any.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<AssistantsApiResponseFormatOption>,
+}
+
+/// Request body for modifying a run; only `metadata` can be set.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct ModifyRunRequest {
+    /// Free-form key/value metadata; omitted from the serialized request when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+
+/// One page of [`RunObject`]s.
+///
+/// `Default` yields an empty page (`data` empty, `has_more` false).
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct ListRunsResponse {
+    pub object: String,
+    /// The runs on this page.
+    pub data: Vec<RunObject>,
+    /// NOTE(review): presumably the ID of the first entry in `data` — confirm against the API.
+    pub first_id: Option<String>,
+    /// NOTE(review): presumably the ID of the last entry in `data` — confirm against the API.
+    pub last_id: Option<String>,
+    pub has_more: bool,
+}
+
+/// Request body for submitting tool outputs to a run.
+///
+/// NOTE(review): `stream` has no `skip_serializing_if`, so `None` is sent as
+/// `"stream": null` rather than omitted — confirm the API accepts that.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct SubmitToolOutputsRunRequest {
+    /// A list of tools for which the outputs are being submitted.
+    pub tool_outputs: Vec<ToolsOutputs>,
+    /// If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
+    pub stream: Option<bool>,
+}
+
+/// Output of a single tool call, as sent in `SubmitToolOutputsRunRequest::tool_outputs`.
+///
+/// A `derive_builder` builder named `ToolsOutputsArgs` is generated for this
+/// type: mutable setter pattern, setters accept `impl Into<T>` and take the
+/// inner value of `Option` fields, unset fields fall back to `Default`
+/// (`None`), and `build()` reports failures as `OpenAIError`.
+///
+/// Neither field has `skip_serializing_if`, so `None` serializes as `null`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
+#[builder(name = "ToolsOutputsArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ToolsOutputs {
+    /// The ID of the tool call in the `required_action` object within the run object the output is being submitted for.
+    pub tool_call_id: Option<String>,
+    /// The output of the tool call to be submitted to continue the run.
+    pub output: Option<String>,
+}
--- a/lib/async-openai/src/types/step.rs
+++ b/lib/async-openai/src/types/step.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use std::collections::HashMap;
+
+use serde::{Deserialize, Serialize};
+
+use super::{FileSearchRankingOptions, ImageFile, LastError, RunStatus};
+
+/// Kind of run step; used by `RunStepObject::type`.
+///
+/// Serialized as snake_case strings (`message_creation`, `tool_calls`).
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum RunStepType {
+    MessageCreation,
+    ToolCalls,
+}
+
+/// Represents a step in execution of a run.
+///
+/// NOTE(review): all timestamps on this type are `i32` Unix seconds, which
+/// cannot represent instants after 2038-01-19; widening would be a breaking
+/// change to the public fields, so it is only flagged here.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunStepObject {
+    /// The identifier, which can be referenced in API endpoints.
+    pub id: String,
+    /// The object type, which is always `thread.run.step`.
+    pub object: String,
+    /// The Unix timestamp (in seconds) for when the run step was created.
+    pub created_at: i32,
+
+    /// The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) associated with the run step.
+    pub assistant_id: Option<String>,
+
+    /// The ID of the [thread](https://platform.openai.com/docs/api-reference/threads) that was run.
+    pub thread_id: String,
+
+    /// The ID of the [run](https://platform.openai.com/docs/api-reference/runs) that this run step is a part of.
+    pub run_id: String,
+
+    /// The type of run step, which can be either `message_creation` or `tool_calls`.
+    pub r#type: RunStepType,
+
+    /// The status of the run step, which can be either `in_progress`, `cancelled`, `failed`, `completed`, or `expired`.
+    ///
+    /// Typed as the shared `RunStatus` enum, so the other run statuses also deserialize here.
+    pub status: RunStatus,
+
+    /// The details of the run step.
+    pub step_details: StepDetails,
+
+    /// The last error associated with this run. Will be `null` if there are no errors.
+    pub last_error: Option<LastError>,
+
+    /// The Unix timestamp (in seconds) for when the run step expired. A step is considered expired if the parent run is expired.
+    pub expires_at: Option<i32>,
+
+    /// The Unix timestamp (in seconds) for when the run step was cancelled.
+    pub cancelled_at: Option<i32>,
+
+    /// The Unix timestamp (in seconds) for when the run step failed.
+    pub failed_at: Option<i32>,
+
+    /// The Unix timestamp (in seconds) for when the run step completed.
+    pub completed_at: Option<i32>,
+
+    /// Free-form key/value metadata; values are arbitrary JSON.
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+
+    /// Usage statistics related to the run step. This value will be `null` while the run step's status is `in_progress`.
+    pub usage: Option<RunStepCompletionUsage>,
+}
+
+/// Token usage for a run step; used by `RunStepObject::usage`.
+///
+/// The three counters are stored as received; nothing here checks that
+/// `total_tokens` equals the sum of the other two.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunStepCompletionUsage {
+    /// Number of completion tokens used over the course of the run step.
+    pub completion_tokens: u32,
+    /// Number of prompt tokens used over the course of the run step.
+    pub prompt_tokens: u32,
+    /// Total number of tokens used (prompt + completion).
+    pub total_tokens: u32,
+}
+
+/// Details of a run step; used by `RunStepObject::step_details`.
+///
+/// Internally tagged: the JSON object's `type` key (`message_creation` or
+/// `tool_calls`) selects the variant, and the remaining keys populate the
+/// variant's payload struct.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[serde(tag = "type")]
+#[serde(rename_all = "snake_case")]
+pub enum StepDetails {
+    MessageCreation(RunStepDetailsMessageCreationObject),
+    ToolCalls(RunStepDetailsToolCallsObject),
+}
+
+/// Details of the message creation by the run step.
+///
+/// Payload of `StepDetails::MessageCreation`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunStepDetailsMessageCreationObject {
+    /// Reference to the created message.
+    pub message_creation: MessageCreation,
+}
+
+/// Reference to a message created by a run step; nested inside
+/// [`RunStepDetailsMessageCreationObject`].
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct MessageCreation {
+    /// The ID of the message that was created by this run step.
+    pub message_id: String,
+}
+
+/// Details of the tool call.
+///
+/// Payload of `StepDetails::ToolCalls`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunStepDetailsToolCallsObject {
+    /// An array of tool calls the run step was involved in. These can be associated with one of three types of tools: `code_interpreter`, `file_search`, or `function`.
+    pub tool_calls: Vec<RunStepDetailsToolCalls>,
+}
+
+/// One tool call recorded on a run step, internally tagged by the `type` field
+/// (`code_interpreter`, `file_search`, or `function`).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum RunStepDetailsToolCalls {
+    /// Details of the Code Interpreter tool call the run step was involved in.
+    CodeInterpreter(RunStepDetailsToolCallsCodeObject),
+    /// Serialized with `type = "file_search"`.
+    FileSearch(RunStepDetailsToolCallsFileSearchObject),
+    /// Serialized with `type = "function"`.
+    Function(RunStepDetailsToolCallsFunctionObject),
+}
+
+/// A Code Interpreter tool call.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDetailsToolCallsCodeObject {
+    /// ID of the tool call.
+    pub id: String,
+
+    /// Definition of the Code Interpreter tool call (its input and outputs).
+    pub code_interpreter: CodeInterpreter,
+}
+
+/// Input and outputs of a Code Interpreter tool call.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct CodeInterpreter {
+    /// Input given to the Code Interpreter tool call.
+    pub input: String,
+    /// Outputs produced by the Code Interpreter tool call. There may be one or more items,
+    /// each either text (`logs`) or an image (`image`), represented by distinct object types.
+    pub outputs: Vec<CodeInterpreterOutput>,
+}
+
+/// A single Code Interpreter output, internally tagged by the `type` field
+/// (`logs` or `image`).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum CodeInterpreterOutput {
+    /// Log (text) output from the code interpreter.
+    Logs(RunStepDetailsToolCallsCodeOutputLogsObject),
+    /// Image output from the code interpreter.
+    Image(RunStepDetailsToolCallsCodeOutputImageObject),
+}
+
+/// Text (log) output of a Code Interpreter tool call within a run step.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDetailsToolCallsCodeOutputLogsObject {
+    /// Text emitted by the Code Interpreter tool call.
+    pub logs: String,
+}
+
+/// Image output of a Code Interpreter tool call within a run step.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDetailsToolCallsCodeOutputImageObject {
+    /// The image, referenced by its [file](https://platform.openai.com/docs/api-reference/files) ID.
+    pub image: ImageFile,
+}
+
+/// File search tool call
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunStepDetailsToolCallsFileSearchObject {
+    /// The ID of the tool call object.
+    pub id: String,
+    /// The file search payload: optional ranking options and optional results.
+    pub file_search: RunStepDetailsToolCallsFileSearchObjectFileSearch,
+}
+
+/// Payload of a file search tool call; both fields are optional.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDetailsToolCallsFileSearchObjectFileSearch {
+    /// Ranking options for the file search, when present.
+    pub ranking_options: Option<FileSearchRankingOptions>,
+    /// Results of the file search, when present.
+    pub results: Option<Vec<RunStepDetailsToolCallsFileSearchResultObject>>,
+}
+
+/// One result returned by a file search.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDetailsToolCallsFileSearchResultObject {
+    /// ID of the file in which the result was found.
+    pub file_id: String,
+    /// Name of the file in which the result was found.
+    pub file_name: String,
+    /// Score of the result; a floating point number between 0 and 1.
+    pub score: f32,
+    /// Content of the result. Only included when requested via the include query parameter.
+    pub content: Option<Vec<RunStepDetailsToolCallsFileSearchResultObjectContent>>,
+}
+
+/// A piece of content attached to a file search result.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDetailsToolCallsFileSearchResultObjectContent {
+    // NOTE: the `type` field is always text, so it is not modelled on this struct.
+    /// Text content of the file.
+    pub text: Option<String>,
+}
+
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct RunStepDetailsToolCallsFunctionObject {
+    /// The ID of the tool call object.
+    pub id: String,
+    /// The definition of the function that was called.
+    pub function: RunStepFunctionObject,
+}
+
+/// Name, arguments, and output of a function invoked by a run step.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepFunctionObject {
+    /// Name of the function.
+    pub name: String,
+    /// Arguments passed to the function.
+    pub arguments: String,
+    /// Output of the function. `null` until the outputs have been
+    /// [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs).
+    pub output: Option<String>,
+}
+
+/// Variant of the run-step function object in which every field is optional.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepFunctionObjectDelta {
+    /// Name of the function.
+    pub name: Option<String>,
+    /// Arguments passed to the function.
+    pub arguments: Option<String>,
+    /// Output of the function. `null` until the outputs have been
+    /// [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs).
+    pub output: Option<String>,
+}
+
+/// Response wrapper holding a list of run steps.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ListRunStepsResponse {
+    /// Object type string as returned by the API.
+    pub object: String,
+    /// The run steps in this response.
+    pub data: Vec<RunStepObject>,
+    /// First ID reported by the API, if any.
+    pub first_id: Option<String>,
+    /// Last ID reported by the API, if any.
+    pub last_id: Option<String>,
+    /// The `has_more` flag reported by the API.
+    pub has_more: bool,
+}
+
+/// A run step delta, i.e. the fields of a run step that changed during streaming.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaObject {
+    /// Identifier of the run step; can be referenced in API endpoints.
+    pub id: String,
+    /// Object type, always `thread.run.step.delta`.
+    pub object: String,
+    /// The changed fields of the run step.
+    pub delta: RunStepDelta,
+}
+
+/// Changed fields of a run step; currently only the step details.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDelta {
+    /// Delta of the step details.
+    pub step_details: DeltaStepDetails,
+}
+
+/// Step-details delta, internally tagged by the `type` field
+/// (`message_creation` or `tool_calls`).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum DeltaStepDetails {
+    /// Serialized with `type = "message_creation"`.
+    MessageCreation(RunStepDeltaStepDetailsMessageCreationObject),
+    /// Serialized with `type = "tool_calls"`.
+    ToolCalls(RunStepDeltaStepDetailsToolCallsObject),
+}
+
+/// Message-creation details carried by a run step delta.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaStepDetailsMessageCreationObject {
+    /// Reference to the created message, when present in the delta.
+    pub message_creation: Option<MessageCreation>,
+}
+
+/// Tool-call details carried by a run step delta.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaStepDetailsToolCallsObject {
+    /// The tool calls the run step was involved in, when present. Each entry is one of three
+    /// tool types: `code_interpreter`, `file_search`, or `function`.
+    pub tool_calls: Option<Vec<RunStepDeltaStepDetailsToolCalls>>,
+}
+
+/// One tool call within a run step delta, internally tagged by the `type` field
+/// (`code_interpreter`, `file_search`, or `function`).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "snake_case")]
+pub enum RunStepDeltaStepDetailsToolCalls {
+    /// Serialized with `type = "code_interpreter"`.
+    CodeInterpreter(RunStepDeltaStepDetailsToolCallsCodeObject),
+    /// Serialized with `type = "file_search"`.
+    FileSearch(RunStepDeltaStepDetailsToolCallsFileSearchObject),
+    /// Serialized with `type = "function"`.
+    Function(RunStepDeltaStepDetailsToolCallsFunctionObject),
+}
+
+/// Code Interpreter tool call as it appears in a run step delta.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaStepDetailsToolCallsCodeObject {
+    /// Index of the tool call in the tool calls array.
+    pub index: u32,
+    /// ID of the tool call.
+    pub id: Option<String>,
+    /// Definition of the Code Interpreter tool call.
+    pub code_interpreter: Option<DeltaCodeInterpreter>,
+}
+
+/// Input and outputs of a Code Interpreter tool call within a delta; both are optional.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DeltaCodeInterpreter {
+    /// Input given to the Code Interpreter tool call.
+    pub input: Option<String>,
+    /// Outputs produced by the Code Interpreter tool call. There may be one or more items,
+    /// each either text (`logs`) or an image (`image`), represented by distinct object types.
+    pub outputs: Option<Vec<DeltaCodeInterpreterOutput>>,
+}
+
+/// A single Code Interpreter output within a delta, internally tagged by the `type` field
+/// (`logs` or `image`).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum DeltaCodeInterpreterOutput {
+    /// Serialized with `type = "logs"`.
+    Logs(RunStepDeltaStepDetailsToolCallsCodeOutputLogsObject),
+    /// Serialized with `type = "image"`.
+    Image(RunStepDeltaStepDetailsToolCallsCodeOutputImageObject),
+}
+
+/// Text (log) output of a Code Interpreter tool call within a run step delta.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaStepDetailsToolCallsCodeOutputLogsObject {
+    /// Index of the output in the outputs array.
+    pub index: u32,
+    /// Text emitted by the Code Interpreter tool call.
+    pub logs: Option<String>,
+}
+
+/// Image output of a Code Interpreter tool call within a run step delta.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaStepDetailsToolCallsCodeOutputImageObject {
+    /// Index of the output in the outputs array.
+    pub index: u32,
+
+    /// The image file, when present in the delta.
+    pub image: Option<ImageFile>,
+}
+
+/// File search tool call as it appears in a run step delta.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaStepDetailsToolCallsFileSearchObject {
+    /// Index of the tool call in the tool calls array.
+    pub index: u32,
+    /// ID of the tool call object.
+    pub id: Option<String>,
+    /// For now, this is always going to be an empty object; kept as untyped JSON.
+    pub file_search: Option<serde_json::Value>,
+}
+
+/// Function tool call as it appears in a run step delta.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct RunStepDeltaStepDetailsToolCallsFunctionObject {
+    /// Index of the tool call in the tool calls array.
+    pub index: u32,
+    /// ID of the tool call object.
+    pub id: Option<String>,
+    /// Definition of the function that was called.
+    pub function: Option<RunStepFunctionObjectDelta>,
+}
--- a/lib/async-openai/src/types/thread.rs
+++ b/lib/async-openai/src/types/thread.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use std::collections::HashMap;
+
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+
+use crate::error::OpenAIError;
+
+use super::{
+    AssistantToolResources, AssistantTools, AssistantsApiResponseFormatOption,
+    AssistantsApiToolChoiceOption, CreateAssistantToolResources, CreateMessageRequest,
+    TruncationObject,
+};
+
+/// A thread, i.e. a container of [messages](https://platform.openai.com/docs/api-reference/messages).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ThreadObject {
+    /// Identifier of the thread; can be referenced in API endpoints.
+    pub id: String,
+    /// Object type, always `thread`.
+    pub object: String,
+    /// Unix timestamp (in seconds) of when the thread was created.
+    pub created_at: i32,
+
+    /// Resources made available to the assistant's tools in this thread. Resources are
+    /// tool-specific: for example, the `code_interpreter` tool requires a list of file IDs,
+    /// while the `file_search` tool requires a list of vector store IDs.
+    pub tool_resources: Option<AssistantToolResources>,
+
+    /// Optional map from string keys to arbitrary JSON values.
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+
+/// Request body for creating a thread. Every field is optional and is left out of the
+/// serialized output when unset. The generated builder is `CreateThreadRequestArgs`.
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize, Builder)]
+#[builder(
+    name = "CreateThreadRequestArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default,
+    derive(Debug),
+    build_fn(error = "OpenAIError")
+)]
+pub struct CreateThreadRequest {
+    /// [Messages](https://platform.openai.com/docs/api-reference/messages) to start the thread with.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub messages: Option<Vec<CreateMessageRequest>>,
+
+    /// Resources made available to the assistant's tools in this thread. Resources are
+    /// tool-specific: for example, the `code_interpreter` tool requires a list of file IDs,
+    /// while the `file_search` tool requires a list of vector store IDs.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_resources: Option<CreateAssistantToolResources>,
+
+    /// Optional map from string keys to arbitrary JSON values.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+
+/// Request body for modifying a thread. Unset fields are left out of the serialized output.
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ModifyThreadRequest {
+    /// Optional map from string keys to arbitrary JSON values.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+
+    /// Resources made available to the assistant's tools in this thread. Resources are
+    /// tool-specific: for example, the `code_interpreter` tool requires a list of file IDs,
+    /// while the `file_search` tool requires a list of vector store IDs.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_resources: Option<AssistantToolResources>,
+}
+
+/// Response returned for a thread deletion.
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DeleteThreadResponse {
+    /// Identifier reported in the response.
+    pub id: String,
+    /// The `deleted` flag reported in the response.
+    pub deleted: bool,
+    /// Object type string reported in the response.
+    pub object: String,
+}
+
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
+#[builder(name = "CreateThreadAndRunRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateThreadAndRunRequest {
+    /// The ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to execute this run.
+    pub assistant_id: String,
+
+    /// If no thread is provided, an empty thread will be created.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thread: Option<CreateThreadRequest>,
+
+    /// The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to be used to execute this run. If a value is provided here, it will override the model associated with the assistant. If not, the model associated with the assistant will be used.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+
+    /// Override the default system message of the assistant. This is useful for modifying the behavior on a per-run basis.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+
+    /// Override the tools the assistant can use for this run. This is useful for modifying the behavior on a per-run basis.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<AssistantTools>>,
+
+    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_resources: Option<AssistantToolResources>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    ///
+    /// We generally recommend altering this or temperature but not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+
+    /// If `true`, returns a stream of events that happen during the Run as server-sent events, terminating when the Run enters a terminal state with a `data: [DONE]` message.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stream: Option<bool>,
+
+    /// The maximum number of prompt tokens that may be used over the course of the run. The run will make a best effort to use only the number of prompt tokens specified, across multiple turns of the run. If the run exceeds the number of prompt tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_prompt_tokens: Option<u32>,
+
+    /// The maximum number of completion tokens that may be used over the course of the run. The run will make a best effort to use only the number of completion tokens specified, across multiple turns of the run. If the run exceeds the number of completion tokens specified, the run will end with status `incomplete`. See `incomplete_details` for more info.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_completion_tokens: Option<u32>,
+
+    /// Controls for how a thread will be truncated prior to the run. Use this to control the initial context window of the run.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub truncation_strategy: Option<TruncationObject>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_choice: Option<AssistantsApiToolChoiceOption>,
+
+    /// Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub parallel_tool_calls: Option<bool>,
+
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<AssistantsApiResponseFormatOption>,
+}
--- a/lib/async-openai/src/types/upload.rs
+++ b/lib/async-openai/src/types/upload.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use crate::error::OpenAIError;
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+
+use super::{InputSource, OpenAIFile};
+
+/// Request for creating an Upload object, which accepts byte chunks in the form of Parts.
+/// The generated builder is `CreateUploadRequestArgs`.
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize, Builder)]
+#[builder(
+    name = "CreateUploadRequestArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default,
+    derive(Debug),
+    build_fn(error = "OpenAIError")
+)]
+pub struct CreateUploadRequest {
+    /// Name of the file to upload.
+    pub filename: String,
+
+    /// Intended purpose of the uploaded file.
+    ///
+    /// See the [documentation on File purposes](https://platform.openai.com/docs/api-reference/files/create#files-create-purpose).
+    pub purpose: UploadPurpose,
+
+    /// Number of bytes in the file being uploaded.
+    pub bytes: u64,
+
+    /// MIME type of the file.
+    ///
+    /// This must fall within the supported MIME types for your file purpose. See the supported
+    /// MIME types for assistants and vision.
+    pub mime_type: String,
+}
+
+/// The intended purpose of the uploaded file.
+///
+/// Variants serialize to the purpose strings used on the wire. `FineTune` is written as the
+/// hyphenated `fine-tune`; the container-level `snake_case` rule alone would emit
+/// `fine_tune`, which does not match the API's purpose value. The `fine_tune` spelling is
+/// still accepted when deserializing, so previously stored payloads keep working.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum UploadPurpose {
+    /// For use with Assistants and Message files
+    Assistants,
+    /// For Assistants image file inputs
+    Vision,
+    /// For use with the Batch API
+    Batch,
+    /// For use with Fine-tuning
+    #[default]
+    #[serde(rename = "fine-tune", alias = "fine_tune")]
+    FineTune,
+}
+
+/// The Upload object can accept byte chunks in the form of Parts.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct Upload {
+    /// The Upload unique identifier, which can be referenced in API endpoints
+    pub id: String,
+
+    /// The Unix timestamp (in seconds) for when the Upload was created
+    pub created_at: u32,
+
+    /// The name of the file to be uploaded
+    pub filename: String,
+
+    /// The intended number of bytes to be uploaded
+    pub bytes: u64,
+
+    /// The intended purpose of the file. [Please refer here](https://platform.openai.com/docs/api-reference/files/object#files/object-purpose) for acceptable values.
+    pub purpose: UploadPurpose,
+
+    /// The status of the Upload.
+    pub status: UploadStatus,
+
+    /// The Unix timestamp (in seconds) for when the Upload will expire
+    pub expires_at: u32,
+
+    /// The object type, which is always "upload"
+    pub object: String,
+
+    /// The ready File object after the Upload is completed
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file: Option<OpenAIFile>,
+}
+
+/// The status of an upload.
+///
+/// Serialized in lowercase (`pending`, `completed`, `cancelled`, `expired`). Derives `Clone`
+/// and `PartialEq` so status values can be copied and compared like the other types in
+/// this crate.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum UploadStatus {
+    /// Upload is pending
+    Pending,
+    /// Upload has completed successfully
+    Completed,
+    /// Upload was cancelled
+    Cancelled,
+    /// Upload has expired
+    Expired,
+}
+
+/// The upload Part represents a chunk of bytes we can add to an Upload object.
+///
+/// Derives `Clone` and `PartialEq` (all fields are plain strings or integers) so parts can
+/// be duplicated and compared like the other response types in this crate.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct UploadPart {
+    /// The upload Part unique identifier, which can be referenced in API endpoints
+    pub id: String,
+
+    /// The Unix timestamp (in seconds) for when the Part was created
+    pub created_at: u32,
+
+    /// The ID of the Upload object that this Part was added to
+    pub upload_id: String,
+
+    /// The object type, which is always `upload.part`
+    pub object: String,
+}
+
+/// Parameters for adding a Part to an Upload.
+#[derive(Clone, Debug)]
+pub struct AddUploadPartRequest {
+    /// Source of the bytes that make up this Part.
+    pub data: InputSource,
+}
+
+/// Request parameters for completing an Upload.
+///
+/// Derives `Clone` and `PartialEq` (fields are only strings) so the request can be reused
+/// and compared like the other request types in this crate.
+#[derive(Debug, Clone, PartialEq, Serialize)]
+pub struct CompleteUploadRequest {
+    /// The ordered list of Part IDs
+    pub part_ids: Vec<String>,
+
+    /// The optional md5 checksum for the file contents, used to verify that the uploaded
+    /// bytes match what you expect. Omitted from the serialized request when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub md5: Option<String>,
+}
--- a/lib/async-openai/src/types/users.rs
+++ b/lib/async-openai/src/types/users.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use crate::types::OpenAIError;
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+
+use super::OrganizationRole;
+
+/// Represents an individual `user` within an organization.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+pub struct User {
+    /// The object type, which is always `organization.user`
+    pub object: String,
+    /// The identifier, which can be referenced in API endpoints
+    pub id: String,
+    /// The name of the user
+    pub name: String,
+    /// The email address of the user
+    pub email: String,
+    /// `owner` or `reader`
+    pub role: OrganizationRole,
+    /// The Unix timestamp (in seconds) of when the user was added.
+    pub added_at: u32,
+}
+
+/// Response wrapper holding a list of `User` objects.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct UserListResponse {
+    /// Object type string as returned by the API.
+    pub object: String,
+    /// The users in this response.
+    pub data: Vec<User>,
+    /// First ID reported by the API.
+    // NOTE(review): other list responses in this crate model `first_id`/`last_id` as
+    // `Option<String>`; confirm the API never returns `null` here.
+    pub first_id: String,
+    /// Last ID reported by the API.
+    pub last_id: String,
+    /// The `has_more` flag reported by the API.
+    pub has_more: bool,
+}
+
+/// Request body for changing a user's role. The generated builder is
+/// `UserRoleUpdateRequestArgs`.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, Builder)]
+#[builder(
+    name = "UserRoleUpdateRequestArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    derive(Debug),
+    build_fn(error = "OpenAIError")
+)]
+pub struct UserRoleUpdateRequest {
+    /// The role to assign: `owner` or `reader`.
+    pub role: OrganizationRole,
+}
+
+/// Confirmation returned for a deleted user.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct UserDeleteResponse {
+    /// Object type string reported in the response.
+    pub object: String,
+    /// Identifier reported in the response.
+    pub id: String,
+    /// The `deleted` flag reported in the response.
+    pub deleted: bool,
+}
--- a/lib/async-openai/src/types/vector_store.rs
+++ b/lib/async-openai/src/types/vector_store.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use std::collections::HashMap;
+
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+
+use crate::error::OpenAIError;
+
+use super::StaticChunkingStrategy;
+
+/// Request body for creating a vector store. Every field is optional and is left out of the
+/// serialized output when unset. The generated builder is `CreateVectorStoreRequestArgs`.
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize, Builder)]
+#[builder(
+    name = "CreateVectorStoreRequestArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default,
+    derive(Debug),
+    build_fn(error = "OpenAIError")
+)]
+pub struct CreateVectorStoreRequest {
+    /// [File](https://platform.openai.com/docs/api-reference/files) IDs the vector store should use.
+    /// Useful for tools like `file_search` that can access files.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_ids: Option<Vec<String>>,
+    /// Name of the vector store.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>,
+
+    /// Expiration policy for the vector store.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub expires_after: Option<VectorStoreExpirationAfter>,
+
+    /// Chunking strategy used to chunk the file(s). When not set, the `auto` strategy is used.
+    /// Only applicable if `file_ids` is non-empty.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub chunking_strategy: Option<VectorStoreChunkingStrategy>,
+
+    /// Set of 16 key-value pairs that can be attached to an object, useful for storing
+    /// additional information in a structured format. Keys can be a maximum of 64 characters
+    /// long and values can be a maximum of 512 characters long.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+
+/// Chunking strategy for a vector store, internally tagged by the `type` field
+/// (`auto` or `static`).
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum VectorStoreChunkingStrategy {
+    /// The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800`
+    /// and `chunk_overlap_tokens` of `400`.
+    #[default]
+    Auto,
+    /// Explicit chunking configuration, carried under the `static` key.
+    Static {
+        #[serde(rename = "static")]
+        config: StaticChunkingStrategy,
+    },
+}
+
+/// Expiration policy of a vector store.
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreExpirationAfter {
+    /// Anchor timestamp after which the expiration policy applies. Supported anchors:
+    /// `last_active_at`.
+    pub anchor: String,
+    /// Number of days after the anchor time at which the vector store expires
+    /// (min: 1, max: 365).
+    pub days: u16,
+}
+
+/// A vector store is a collection of processed files that can be used by the `file_search` tool.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct VectorStoreObject {
+    /// The identifier, which can be referenced in API endpoints.
+    pub id: String,
+    /// The object type, which is always `vector_store`.
+    pub object: String,
+    /// The Unix timestamp (in seconds) for when the vector store was created.
+    pub created_at: u32,
+    /// The name of the vector store.
+    pub name: Option<String>,
+    /// The total number of bytes used by the files in the vector store.
+    pub usage_bytes: u64,
+    pub file_counts: VectorStoreFileCounts,
+    /// The status of the vector store, which can be either `expired`, `in_progress`, or `completed`. A status of `completed` indicates that the vector store is ready for use.
+    pub status: VectorStoreStatus,
+    pub expires_after: Option<VectorStoreExpirationAfter>,
+    /// The Unix timestamp (in seconds) for when the vector store will expire.
+    pub expires_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the vector store was last active.
+    pub last_active_at: Option<u32>,
+
+    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+
+/// Status of a vector store, serialized in snake case
+/// (`expired`, `in_progress`, `completed`).
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum VectorStoreStatus {
+    /// Serialized as `expired`.
+    Expired,
+    /// Serialized as `in_progress`.
+    InProgress,
+    /// Serialized as `completed`.
+    Completed,
+}
+
+/// Per-state file counts of a vector store.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreFileCounts {
+    /// Files currently being processed.
+    pub in_progress: u32,
+    /// Files that have been successfully processed.
+    pub completed: u32,
+    /// Files that have failed to process.
+    pub failed: u32,
+    /// Files that were cancelled.
+    pub cancelled: u32,
+    /// Total number of files.
+    pub total: u32,
+}
+
+/// Response wrapper holding a list of vector stores.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ListVectorStoresResponse {
+    /// Object type string as returned by the API.
+    pub object: String,
+    /// The vector stores in this response.
+    pub data: Vec<VectorStoreObject>,
+    /// First ID reported by the API, if any.
+    pub first_id: Option<String>,
+    /// Last ID reported by the API, if any.
+    pub last_id: Option<String>,
+    /// The `has_more` flag reported by the API.
+    pub has_more: bool,
+}
+
+/// Response returned for a vector store deletion.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct DeleteVectorStoreResponse {
+    /// Identifier reported in the response.
+    pub id: String,
+    /// Object type string reported in the response.
+    pub object: String,
+    /// The `deleted` flag reported in the response.
+    pub deleted: bool,
+}
+
+/// Request body for updating a vector store. Every field is optional and is left out of the
+/// serialized output when unset. The generated builder is `UpdateVectorStoreRequestArgs`.
+#[derive(Debug, Default, Clone, PartialEq, Serialize, Deserialize, Builder)]
+#[builder(
+    name = "UpdateVectorStoreRequestArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default,
+    derive(Debug),
+    build_fn(error = "OpenAIError")
+)]
+pub struct UpdateVectorStoreRequest {
+    /// New name, when set.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>,
+    /// New expiration policy, when set.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub expires_after: Option<VectorStoreExpirationAfter>,
+    /// Optional map from string keys to arbitrary JSON values.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+
+/// Response wrapper holding a list of vector store files.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct ListVectorStoreFilesResponse {
+    /// Object type string as returned by the API.
+    pub object: String,
+    /// The vector store files in this response.
+    pub data: Vec<VectorStoreFileObject>,
+    /// First ID reported by the API, if any.
+    pub first_id: Option<String>,
+    /// Last ID reported by the API, if any.
+    pub last_id: Option<String>,
+    /// The `has_more` flag reported by the API.
+    pub has_more: bool,
+}
+
+/// A file attached to a vector store.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreFileObject {
+    /// Identifier of the file; can be referenced in API endpoints.
+    pub id: String,
+    /// Object type, always `vector_store.file`.
+    pub object: String,
+    /// Total vector store usage in bytes. May differ from the original file size.
+    pub usage_bytes: u64,
+    /// Unix timestamp (in seconds) of when the vector store file was created.
+    pub created_at: u32,
+    /// ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object)
+    /// that the [File](https://platform.openai.com/docs/api-reference/files) is attached to.
+    pub vector_store_id: String,
+    /// Status of the vector store file: `in_progress`, `completed`, `cancelled`, or `failed`.
+    /// `completed` indicates that the vector store file is ready for use.
+    pub status: VectorStoreFileStatus,
+    /// Last error associated with this vector store file; `null` if there are no errors.
+    pub last_error: Option<VectorStoreFileError>,
+    /// Strategy used to chunk the file.
+    pub chunking_strategy: Option<VectorStoreFileObjectChunkingStrategy>,
+}
+
+/// Processing status of a vector store file; variants are (de)serialized in `snake_case`.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum VectorStoreFileStatus {
+    /// Wire value `in_progress`.
+    InProgress,
+    /// Wire value `completed`.
+    Completed,
+    /// Wire value `cancelled`.
+    Cancelled,
+    /// Wire value `failed`.
+    Failed,
+}
+
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct VectorStoreFileError {
+    pub code: VectorStoreFileErrorCode,
+    /// A human-readable description of the error.
+    pub message: String,
+}
+
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum VectorStoreFileErrorCode {
+    ServerError,
+    UnsupportedFile,
+    InvalidFile,
+}
+
+/// Chunking strategy reported on a vector store file.
+///
+/// Internally tagged by a `type` field whose value is the lowercase variant name.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(tag = "type", rename_all = "lowercase")]
+pub enum VectorStoreFileObjectChunkingStrategy {
+    /// Returned when the chunking strategy is unknown, typically because the file was indexed before the `chunking_strategy` concept existed in the API.
+    Other,
+    /// A static chunking strategy, with its settings under the `static` key.
+    Static {
+        r#static: StaticChunkingStrategy,
+    },
+}
+
+/// Request body for attaching a file to a vector store.
+///
+/// Build instances with `CreateVectorStoreFileRequestArgs`.
+#[derive(Clone, Debug, Default, PartialEq, Builder, Serialize, Deserialize)]
+#[builder(
+    name = "CreateVectorStoreFileRequestArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default,
+    derive(Debug),
+    build_fn(error = "OpenAIError")
+)]
+pub struct CreateVectorStoreFileRequest {
+    /// ID of the [File](https://platform.openai.com/docs/api-reference/files) the vector store should use. Useful for tools like `file_search` that can access files.
+    pub file_id: String,
+    /// Chunking strategy for the file; left out of the JSON when unset.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub chunking_strategy: Option<VectorStoreChunkingStrategy>,
+    /// Attributes to store with the file; left out of the JSON when unset.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub attributes: Option<HashMap<String, AttributeValue>>,
+}
+
+/// Body returned by the vector store file delete endpoint.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct DeleteVectorStoreFileResponse {
+    /// Identifier carried in the response.
+    pub id: String,
+    /// Object type string carried in the response.
+    pub object: String,
+    /// Deletion flag carried in the response.
+    pub deleted: bool,
+}
+
+/// Request body for creating a vector store file batch.
+///
+/// Build instances with `CreateVectorStoreFileBatchRequestArgs`.
+#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
+#[builder(name = "CreateVectorStoreFileBatchRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateVectorStoreFileBatchRequest {
+    /// A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that the vector store should use. Useful for tools like `file_search` that can access files.
+    pub file_ids: Vec<String>, // minItems: 1, maxItems: 500
+    /// Chunking strategy applied to the files in the batch.
+    ///
+    /// Omitted from the serialized request when unset (rather than sent as
+    /// `null`), matching `CreateVectorStoreFileRequest::chunking_strategy`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub chunking_strategy: Option<VectorStoreChunkingStrategy>,
+}
+
+/// Status of a vector store file batch; variants are (de)serialized in `snake_case`.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum VectorStoreFileBatchStatus {
+    /// Wire value `in_progress`.
+    InProgress,
+    /// Wire value `completed`.
+    Completed,
+    /// Wire value `cancelled`.
+    Cancelled,
+    /// Wire value `failed`.
+    Failed,
+}
+
+/// Per-status file counts for a vector store file batch.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreFileBatchCounts {
+    /// Files still being processed.
+    pub in_progress: u32,
+    /// Files processed successfully.
+    pub completed: u32,
+    /// Files whose processing failed.
+    pub failed: u32,
+    /// Files that were cancelled.
+    pub cancelled: u32,
+    /// Total number of files.
+    pub total: u32,
+}
+
+/// A batch of files attached to a vector store.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreFileBatchObject {
+    /// Identifier of this object; usable in API endpoints.
+    pub id: String,
+    /// Object type; always `vector_store.file_batch`.
+    pub object: String,
+    /// Creation time of the vector store files batch as a Unix timestamp in seconds.
+    pub created_at: u32,
+    /// ID of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) to which the [File](https://platform.openai.com/docs/api-reference/files) is attached.
+    pub vector_store_id: String,
+    /// Batch status: `in_progress`, `completed`, `cancelled` or `failed`.
+    pub status: VectorStoreFileBatchStatus,
+    /// File counts for the batch, broken down by status.
+    pub file_counts: VectorStoreFileBatchCounts,
+}
+
+/// A page of parsed content for a vector store file.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreFileContentResponse {
+    /// Object type; always `vector_store.file_content.page`.
+    pub object: String,
+
+    /// The parsed content entries of the file.
+    pub data: Vec<VectorStoreFileContentObject>,
+
+    /// Whether further content pages remain to be fetched.
+    pub has_more: bool,
+
+    /// Token for the next page, if there is one.
+    pub next_page: Option<String>,
+}
+
+/// A single entry of parsed content from a vector store file.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreFileContentObject {
+    /// Content type (currently only `"text"`).
+    pub r#type: String,
+
+    /// The text of this entry.
+    pub text: String,
+}
+
+/// Request body for searching a vector store.
+///
+/// Optional fields are left out of the serialized JSON when unset. Build
+/// instances with `VectorStoreSearchRequestArgs`.
+#[derive(Clone, Debug, Default, PartialEq, Builder, Serialize, Deserialize)]
+#[builder(
+    name = "VectorStoreSearchRequestArgs",
+    pattern = "mutable",
+    setter(into, strip_option),
+    default,
+    derive(Debug),
+    build_fn(error = "OpenAIError")
+)]
+pub struct VectorStoreSearchRequest {
+    /// The search query: a single string or a list of strings.
+    pub query: VectorStoreSearchQuery,
+
+    /// Whether the natural language query should be rewritten for vector search.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub rewrite_query: Option<bool>,
+
+    /// Maximum number of results to return; should be between 1 and 50 inclusive.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub max_num_results: Option<u8>,
+
+    /// Filter applied on file attributes.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub filters: Option<VectorStoreSearchFilter>,
+
+    /// Ranking options for the search.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ranking_options: Option<RankingOptions>,
+}
+
+/// Query of a vector store search.
+///
+/// Untagged: serialized either as a bare JSON string or as a JSON array of strings.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum VectorStoreSearchQuery {
+    /// One query string to search for.
+    Text(String),
+    /// Several query strings to search for.
+    Array(Vec<String>),
+}
+
+/// The default query is an empty single-text query.
+impl Default for VectorStoreSearchQuery {
+    fn default() -> Self {
+        VectorStoreSearchQuery::Text(String::default())
+    }
+}
+
+/// An owned string becomes a single-text query.
+impl From<String> for VectorStoreSearchQuery {
+    fn from(text: String) -> Self {
+        VectorStoreSearchQuery::Text(text)
+    }
+}
+
+/// A string slice is copied into a single-text query.
+impl From<&str> for VectorStoreSearchQuery {
+    fn from(text: &str) -> Self {
+        VectorStoreSearchQuery::Text(String::from(text))
+    }
+}
+
+/// A vector of strings becomes a multi-query search.
+impl From<Vec<String>> for VectorStoreSearchQuery {
+    fn from(queries: Vec<String>) -> Self {
+        VectorStoreSearchQuery::Array(queries)
+    }
+}
+
+/// Filter for a vector store search: either one comparison or a compound of filters.
+///
+/// Untagged: when deserializing, the variants are tried in declaration order.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum VectorStoreSearchFilter {
+    /// A single attribute comparison.
+    Comparison(ComparisonFilter),
+    /// A combination of several filters.
+    Compound(CompoundFilter),
+}
+
+/// Wraps a comparison filter as a search filter.
+impl From<ComparisonFilter> for VectorStoreSearchFilter {
+    fn from(comparison: ComparisonFilter) -> Self {
+        VectorStoreSearchFilter::Comparison(comparison)
+    }
+}
+
+/// Wraps a compound filter as a search filter.
+impl From<CompoundFilter> for VectorStoreSearchFilter {
+    fn from(compound: CompoundFilter) -> Self {
+        VectorStoreSearchFilter::Compound(compound)
+    }
+}
+
+/// Compares one attribute key against a value with a chosen comparison operator.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct ComparisonFilter {
+    /// Comparison operator: `eq`, `ne`, `gt`, `gte`, `lt`, `lte`.
+    pub r#type: ComparisonType,
+
+    /// Attribute key whose value is compared.
+    pub key: String,
+
+    /// Value the attribute is compared against; a string, number, or boolean.
+    pub value: AttributeValue,
+}
+
+/// Comparison operator of a [`ComparisonFilter`]; (de)serialized as the lowercase
+/// variant name (`eq`, `ne`, `gt`, `gte`, `lt`, `lte`).
+#[derive(Clone, Copy, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum ComparisonType {
+    /// Wire value `eq`.
+    Eq,
+    /// Wire value `ne`.
+    Ne,
+    /// Wire value `gt`.
+    Gt,
+    /// Wire value `gte`.
+    Gte,
+    /// Wire value `lt`.
+    Lt,
+    /// Wire value `lte`.
+    Lte,
+}
+
+/// A file attribute value: a string, a number, or a boolean.
+///
+/// Untagged: the value is written as a bare JSON scalar, and on deserialization the
+/// variants are tried in declaration order. Numbers are held as `i64`.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(untagged)]
+pub enum AttributeValue {
+    /// A string value.
+    String(String),
+    /// An integer value.
+    Number(i64),
+    /// A boolean value.
+    Boolean(bool),
+}
+
+/// An owned string becomes a string attribute.
+impl From<String> for AttributeValue {
+    fn from(text: String) -> Self {
+        AttributeValue::String(text)
+    }
+}
+
+/// An `i64` becomes a numeric attribute.
+impl From<i64> for AttributeValue {
+    fn from(number: i64) -> Self {
+        AttributeValue::Number(number)
+    }
+}
+
+/// A `bool` becomes a boolean attribute.
+impl From<bool> for AttributeValue {
+    fn from(flag: bool) -> Self {
+        AttributeValue::Boolean(flag)
+    }
+}
+
+/// A string slice is copied into a string attribute.
+impl From<&str> for AttributeValue {
+    fn from(text: &str) -> Self {
+        AttributeValue::String(String::from(text))
+    }
+}
+
+/// Ranking options for a vector store search; unset fields are left out of the JSON.
+#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)]
+pub struct RankingOptions {
+    /// Ranker to use, if one is selected.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ranker: Option<Ranker>,
+
+    /// Score threshold, if one is set.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub score_threshold: Option<f32>,
+}
+
+/// Ranker selectable in [`RankingOptions`].
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub enum Ranker {
+    /// Wire value `auto`.
+    #[serde(rename = "auto")]
+    Auto,
+    /// Wire value `default-2024-11-15`.
+    #[serde(rename = "default-2024-11-15")]
+    Default20241115,
+}
+
+/// Combines several filters with `and` or `or`.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct CompoundFilter {
+    /// The combining operation: `and` or `or`.
+    pub r#type: CompoundFilterType,
+
+    /// The filters being combined; each item is a `ComparisonFilter` or a nested `CompoundFilter`.
+    pub filters: Vec<VectorStoreSearchFilter>,
+}
+
+/// Combining operation of a [`CompoundFilter`]; (de)serialized as the lowercase variant name.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "lowercase")]
+pub enum CompoundFilterType {
+    /// Wire value `and`.
+    And,
+    /// Wire value `or`.
+    Or,
+}
+
+/// One page of vector store search results.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreSearchResultsPage {
+    /// Object type; always `vector_store.search_results.page`.
+    pub object: String,
+
+    /// The query that was used for this search.
+    pub search_query: Vec<String>,
+
+    /// The search result items on this page.
+    pub data: Vec<VectorStoreSearchResultItem>,
+
+    /// Whether further results remain to be fetched.
+    pub has_more: bool,
+
+    /// Token for the next page, if there is one.
+    pub next_page: Option<String>,
+}
+
+/// A single hit in a vector store search.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreSearchResultItem {
+    /// ID of the vector store file.
+    pub file_id: String,
+
+    /// Name of the vector store file.
+    pub filename: String,
+
+    /// Similarity score of this result.
+    pub score: f32, // minimum: 0, maximum: 1
+
+    /// Attributes of the vector store file.
+    pub attributes: HashMap<String, AttributeValue>,
+
+    /// Content chunks taken from the file.
+    pub content: Vec<VectorStoreSearchResultContentObject>,
+}
+
+/// One content chunk of a vector store search result.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct VectorStoreSearchResultContentObject {
+    /// Content type of the chunk.
+    pub r#type: String,
+
+    /// Text content returned by the search.
+    pub text: String,
+}
--- a/lib/async-openai/src/uploads.rs
+++ b/lib/async-openai/src/uploads.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{AddUploadPartRequest, CompleteUploadRequest, CreateUploadRequest, Upload, UploadPart},
+    Client,
+};
+
+/// API group for uploading large files in multiple parts.
+pub struct Uploads<'c, C: Config> {
+    client: &'c Client<C>,
+}
+
+impl<'c, C: Config> Uploads<'c, C> {
+    /// Creates the group on top of a borrowed [`Client`].
+    pub fn new(client: &'c Client<C>) -> Self {
+        Uploads { client }
+    }
+
+    /// Creates an intermediate [Upload](https://platform.openai.com/docs/api-reference/uploads/object)
+    /// object to which [Parts](https://platform.openai.com/docs/api-reference/uploads/part-object)
+    /// can be added. Currently an Upload accepts at most 8 GB in total and expires an hour after
+    /// it is created.
+    ///
+    /// Completing the Upload produces a [File](https://platform.openai.com/docs/api-reference/files/object)
+    /// object containing all uploaded parts, usable across the platform like any other File object.
+    ///
+    /// Some `purpose`s require the correct `mime_type`. See the supported MIME types for your
+    /// use case:
+    /// - [Assistants](https://platform.openai.com/docs/assistants/tools/file-search/supported-files)
+    ///
+    /// For the proper filename extension for each purpose, see the documentation on
+    /// [creating a File](https://platform.openai.com/docs/api-reference/files/create).
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(&self, request: CreateUploadRequest) -> Result<Upload, OpenAIError> {
+        let path = "/uploads";
+        self.client.post(path, request).await
+    }
+
+    /// Adds a [Part](https://platform.openai.com/docs/api-reference/uploads/part-object) to an
+    /// [Upload](https://platform.openai.com/docs/api-reference/uploads/object) object. A Part is
+    /// a chunk of bytes of the file being uploaded.
+    ///
+    /// A Part can be at most 64 MB, and Parts can be added until the 8 GB Upload maximum is reached.
+    ///
+    /// Parts may be added in parallel; their intended order is decided when you
+    /// [complete the Upload](https://platform.openai.com/docs/api-reference/uploads/complete).
+    #[crate::byot(
+        T0 = std::fmt::Display,
+        T1 = Clone,
+        R = serde::de::DeserializeOwned,
+        where_clause =  "reqwest::multipart::Form: crate::traits::AsyncTryFrom<T1, Error = OpenAIError>")]
+    pub async fn add_part(
+        &self,
+        upload_id: &str,
+        request: AddUploadPartRequest,
+    ) -> Result<UploadPart, OpenAIError> {
+        let path = format!("/uploads/{upload_id}/parts");
+        self.client.post_form(&path, request).await
+    }
+
+    /// Completes the [Upload](https://platform.openai.com/docs/api-reference/uploads/object).
+    ///
+    /// The returned Upload object contains a nested
+    /// [File](https://platform.openai.com/docs/api-reference/files/object) object that is ready
+    /// to use in the rest of the platform.
+    ///
+    /// The order of the Parts is given by passing an ordered list of Part IDs.
+    ///
+    /// The number of bytes uploaded on completion must equal the number of bytes specified when
+    /// the Upload object was created. No Parts may be added after an Upload is completed.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn complete(
+        &self,
+        upload_id: &str,
+        request: CompleteUploadRequest,
+    ) -> Result<Upload, OpenAIError> {
+        let path = format!("/uploads/{upload_id}/complete");
+        self.client.post(&path, request).await
+    }
+
+    /// Cancels the Upload. No Parts may be added once an Upload is cancelled.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn cancel(&self, upload_id: &str) -> Result<Upload, OpenAIError> {
+        // The cancel call is posted with an empty JSON object as its body.
+        let empty_body = serde_json::json!({});
+        let path = format!("/uploads/{upload_id}/cancel");
+        self.client.post(&path, empty_body).await
+    }
+}
--- a/lib/async-openai/src/users.rs
+++ b/lib/async-openai/src/users.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::Serialize;
+
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{User, UserDeleteResponse, UserListResponse, UserRoleUpdateRequest},
+    Client,
+};
+
+/// Manage users and their role in an organization. Users will be automatically added to the Default project.
+pub struct Users<'c, C: Config> {
+    client: &'c Client<C>,
+}
+
+impl<'c, C: Config> Users<'c, C> {
+    /// Creates the users API group on top of a borrowed [`Client`].
+    pub fn new(client: &'c Client<C>) -> Self {
+        Self { client }
+    }
+
+    /// Lists all of the users in the organization.
+    ///
+    /// `query` is serialized into the query string of the request.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<UserListResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        self.client
+            .get_with_query("/organization/users", &query)
+            .await
+    }
+
+    /// Modifies a user's role in the organization.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn modify(
+        &self,
+        user_id: &str,
+        request: UserRoleUpdateRequest,
+    ) -> Result<User, OpenAIError> {
+        self.client
+            .post(format!("/organization/users/{user_id}").as_str(), request)
+            .await
+    }
+
+    /// Retrieve a user by their identifier
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, user_id: &str) -> Result<User, OpenAIError> {
+        self.client
+            .get(format!("/organization/users/{user_id}").as_str())
+            .await
+    }
+
+    /// Deletes a user from the organization.
+    ///
+    /// Targets the same `/organization/users/{user_id}` resource path that
+    /// [`Users::retrieve`] and [`Users::modify`] use.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(&self, user_id: &str) -> Result<UserDeleteResponse, OpenAIError> {
+        self.client
+            .delete(format!("/organization/users/{user_id}").as_str())
+            .await
+    }
+}
--- a/lib/async-openai/src/util.rs
+++ b/lib/async-openai/src/util.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use std::path::Path;
+
+use reqwest::Body;
+use tokio::fs::File;
+use tokio_util::codec::{BytesCodec, FramedRead};
+
+use crate::error::OpenAIError;
+use crate::types::InputSource;
+
+/// Opens the file behind a path source and wraps it as a streaming request [`Body`].
+///
+/// # Errors
+///
+/// Returns [`OpenAIError::FileReadError`] when the file cannot be opened, or when
+/// `source` is anything other than [`InputSource::Path`].
+pub(crate) async fn file_stream_body(source: InputSource) -> Result<Body, OpenAIError> {
+    match source {
+        InputSource::Path { path } => {
+            let opened = File::open(path)
+                .await
+                .map_err(|err| OpenAIError::FileReadError(err.to_string()))?;
+            // Frame the file into byte chunks so it is sent as a stream.
+            let chunks = FramedRead::new(opened, BytesCodec::new());
+            Ok(Body::wrap_stream(chunks))
+        }
+        _ => Err(OpenAIError::FileReadError(
+            "Cannot create stream from non-file source".to_string(),
+        )),
+    }
+}
+
+/// Creates the part for the given file for multipart upload.
+///
+/// A path source is streamed from disk and named after the final component of the
+/// path; in-memory sources use the filename and bytes they carry.
+///
+/// # Errors
+///
+/// Returns [`OpenAIError::FileReadError`] when the path has no final component,
+/// when that component is not valid UTF-8, or when the file cannot be opened.
+pub(crate) async fn create_file_part(
+    source: InputSource,
+) -> Result<reqwest::multipart::Part, OpenAIError> {
+    let (stream, file_name) = match source {
+        InputSource::Path { path } => {
+            let file_name = path
+                .file_name()
+                .ok_or_else(|| {
+                    OpenAIError::FileReadError(format!(
+                        "cannot extract file name from {}",
+                        path.display()
+                    ))
+                })?
+                .to_str()
+                // A non-UTF-8 file name is reported as an error instead of panicking.
+                .ok_or_else(|| {
+                    OpenAIError::FileReadError(format!(
+                        "file name of {} is not valid UTF-8",
+                        path.display()
+                    ))
+                })?
+                .to_string();
+
+            (
+                file_stream_body(InputSource::Path { path }).await?,
+                file_name,
+            )
+        }
+        InputSource::Bytes { filename, bytes } => (Body::from(bytes), filename),
+        InputSource::VecU8 { filename, vec } => (Body::from(vec), filename),
+    };
+
+    let file_part = reqwest::multipart::Part::stream(stream).file_name(file_name);
+
+    Ok(file_part)
+}
+
+/// Ensures `dir` exists, creating it and any missing parents when it does not.
+///
+/// # Errors
+///
+/// Returns [`OpenAIError::FileSaveError`] when the existence check or the
+/// directory creation fails.
+pub(crate) fn create_all_dir<P: AsRef<Path>>(dir: P) -> Result<(), OpenAIError> {
+    let target = dir.as_ref();
+
+    let already_there = target
+        .try_exists()
+        .map_err(|err| OpenAIError::FileSaveError(err.to_string()))?;
+    if already_there {
+        return Ok(());
+    }
+
+    std::fs::create_dir_all(target).map_err(|err| OpenAIError::FileSaveError(err.to_string()))
+}
--- a/lib/async-openai/src/vector_store_file_batches.rs
+++ b/lib/async-openai/src/vector_store_file_batches.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::Serialize;
+
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{
+        CreateVectorStoreFileBatchRequest, ListVectorStoreFilesResponse, VectorStoreFileBatchObject,
+    },
+    Client,
+};
+
+/// API group for vector store file batches, i.e. operations that add multiple files to a
+/// vector store.
+///
+/// Related guide: [File Search](https://platform.openai.com/docs/assistants/tools/file-search)
+pub struct VectorStoreFileBatches<'c, C: Config> {
+    client: &'c Client<C>,
+    /// ID of the vector store all requests of this group are scoped to.
+    pub vector_store_id: String,
+}
+
+impl<'c, C: Config> VectorStoreFileBatches<'c, C> {
+    /// Creates the group for the vector store identified by `vector_store_id`.
+    pub fn new(client: &'c Client<C>, vector_store_id: &str) -> Self {
+        let vector_store_id = String::from(vector_store_id);
+        VectorStoreFileBatches {
+            client,
+            vector_store_id,
+        }
+    }
+
+    /// Creates a vector store file batch.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(
+        &self,
+        request: CreateVectorStoreFileBatchRequest,
+    ) -> Result<VectorStoreFileBatchObject, OpenAIError> {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/file_batches");
+        self.client.post(&path, request).await
+    }
+
+    /// Retrieves the vector store file batch identified by `batch_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(
+        &self,
+        batch_id: &str,
+    ) -> Result<VectorStoreFileBatchObject, OpenAIError> {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/file_batches/{batch_id}");
+        self.client.get(&path).await
+    }
+
+    /// Cancels a vector store file batch, attempting to stop the processing of its files as
+    /// soon as possible.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn cancel(&self, batch_id: &str) -> Result<VectorStoreFileBatchObject, OpenAIError> {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/file_batches/{batch_id}/cancel");
+        // The cancel call is posted with an empty JSON object as its body.
+        let empty_body = serde_json::json!({});
+        self.client.post(&path, empty_body).await
+    }
+
+    /// Lists the vector store files in a batch; `query` is serialized into the query string.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(
+        &self,
+        batch_id: &str,
+        query: &Q,
+    ) -> Result<ListVectorStoreFilesResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/file_batches/{batch_id}/files");
+        self.client.get_with_query(&path, &query).await
+    }
+}
--- a/lib/async-openai/src/vector_store_files.rs
+++ b/lib/async-openai/src/vector_store_files.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::Serialize;
+
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{
+        CreateVectorStoreFileRequest, DeleteVectorStoreFileResponse, ListVectorStoreFilesResponse,
+        VectorStoreFileContentResponse, VectorStoreFileObject,
+    },
+    Client,
+};
+
+/// API group for vector store files, i.e. the files inside a vector store.
+///
+/// Related guide: [File Search](https://platform.openai.com/docs/assistants/tools/file-search)
+pub struct VectorStoreFiles<'c, C: Config> {
+    client: &'c Client<C>,
+    /// ID of the vector store all requests of this group are scoped to.
+    pub vector_store_id: String,
+}
+
+impl<'c, C: Config> VectorStoreFiles<'c, C> {
+    /// Creates the group for the vector store identified by `vector_store_id`.
+    pub fn new(client: &'c Client<C>, vector_store_id: &str) -> Self {
+        let vector_store_id = String::from(vector_store_id);
+        VectorStoreFiles {
+            client,
+            vector_store_id,
+        }
+    }
+
+    /// Creates a vector store file by attaching a [File](https://platform.openai.com/docs/api-reference/files) to a [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object).
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(
+        &self,
+        request: CreateVectorStoreFileRequest,
+    ) -> Result<VectorStoreFileObject, OpenAIError> {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/files");
+        self.client.post(&path, request).await
+    }
+
+    /// Retrieves the vector store file identified by `file_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, file_id: &str) -> Result<VectorStoreFileObject, OpenAIError> {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/files/{file_id}");
+        self.client.get(&path).await
+    }
+
+    /// Deletes a vector store file. The file is removed from the vector store, but the file itself is not deleted; to delete it, use the [delete file](https://platform.openai.com/docs/api-reference/files/delete) endpoint.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(
+        &self,
+        file_id: &str,
+    ) -> Result<DeleteVectorStoreFileResponse, OpenAIError> {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/files/{file_id}");
+        self.client.delete(&path).await
+    }
+
+    /// Lists the vector store files; `query` is serialized into the query string.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ListVectorStoreFilesResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/files");
+        self.client.get_with_query(&path, &query).await
+    }
+
+    /// Retrieves the parsed contents of the vector store file identified by `file_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve_file_content(
+        &self,
+        file_id: &str,
+    ) -> Result<VectorStoreFileContentResponse, OpenAIError> {
+        let store_id = &self.vector_store_id;
+        let path = format!("/vector_stores/{store_id}/files/{file_id}/content");
+        self.client.get(&path).await
+    }
+}
--- a/lib/async-openai/src/vector_stores.rs
+++ b/lib/async-openai/src/vector_stores.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+
+use serde::Serialize;
+
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{
+        CreateVectorStoreRequest, DeleteVectorStoreResponse, ListVectorStoresResponse,
+        UpdateVectorStoreRequest, VectorStoreObject, VectorStoreSearchRequest,
+        VectorStoreSearchResultsPage,
+    },
+    vector_store_file_batches::VectorStoreFileBatches,
+    Client, VectorStoreFiles,
+};
+
+/// API group for vector stores, with access to the nested file and file-batch groups.
+pub struct VectorStores<'c, C: Config> {
+    client: &'c Client<C>,
+}
+
+impl<'c, C: Config> VectorStores<'c, C> {
+    /// Creates the group on top of a borrowed [`Client`].
+    pub fn new(client: &'c Client<C>) -> Self {
+        VectorStores { client }
+    }
+
+    /// [VectorStoreFiles] API group for the given vector store.
+    pub fn files(&self, vector_store_id: &str) -> VectorStoreFiles<C> {
+        let client = self.client;
+        VectorStoreFiles::new(client, vector_store_id)
+    }
+
+    /// [VectorStoreFileBatches] API group for the given vector store.
+    pub fn file_batches(&self, vector_store_id: &str) -> VectorStoreFileBatches<C> {
+        let client = self.client;
+        VectorStoreFileBatches::new(client, vector_store_id)
+    }
+
+    /// Creates a vector store.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(
+        &self,
+        request: CreateVectorStoreRequest,
+    ) -> Result<VectorStoreObject, OpenAIError> {
+        let path = "/vector_stores";
+        self.client.post(path, request).await
+    }
+
+    /// Retrieves the vector store identified by `vector_store_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, vector_store_id: &str) -> Result<VectorStoreObject, OpenAIError> {
+        let path = format!("/vector_stores/{vector_store_id}");
+        self.client.get(&path).await
+    }
+
+    /// Lists vector stores; `query` is serialized into the query string.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ListVectorStoresResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let path = "/vector_stores";
+        self.client.get_with_query(path, &query).await
+    }
+
+    /// Deletes the vector store identified by `vector_store_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(
+        &self,
+        vector_store_id: &str,
+    ) -> Result<DeleteVectorStoreResponse, OpenAIError> {
+        let path = format!("/vector_stores/{vector_store_id}");
+        self.client.delete(&path).await
+    }
+
+    /// Modifies the vector store identified by `vector_store_id`.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn update(
+        &self,
+        vector_store_id: &str,
+        request: UpdateVectorStoreRequest,
+    ) -> Result<VectorStoreObject, OpenAIError> {
+        let path = format!("/vector_stores/{vector_store_id}");
+        self.client.post(&path, request).await
+    }
+
+    /// Searches the vector store identified by `vector_store_id`.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn search(
+        &self,
+        vector_store_id: &str,
+        request: VectorStoreSearchRequest,
+    ) -> Result<VectorStoreSearchResultsPage, OpenAIError> {
+        let path = format!("/vector_stores/{vector_store_id}/search");
+        self.client.post(&path, request).await
+    }
+}