chore: Bring async-openai into repo as request starter (#2520)

Co-authored-by: Graham King <grahamk@nvidia.com>

chore: Bring async-openai into repo as request starter (#2520)
Co-authored-by: Graham King <grahamk@nvidia.com>
199b9a30 · nachiketb-nvidia · GitHub · 26d9f159 · 199b9a30 · 199b9a30
Unverified Commit 199b9a30 authored Aug 19, 2025 by nachiketb-nvidia Committed by GitHub Aug 19, 2025
20 changed files
--- a/lib/async-openai/src/invites.rs
+++ b/lib/async-openai/src/invites.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{Invite, InviteDeleteResponse, InviteListResponse, InviteRequest},
+    Client,
+};
+/// API group for organization invitations: listing, fetching, creating and deleting invites.
+///
+/// Invited users are automatically added to the Default project.
+pub struct Invites<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Invites<'c, C> {
+    /// Wraps `client` in the invites API group.
+    pub fn new(client: &'c Client<C>) -> Self {
+        Self { client }
+    }
+    /// Returns a list of invites in the organization.
+    ///
+    /// `query` is handed to `Client::get_with_query` together with the invites path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<InviteListResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let path = "/organization/invites";
+        self.client.get_with_query(path, &query).await
+    }
+    /// Retrieves the invite identified by `invite_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, invite_id: &str) -> Result<Invite, OpenAIError> {
+        let path = format!("/organization/invites/{invite_id}");
+        self.client.get(&path).await
+    }
+    /// Create an invite for a user to the organization. The invite must be accepted by the
+    /// user before they have access to the organization.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(&self, request: InviteRequest) -> Result<Invite, OpenAIError> {
+        let path = "/organization/invites";
+        self.client.post(path, request).await
+    }
+    /// Delete the invite identified by `invite_id`. If the invite has already been accepted,
+    /// it cannot be deleted.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(&self, invite_id: &str) -> Result<InviteDeleteResponse, OpenAIError> {
+        let path = format!("/organization/invites/{invite_id}");
+        self.client.delete(&path).await
+    }
+}
--- a/lib/async-openai/src/lib.rs
+++ b/lib/async-openai/src/lib.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+//! ## Examples
+//! For full working examples of all supported features, see the [examples](https://github.com/64bit/async-openai/tree/main/examples) directory in the upstream repository.
+//!
+#![allow(deprecated)]
+#![allow(warnings)]
+#![cfg_attr(docsrs, feature(doc_cfg))]
+#[cfg(feature = "byot")]
+pub(crate) use async_openai_macros::byot;
+#[cfg(not(feature = "byot"))]
+pub(crate) use async_openai_macros::byot_passthrough as byot;
+mod assistants;
+mod audio;
+mod audit_logs;
+mod batches;
+mod chat;
+mod client;
+mod completion;
+pub mod config;
+mod download;
+mod embedding;
+pub mod error;
+mod file;
+mod fine_tuning;
+mod image;
+mod invites;
+mod messages;
+mod model;
+mod moderation;
+mod project_api_keys;
+mod project_service_accounts;
+mod project_users;
+mod projects;
+mod responses;
+mod runs;
+mod steps;
+mod threads;
+pub mod traits;
+pub mod types;
+mod uploads;
+mod users;
+mod util;
+mod vector_store_file_batches;
+mod vector_store_files;
+mod vector_stores;
+pub use assistants::Assistants;
+pub use audio::Audio;
+pub use audit_logs::AuditLogs;
+pub use batches::Batches;
+pub use chat::Chat;
+pub use client::Client;
+pub use completion::Completions;
+pub use embedding::Embeddings;
+pub use file::Files;
+pub use fine_tuning::FineTuning;
+pub use image::Images;
+pub use invites::Invites;
+pub use messages::Messages;
+pub use model::Models;
+pub use moderation::Moderations;
+pub use project_api_keys::ProjectAPIKeys;
+pub use project_service_accounts::ProjectServiceAccounts;
+pub use project_users::ProjectUsers;
+pub use projects::Projects;
+pub use responses::Responses;
+pub use runs::Runs;
+pub use steps::Steps;
+pub use threads::Threads;
+pub use uploads::Uploads;
+pub use users::Users;
+pub use vector_store_file_batches::VectorStoreFileBatches;
+pub use vector_store_files::VectorStoreFiles;
+pub use vector_stores::VectorStores;
--- a/lib/async-openai/src/messages.rs
+++ b/lib/async-openai/src/messages.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{
+        CreateMessageRequest, DeleteMessageResponse, ListMessagesResponse, MessageObject,
+        ModifyMessageRequest,
+    },
+    Client,
+};
+/// API group for the messages of one [thread](https://platform.openai.com/docs/api-reference/threads).
+pub struct Messages<'c, C: Config> {
+    /// ID of the [thread](https://platform.openai.com/docs/api-reference/threads) whose messages this group operates on.
+    pub thread_id: String,
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Messages<'c, C> {
+    /// Creates the messages group for the thread identified by `thread_id`.
+    pub fn new(client: &'c Client<C>, thread_id: &str) -> Self {
+        let thread_id = String::from(thread_id);
+        Self { thread_id, client }
+    }
+    /// Create a message in the thread.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(
+        &self,
+        request: CreateMessageRequest,
+    ) -> Result<MessageObject, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/messages");
+        self.client.post(&path, request).await
+    }
+    /// Retrieve the message identified by `message_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, message_id: &str) -> Result<MessageObject, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/messages/{message_id}");
+        self.client.get(&path).await
+    }
+    /// Modifies the message identified by `message_id`.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn update(
+        &self,
+        message_id: &str,
+        request: ModifyMessageRequest,
+    ) -> Result<MessageObject, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/messages/{message_id}");
+        self.client.post(&path, request).await
+    }
+    /// Returns a list of messages for a given thread.
+    ///
+    /// `query` is handed to `Client::get_with_query` together with the thread's messages path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ListMessagesResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/messages");
+        self.client.get_with_query(&path, &query).await
+    }
+    /// Issues a DELETE request for the message identified by `message_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(&self, message_id: &str) -> Result<DeleteMessageResponse, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/messages/{message_id}");
+        self.client.delete(&path).await
+    }
+}
--- a/lib/async-openai/src/model.rs
+++ b/lib/async-openai/src/model.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{DeleteModelResponse, ListModelResponse, Model},
+    Client,
+};
+/// API group for listing, describing and deleting the models available in the API.
+///
+/// See the [Models](https://platform.openai.com/docs/models) documentation for the available
+/// models and the differences between them.
+pub struct Models<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Models<'c, C> {
+    /// Wraps `client` in the models API group.
+    pub fn new(client: &'c Client<C>) -> Self {
+        Self { client }
+    }
+    /// Lists the currently available models, and provides basic information
+    /// about each one such as the owner and availability.
+    #[crate::byot(R = serde::de::DeserializeOwned)]
+    pub async fn list(&self) -> Result<ListModelResponse, OpenAIError> {
+        let path = "/models";
+        self.client.get(path).await
+    }
+    /// Retrieves the model instance identified by `id`, providing basic information about
+    /// the model such as the owner and permissioning.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, id: &str) -> Result<Model, OpenAIError> {
+        let path = format!("/models/{id}");
+        self.client.get(&path).await
+    }
+    /// Delete a fine-tuned model. You must have the Owner role in your organization.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(&self, model: &str) -> Result<DeleteModelResponse, OpenAIError> {
+        let path = format!("/models/{model}");
+        self.client.delete(&path).await
+    }
+}
--- a/lib/async-openai/src/moderation.rs
+++ b/lib/async-openai/src/moderation.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{CreateModerationRequest, CreateModerationResponse},
+    Client,
+};
+/// API group for moderation: given text and/or image inputs, classifies whether those inputs
+/// are potentially harmful across several categories.
+///
+/// Related guide: [Moderations](https://platform.openai.com/docs/guides/moderation)
+pub struct Moderations<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Moderations<'c, C> {
+    /// Wraps `client` in the moderations API group.
+    pub fn new(client: &'c Client<C>) -> Self {
+        Self { client }
+    }
+    /// Classifies if text and/or image inputs are potentially harmful by posting `request`
+    /// to `/moderations`. Learn more in the
+    /// [moderation guide](https://platform.openai.com/docs/guides/moderation).
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(
+        &self,
+        request: CreateModerationRequest,
+    ) -> Result<CreateModerationResponse, OpenAIError> {
+        let path = "/moderations";
+        self.client.post(path, request).await
+    }
+}
--- a/lib/async-openai/src/project_api_keys.rs
+++ b/lib/async-openai/src/project_api_keys.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{ProjectApiKey, ProjectApiKeyDeleteResponse, ProjectApiKeyListResponse},
+    Client,
+};
+/// API group for the API keys of a given project. Supports listing, retrieving and deleting keys.
+/// This API does not allow issuing keys for users, as users need to authorize themselves to generate keys.
+pub struct ProjectAPIKeys<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+    /// ID of the project whose API keys this group operates on.
+    pub project_id: String,
+}
+impl<'c, C: Config> ProjectAPIKeys<'c, C> {
+    /// Creates the API-keys group for the project identified by `project_id`.
+    pub fn new(client: &'c Client<C>, project_id: &str) -> Self {
+        let project_id = String::from(project_id);
+        Self { client, project_id }
+    }
+    /// Returns a list of API keys in the project.
+    ///
+    /// `query` is handed to `Client::get_with_query` together with the project's keys path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ProjectApiKeyListResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/api_keys");
+        self.client.get_with_query(&path, &query).await
+    }
+    /// Retrieves the API key `api_key` in the project.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, api_key: &str) -> Result<ProjectApiKey, OpenAIError> {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/api_keys/{api_key}");
+        self.client.get(&path).await
+    }
+    /// Deletes the API key `api_key` from the project.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(&self, api_key: &str) -> Result<ProjectApiKeyDeleteResponse, OpenAIError> {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/api_keys/{api_key}");
+        self.client.delete(&path).await
+    }
+}
--- a/lib/async-openai/src/project_service_accounts.rs
+++ b/lib/async-openai/src/project_service_accounts.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{
+        ProjectServiceAccount, ProjectServiceAccountCreateRequest,
+        ProjectServiceAccountCreateResponse, ProjectServiceAccountDeleteResponse,
+        ProjectServiceAccountListResponse,
+    },
+    Client,
+};
+/// API group for the service accounts of a project. A service account is a bot user that is
+/// not associated with a user. If a user leaves an organization, their keys and membership in
+/// projects will no longer work. Service accounts do not have this limitation.
+/// However, service accounts can also be deleted from a project.
+pub struct ProjectServiceAccounts<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+    /// ID of the project whose service accounts this group operates on.
+    pub project_id: String,
+}
+impl<'c, C: Config> ProjectServiceAccounts<'c, C> {
+    /// Creates the service-accounts group for the project identified by `project_id`.
+    pub fn new(client: &'c Client<C>, project_id: &str) -> Self {
+        let project_id = String::from(project_id);
+        Self { client, project_id }
+    }
+    /// Returns a list of service accounts in the project.
+    ///
+    /// `query` is handed to `Client::get_with_query` together with the project's
+    /// service-accounts path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ProjectServiceAccountListResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/service_accounts");
+        self.client.get_with_query(&path, &query).await
+    }
+    /// Creates a new service account in the project. This also returns an unredacted API key
+    /// for the service account.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(
+        &self,
+        request: ProjectServiceAccountCreateRequest,
+    ) -> Result<ProjectServiceAccountCreateResponse, OpenAIError> {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/service_accounts");
+        self.client.post(&path, request).await
+    }
+    /// Retrieves the service account `service_account_id` in the project.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(
+        &self,
+        service_account_id: &str,
+    ) -> Result<ProjectServiceAccount, OpenAIError> {
+        let project_id = &self.project_id;
+        let path =
+            format!("/organization/projects/{project_id}/service_accounts/{service_account_id}");
+        self.client.get(&path).await
+    }
+    /// Deletes the service account `service_account_id` from the project.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(
+        &self,
+        service_account_id: &str,
+    ) -> Result<ProjectServiceAccountDeleteResponse, OpenAIError> {
+        let project_id = &self.project_id;
+        let path =
+            format!("/organization/projects/{project_id}/service_accounts/{service_account_id}");
+        self.client.delete(&path).await
+    }
+}
--- a/lib/async-openai/src/project_users.rs
+++ b/lib/async-openai/src/project_users.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{
+        ProjectUser, ProjectUserCreateRequest, ProjectUserDeleteResponse, ProjectUserListResponse,
+        ProjectUserUpdateRequest,
+    },
+    Client,
+};
+/// API group for the users of a project: adding users, updating roles, and removing users.
+/// Users cannot be removed from the Default project, unless they are being removed from the organization.
+pub struct ProjectUsers<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+    /// ID of the project whose users this group operates on.
+    pub project_id: String,
+}
+impl<'c, C: Config> ProjectUsers<'c, C> {
+    /// Creates the users group for the project identified by `project_id`.
+    pub fn new(client: &'c Client<C>, project_id: &str) -> Self {
+        let project_id = String::from(project_id);
+        Self { client, project_id }
+    }
+    /// Returns a list of users in the project.
+    ///
+    /// `query` is handed to `Client::get_with_query` together with the project's users path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ProjectUserListResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/users");
+        self.client.get_with_query(&path, &query).await
+    }
+    /// Adds a user to the project. Users must already be members of the organization to be
+    /// added to a project.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(
+        &self,
+        request: ProjectUserCreateRequest,
+    ) -> Result<ProjectUser, OpenAIError> {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/users");
+        self.client.post(&path, request).await
+    }
+    /// Retrieves the user `user_id` in the project.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, user_id: &str) -> Result<ProjectUser, OpenAIError> {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/users/{user_id}");
+        self.client.get(&path).await
+    }
+    /// Modifies the role of user `user_id` in the project.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn modify(
+        &self,
+        user_id: &str,
+        request: ProjectUserUpdateRequest,
+    ) -> Result<ProjectUser, OpenAIError> {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/users/{user_id}");
+        self.client.post(&path, request).await
+    }
+    /// Deletes the user `user_id` from the project.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(&self, user_id: &str) -> Result<ProjectUserDeleteResponse, OpenAIError> {
+        let project_id = &self.project_id;
+        let path = format!("/organization/projects/{project_id}/users/{user_id}");
+        self.client.delete(&path).await
+    }
+}
--- a/lib/async-openai/src/projects.rs
+++ b/lib/async-openai/src/projects.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    project_api_keys::ProjectAPIKeys,
+    types::{Project, ProjectCreateRequest, ProjectListResponse, ProjectUpdateRequest},
+    Client, ProjectServiceAccounts, ProjectUsers,
+};
+/// Manage the projects within an organization, including creation, updating, and archiving of projects.
+/// The Default project cannot be modified or archived.
+pub struct Projects<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Projects<'c, C> {
+    /// Wraps `client` in the projects API group.
+    pub fn new(client: &'c Client<C>) -> Self {
+        Self { client }
+    }
+    /// Returns the [`ProjectUsers`] API group for the project identified by `project_id`.
+    pub fn users(&self, project_id: &str) -> ProjectUsers<'_, C> {
+        ProjectUsers::new(self.client, project_id)
+    }
+    /// Returns the [`ProjectServiceAccounts`] API group for the project identified by `project_id`.
+    pub fn service_accounts(&self, project_id: &str) -> ProjectServiceAccounts<'_, C> {
+        ProjectServiceAccounts::new(self.client, project_id)
+    }
+    /// Returns the [`ProjectAPIKeys`] API group for the project identified by `project_id`.
+    pub fn api_keys(&self, project_id: &str) -> ProjectAPIKeys<'_, C> {
+        ProjectAPIKeys::new(self.client, project_id)
+    }
+    /// Returns a list of projects.
+    ///
+    /// `query` is handed to `Client::get_with_query` together with the projects path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ProjectListResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        self.client
+            .get_with_query("/organization/projects", &query)
+            .await
+    }
+    /// Create a new project in the organization. Projects can be created and archived, but cannot be deleted.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(&self, request: ProjectCreateRequest) -> Result<Project, OpenAIError> {
+        self.client.post("/organization/projects", request).await
+    }
+    /// Retrieves the project identified by `project_id`.
+    // NOTE(review): `project_id` is taken as an owned `String` here, unlike the `&str` ids used
+    // by the sibling API groups; kept as-is so existing callers continue to compile.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, project_id: String) -> Result<Project, OpenAIError> {
+        let path = format!("/organization/projects/{project_id}");
+        self.client.get(&path).await
+    }
+    /// Modifies the project identified by `project_id`.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn modify(
+        &self,
+        project_id: String,
+        request: ProjectUpdateRequest,
+    ) -> Result<Project, OpenAIError> {
+        let path = format!("/organization/projects/{project_id}");
+        self.client.post(&path, request).await
+    }
+    /// Archives the project identified by `project_id`. Archived projects cannot be used or updated.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn archive(&self, project_id: String) -> Result<Project, OpenAIError> {
+        let path = format!("/organization/projects/{project_id}/archive");
+        // The archive call carries no payload, so the unit value is posted as the request body.
+        self.client.post(&path, ()).await
+    }
+}
--- a/lib/async-openai/src/responses.rs
+++ b/lib/async-openai/src/responses.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::responses::{CreateResponse, Response, ResponseStream},
+    Client,
+};
+/// API group for model responses: given text input or a list of context items, the model
+/// will generate a response.
+///
+/// Related guide: [Responses](https://platform.openai.com/docs/api-reference/responses)
+pub struct Responses<'c, C: Config> {
+    /// Borrowed client that performs every request issued by this group.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Responses<'c, C> {
+    /// Wraps `client` in the responses API group.
+    pub fn new(client: &'c Client<C>) -> Self {
+        Self { client }
+    }
+    /// Creates a model response for the given input by posting `request` to `/responses`.
+    #[crate::byot(
+        T0 = serde::Serialize,
+        R = serde::de::DeserializeOwned
+    )]
+    pub async fn create(&self, request: CreateResponse) -> Result<Response, OpenAIError> {
+        let path = "/responses";
+        self.client.post(path, request).await
+    }
+    /// Creates a model response for the given input with streaming.
+    ///
+    /// Response events will be sent as server-sent events as they become available.
+    /// Without the `byot` feature, `request.stream` is set to `Some(true)` before sending and
+    /// an explicit `Some(false)` is rejected with `OpenAIError::InvalidArgument`.
+    #[crate::byot(
+        T0 = serde::Serialize,
+        R = serde::de::DeserializeOwned,
+        stream = "true",
+        where_clause = "R: std::marker::Send + 'static"
+    )]
+    #[allow(unused_mut)]
+    pub async fn create_stream(
+        &self,
+        mut request: CreateResponse,
+    ) -> Result<ResponseStream, OpenAIError> {
+        #[cfg(not(feature = "byot"))]
+        {
+            if let Some(false) = request.stream {
+                return Err(OpenAIError::InvalidArgument(
+                    "When stream is false, use Responses::create".into(),
+                ));
+            }
+            request.stream = Some(true);
+        }
+        let events = self.client.post_stream("/responses", request).await;
+        Ok(events)
+    }
+}
--- a/lib/async-openai/src/runs.rs
+++ b/lib/async-openai/src/runs.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    steps::Steps,
+    types::{
+        AssistantEventStream, CreateRunRequest, ListRunsResponse, ModifyRunRequest, RunObject,
+        SubmitToolOutputsRunRequest,
+    },
+    Client,
+};
+/// API group for the execution runs of a single thread, identified by `thread_id`.
+///
+/// Related guide: [Assistants](https://platform.openai.com/docs/assistants/overview)
+pub struct Runs<'c, C: Config> {
+    pub thread_id: String,
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Runs<'c, C> {
+    /// Creates the runs group for the thread identified by `thread_id`.
+    pub fn new(client: &'c Client<C>, thread_id: &str) -> Self {
+        let thread_id = String::from(thread_id);
+        Self { thread_id, client }
+    }
+    /// [Steps] API group for the run `run_id` of this thread.
+    pub fn steps(&self, run_id: &str) -> Steps<C> {
+        let thread_id = &self.thread_id;
+        Steps::new(self.client, thread_id, run_id)
+    }
+    /// Create a run on this thread by posting `request` to the thread's runs path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(&self, request: CreateRunRequest) -> Result<RunObject, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/runs");
+        self.client.post(&path, request).await
+    }
+    /// Create a run and receive its events as a stream.
+    ///
+    /// Without the `byot` feature, `request.stream` is set to `Some(true)` before sending and
+    /// an explicit `Some(false)` is rejected with `OpenAIError::InvalidArgument`.
+    ///
+    /// byot: You must ensure "stream: true" in serialized `request`
+    #[crate::byot(
+        T0 = serde::Serialize,
+        R = serde::de::DeserializeOwned,
+        stream = "true",
+        where_clause = "R: std::marker::Send + 'static + TryFrom<eventsource_stream::Event, Error = OpenAIError>"
+    )]
+    // `request` is only mutated inside the cfg-gated block below, so `mut` is unused when
+    // that block is compiled out.
+    #[allow(unused_mut)]
+    pub async fn create_stream(
+        &self,
+        mut request: CreateRunRequest,
+    ) -> Result<AssistantEventStream, OpenAIError> {
+        #[cfg(not(feature = "byot"))]
+        {
+            // Only an explicit `stream: false` is an error; `None` is upgraded to `Some(true)`.
+            if matches!(request.stream, Some(false)) {
+                return Err(OpenAIError::InvalidArgument(
+                    "When stream is false, use Runs::create".into(),
+                ));
+            }
+            request.stream = Some(true);
+        }
+        Ok(self
+            .client
+            .post_stream_mapped_raw_events(
+                &format!("/threads/{}/runs", self.thread_id),
+                request,
+                TryFrom::try_from,
+            )
+            .await)
+    }
+    /// Retrieves the run identified by `run_id`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, run_id: &str) -> Result<RunObject, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/runs/{run_id}");
+        self.client.get(&path).await
+    }
+    /// Modifies the run identified by `run_id` by posting `request` to the run's path.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn update(
+        &self,
+        run_id: &str,
+        request: ModifyRunRequest,
+    ) -> Result<RunObject, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/runs/{run_id}");
+        self.client.post(&path, request).await
+    }
+    /// Returns a list of runs belonging to a thread.
+    ///
+    /// `query` is handed to `Client::get_with_query` together with the thread's runs path.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ListRunsResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/runs");
+        self.client.get_with_query(&path, &query).await
+    }
+    /// Submits tool outputs for the run identified by `run_id`.
+    ///
+    /// When a run has the status: "requires_action" and required_action.type is
+    /// submit_tool_outputs, this endpoint can be used to submit the outputs from the tool calls
+    /// once they're all completed. All outputs must be submitted in a single request.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn submit_tool_outputs(
+        &self,
+        run_id: &str,
+        request: SubmitToolOutputsRunRequest,
+    ) -> Result<RunObject, OpenAIError> {
+        let thread_id = &self.thread_id;
+        let path = format!("/threads/{thread_id}/runs/{run_id}/submit_tool_outputs");
+        self.client.post(&path, request).await
+    }
+    /// Streaming variant of [`Self::submit_tool_outputs`]: posts `request` to
+    /// `/threads/{thread_id}/runs/{run_id}/submit_tool_outputs` and returns the event stream
+    /// produced by the client's `post_stream_mapped_raw_events`.
+    ///
+    /// Without the `byot` feature, a `request` whose `stream` is explicitly `Some(false)` is
+    /// rejected with [`OpenAIError::InvalidArgument`]; in every other case `stream` is forced to
+    /// `Some(true)` before sending.
+    ///
+    /// byot: You must ensure "stream: true" in serialized `request`
+    #[crate::byot(
+        T0 = std::fmt::Display,
+        T1 = serde::Serialize,
+        R = serde::de::DeserializeOwned,
+        stream = "true",
+        where_clause = "R: std::marker::Send + 'static + TryFrom<eventsource_stream::Event, Error = OpenAIError>"
+    )]
+    // With the `byot` feature the only block that mutates `request` is compiled out,
+    // which would otherwise leave `mut` flagged as unused.
+    #[allow(unused_mut)]
+    pub async fn submit_tool_outputs_stream(
+        &self,
+        run_id: &str,
+        mut request: SubmitToolOutputsRunRequest,
+    ) -> Result<AssistantEventStream, OpenAIError> {
+        #[cfg(not(feature = "byot"))]
+        {
+            // Only an explicit `false` is an error; `None` is simply upgraded to `Some(true)`.
+            if matches!(request.stream, Some(false)) {
+                return Err(OpenAIError::InvalidArgument(
+                    "When stream is false, use Runs::submit_tool_outputs".into(),
+                ));
+            }
+            request.stream = Some(true);
+        }
+        let path = format!(
+            "/threads/{}/runs/{run_id}/submit_tool_outputs",
+            self.thread_id
+        );
+        Ok(self
+            .client
+            .post_stream_mapped_raw_events(&path, request, TryFrom::try_from)
+            .await)
+    }
+    /// Cancels a run that is `in_progress`
+    ///
+    /// Posts an empty `()` body to `/threads/{thread_id}/runs/{run_id}/cancel`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn cancel(&self, run_id: &str) -> Result<RunObject, OpenAIError> {
+        let path = format!("/threads/{}/runs/{run_id}/cancel", self.thread_id);
+        self.client.post(&path, ()).await
+    }
+}
--- a/lib/async-openai/src/steps.rs
+++ b/lib/async-openai/src/steps.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use serde::Serialize;
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{ListRunStepsResponse, RunStepObject},
+    Client,
+};
+/// Represents a step in execution of a run.
+///
+/// API group scoped to one run of one thread; both IDs are interpolated into every
+/// request path built by this type's methods.
+pub struct Steps<'c, C: Config> {
+    /// ID of the thread that owns the run.
+    pub thread_id: String,
+    /// ID of the run whose steps are queried.
+    pub run_id: String,
+    // Borrowed client through which all requests of this group are sent.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Steps<'c, C> {
+    /// Creates the run-steps group for run `run_id` of thread `thread_id`, borrowing `client`.
+    ///
+    /// Both IDs are copied into owned `String`s.
+    pub fn new(client: &'c Client<C>, thread_id: &str, run_id: &str) -> Self {
+        let thread_id = String::from(thread_id);
+        let run_id = String::from(run_id);
+        Self {
+            thread_id,
+            run_id,
+            client,
+        }
+    }
+    /// Retrieves a run step.
+    ///
+    /// Calls the client's `get` on `/threads/{thread_id}/runs/{run_id}/steps/{step_id}`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, step_id: &str) -> Result<RunStepObject, OpenAIError> {
+        let Self {
+            thread_id, run_id, ..
+        } = self;
+        let path = format!("/threads/{thread_id}/runs/{run_id}/steps/{step_id}");
+        self.client.get(&path).await
+    }
+    /// Returns a list of run steps belonging to a run.
+    ///
+    /// `query` is forwarded to the client's `get_with_query` for
+    /// `/threads/{thread_id}/runs/{run_id}/steps`.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn list<Q>(&self, query: &Q) -> Result<ListRunStepsResponse, OpenAIError>
+    where
+        Q: Serialize + ?Sized,
+    {
+        let Self {
+            thread_id, run_id, ..
+        } = self;
+        let path = format!("/threads/{thread_id}/runs/{run_id}/steps");
+        self.client.get_with_query(&path, &query).await
+    }
+}
--- a/lib/async-openai/src/threads.rs
+++ b/lib/async-openai/src/threads.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use crate::{
+    config::Config,
+    error::OpenAIError,
+    types::{
+        AssistantEventStream, CreateThreadAndRunRequest, CreateThreadRequest, DeleteThreadResponse,
+        ModifyThreadRequest, RunObject, ThreadObject,
+    },
+    Client, Messages, Runs,
+};
+/// Create threads that assistants can interact with.
+///
+/// Related guide: [Assistants](https://platform.openai.com/docs/assistants/overview)
+pub struct Threads<'c, C: Config> {
+    // Borrowed client through which all requests of this group are sent.
+    client: &'c Client<C>,
+}
+impl<'c, C: Config> Threads<'c, C> {
+    /// Creates the threads API group on top of a borrowed `client`.
+    pub fn new(client: &'c Client<C>) -> Self {
+        Self { client }
+    }
+    /// Call [Messages] group API to manage message in `thread_id` thread.
+    pub fn messages(&self, thread_id: &str) -> Messages<'_, C> {
+        Messages::new(self.client, thread_id)
+    }
+    /// Call [Runs] group API to manage runs in `thread_id` thread.
+    pub fn runs(&self, thread_id: &str) -> Runs<'_, C> {
+        Runs::new(self.client, thread_id)
+    }
+    /// Create a thread and run it in one request.
+    ///
+    /// Posts `request` to `/threads/runs`.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create_and_run(
+        &self,
+        request: CreateThreadAndRunRequest,
+    ) -> Result<RunObject, OpenAIError> {
+        self.client.post("/threads/runs", request).await
+    }
+    /// Create a thread and run it in one request (streaming).
+    ///
+    /// Without the `byot` feature, a `request` whose `stream` is explicitly `Some(false)` is
+    /// rejected with [`OpenAIError::InvalidArgument`]; in every other case `stream` is forced to
+    /// `Some(true)` before sending.
+    ///
+    /// byot: You must ensure "stream: true" in serialized `request`
+    #[crate::byot(
+        T0 = serde::Serialize,
+        R = serde::de::DeserializeOwned,
+        stream = "true",
+        where_clause = "R: std::marker::Send + 'static + TryFrom<eventsource_stream::Event, Error = OpenAIError>"
+    )]
+    // With the `byot` feature the only block that mutates `request` is compiled out,
+    // which would otherwise leave `mut` flagged as unused.
+    #[allow(unused_mut)]
+    pub async fn create_and_run_stream(
+        &self,
+        mut request: CreateThreadAndRunRequest,
+    ) -> Result<AssistantEventStream, OpenAIError> {
+        #[cfg(not(feature = "byot"))]
+        {
+            // Only an explicit `false` is an error; `None` is simply upgraded to `Some(true)`.
+            if matches!(request.stream, Some(false)) {
+                return Err(OpenAIError::InvalidArgument(
+                    "When stream is false, use Threads::create_and_run".into(),
+                ));
+            }
+            request.stream = Some(true);
+        }
+        Ok(self
+            .client
+            .post_stream_mapped_raw_events("/threads/runs", request, TryFrom::try_from)
+            .await)
+    }
+    /// Create a thread.
+    ///
+    /// Posts `request` to `/threads`.
+    #[crate::byot(T0 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn create(&self, request: CreateThreadRequest) -> Result<ThreadObject, OpenAIError> {
+        self.client.post("/threads", request).await
+    }
+    /// Retrieves a thread.
+    ///
+    /// Calls the client's `get` on `/threads/{thread_id}`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn retrieve(&self, thread_id: &str) -> Result<ThreadObject, OpenAIError> {
+        self.client.get(&format!("/threads/{thread_id}")).await
+    }
+    /// Modifies a thread.
+    ///
+    /// Posts `request` to `/threads/{thread_id}`.
+    #[crate::byot(T0 = std::fmt::Display, T1 = serde::Serialize, R = serde::de::DeserializeOwned)]
+    pub async fn update(
+        &self,
+        thread_id: &str,
+        request: ModifyThreadRequest,
+    ) -> Result<ThreadObject, OpenAIError> {
+        self.client
+            .post(&format!("/threads/{thread_id}"), request)
+            .await
+    }
+    /// Delete a thread.
+    ///
+    /// Calls the client's `delete` on `/threads/{thread_id}`.
+    #[crate::byot(T0 = std::fmt::Display, R = serde::de::DeserializeOwned)]
+    pub async fn delete(&self, thread_id: &str) -> Result<DeleteThreadResponse, OpenAIError> {
+        self.client.delete(&format!("/threads/{thread_id}")).await
+    }
+}
--- a/lib/async-openai/src/traits.rs
+++ b/lib/async-openai/src/traits.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+/// Fallible conversion from `T` whose result is produced by a future.
+///
+/// Mirrors the shape of [`std::convert::TryFrom`], except that `try_from` returns a
+/// `Send` future resolving to `Result<Self, Self::Error>` instead of the `Result` itself.
+pub trait AsyncTryFrom<T>: Sized {
+    /// The type returned in the event of a conversion error.
+    type Error;
+    /// Performs the conversion.
+    fn try_from(value: T) -> impl std::future::Future<Output = Result<Self, Self::Error>> + Send;
+}
--- a/lib/async-openai/src/types/assistant.rs
+++ b/lib/async-openai/src/types/assistant.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use std::collections::HashMap;
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use crate::error::OpenAIError;
+use super::{FunctionName, FunctionObject, ResponseFormat};
+/// Resources made available to the `code_interpreter` tool.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+pub struct AssistantToolCodeInterpreterResources {
+    /// A list of [file](https://platform.openai.com/docs/api-reference/files) IDs made available to the `code_interpreter` tool. There can be a maximum of 20 files associated with the tool.
+    pub file_ids: Vec<String>, // maxItems: 20
+}
+/// Resources made available to the `file_search` tool.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+pub struct AssistantToolFileSearchResources {
+    /// The IDs of the [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to this assistant. There can be a maximum of 1 vector store attached to the assistant.
+    pub vector_store_ids: Vec<String>,
+}
+/// Per-tool resources of an assistant.
+///
+/// Each field is left out of the serialized form when it is `None`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct AssistantToolResources {
+    /// Resources for the `code_interpreter` tool.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub code_interpreter: Option<AssistantToolCodeInterpreterResources>,
+    /// Resources for the `file_search` tool.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_search: Option<AssistantToolFileSearchResources>,
+}
+/// Per-tool resources supplied in a [`CreateAssistantRequest`].
+///
+/// Each field is left out of the serialized form when it is `None`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct CreateAssistantToolResources {
+    /// Resources for the `code_interpreter` tool.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub code_interpreter: Option<AssistantToolCodeInterpreterResources>,
+    /// Resources for the `file_search` tool, in their creation-time form.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_search: Option<CreateAssistantToolFileSearchResources>,
+}
+/// Creation-time resources for the `file_search` tool.
+// NOTE(review): neither field carries `skip_serializing_if`, so a `None` is serialized as an
+// explicit `null` rather than omitted — confirm the API accepts that.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+pub struct CreateAssistantToolFileSearchResources {
+    /// The [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) attached to this assistant. There can be a maximum of 1 vector store attached to the assistant.
+    pub vector_store_ids: Option<Vec<String>>,
+    /// A helper to create a [vector store](https://platform.openai.com/docs/api-reference/vector-stores/object) with file_ids and attach it to this assistant. There can be a maximum of 1 vector store attached to the assistant.
+    pub vector_stores: Option<Vec<AssistantVectorStore>>,
+}
+/// Description of a vector store to be created from a set of files and attached to an assistant.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+pub struct AssistantVectorStore {
+    /// A list of [file](https://platform.openai.com/docs/api-reference/files) IDs to add to the vector store. There can be a maximum of 10000 files in a vector store.
+    pub file_ids: Vec<String>,
+    /// The chunking strategy used to chunk the file(s). If not set, will use the `auto` strategy.
+    pub chunking_strategy: Option<AssistantVectorStoreChunkingStrategy>,
+    /// Set of 16 key-value pairs that can be attached to a vector store. This can be useful for storing additional information about the vector store in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
+    pub metadata: Option<HashMap<String, String>>,
+}
+/// How files are chunked when they are added to a vector store.
+///
+/// Serialized as an internally tagged object whose `type` field is `auto` or `static`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+#[serde(tag = "type")]
+pub enum AssistantVectorStoreChunkingStrategy {
+    /// The default strategy. This strategy currently uses a `max_chunk_size_tokens` of `800` and `chunk_overlap_tokens` of `400`.
+    #[default]
+    #[serde(rename = "auto")]
+    Auto,
+    /// Caller-specified chunking; the parameters are carried in the `static` field.
+    #[serde(rename = "static")]
+    Static { r#static: StaticChunkingStrategy },
+}
+/// Static Chunking Strategy
+///
+/// Note: the derived `Default` sets both fields to `0`, not to the API defaults quoted on the
+/// fields below.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+pub struct StaticChunkingStrategy {
+    /// The maximum number of tokens in each chunk. The default value is `800`. The minimum value is `100` and the maximum value is `4096`.
+    pub max_chunk_size_tokens: u16,
+    /// The number of tokens that overlap between chunks. The default value is `400`.
+    ///
+    /// Note that the overlap must not exceed half of `max_chunk_size_tokens`.
+    pub chunk_overlap_tokens: u16,
+}
+/// Represents an `assistant` that can call the model and use tools.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct AssistantObject {
+    /// The identifier, which can be referenced in API endpoints.
+    pub id: String,
+    /// The object type, which is always `assistant`.
+    pub object: String,
+    /// The Unix timestamp (in seconds) for when the assistant was created.
+    // NOTE(review): `i32` can only represent Unix seconds up to 2038-01-19.
+    pub created_at: i32,
+    /// The name of the assistant. The maximum length is 256 characters.
+    pub name: Option<String>,
+    /// The description of the assistant. The maximum length is 512 characters.
+    pub description: Option<String>,
+    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models) for descriptions of them.
+    pub model: String,
+    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
+    pub instructions: Option<String>,
+    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
+    ///
+    /// Deserializes to an empty list when the field is absent.
+    #[serde(default)]
+    pub tools: Vec<AssistantTools>,
+    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
+    pub tool_resources: Option<AssistantToolResources>,
+    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
+    pub metadata: Option<HashMap<String, String>>,
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+    pub temperature: Option<f32>,
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    ///
+    /// We generally recommend altering this or temperature but not both.
+    pub top_p: Option<f32>,
+    /// The output format the model must use; see [`AssistantsApiResponseFormatOption`].
+    pub response_format: Option<AssistantsApiResponseFormatOption>,
+}
+/// Specifies the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4), and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.
+///
+/// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which guarantees the model will match your supplied JSON schema. Learn more in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
+///
+/// Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.
+///
+/// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+pub enum AssistantsApiResponseFormatOption {
+    /// Serialized as the plain string `auto`; this is the Rust `Default`.
+    #[default]
+    #[serde(rename = "auto")]
+    Auto,
+    /// An explicit format, serialized as the inner [`ResponseFormat`] without any wrapper.
+    #[serde(untagged)]
+    Format(ResponseFormat),
+}
+/// Retrieval tool
+///
+/// Payload of the [`AssistantTools::FileSearch`] variant.
+#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
+pub struct AssistantToolsFileSearch {
+    /// Overrides for the file search tool. Omitted from the serialized form when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub file_search: Option<AssistantToolsFileSearchOverrides>,
+}
+/// Overrides for the file search tool.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct AssistantToolsFileSearchOverrides {
+    /// The maximum number of results the file search tool should output. The default is 20 for gpt-4* models and 5 for gpt-3.5-turbo. This number should be between 1 and 50 inclusive.
+    ///
+    /// Note that the file search tool may output fewer than `max_num_results` results. See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search/customizing-file-search-settings) for more information.
+    pub max_num_results: Option<u8>,
+    /// Ranking options for the file search; see [`FileSearchRankingOptions`].
+    pub ranking_options: Option<FileSearchRankingOptions>,
+}
+/// The ranker used by the file search tool.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub enum FileSearchRanker {
+    /// Serialized as `auto`.
+    #[serde(rename = "auto")]
+    Auto,
+    /// Serialized as `default_2024_08_21`.
+    #[serde(rename = "default_2024_08_21")]
+    Default2024_08_21,
+}
+/// The ranking options for the file search. If not specified, the file search tool will use the `auto` ranker and a score_threshold of 0.
+///
+/// See the [file search tool documentation](https://platform.openai.com/docs/assistants/tools/file-search#customizing-file-search-settings) for more information.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct FileSearchRankingOptions {
+    /// The ranker to use for the file search. If not specified will use the `auto` ranker.
+    /// Omitted from the serialized form when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ranker: Option<FileSearchRanker>,
+    /// The score threshold for the file search. All values must be a floating point number between 0 and 1.
+    pub score_threshold: f32,
+}
+/// Function tool
+///
+/// Payload of the [`AssistantTools::Function`] variant.
+#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
+pub struct AssistantToolsFunction {
+    /// The function definition exposed to the assistant.
+    pub function: FunctionObject,
+}
+/// A tool that can be enabled on an assistant.
+///
+/// Serialized as an internally tagged object whose `type` field is the snake_case variant
+/// name: `code_interpreter`, `file_search`, or `function`.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[serde(tag = "type")]
+#[serde(rename_all = "snake_case")]
+pub enum AssistantTools {
+    /// The code interpreter tool; carries no further settings.
+    CodeInterpreter,
+    /// The file search tool with its optional overrides.
+    FileSearch(AssistantToolsFileSearch),
+    /// A function tool described by a [`FunctionObject`].
+    Function(AssistantToolsFunction),
+}
+/// Request body for creating an assistant.
+///
+/// A `CreateAssistantRequestArgs` builder is derived for this type; its build errors are
+/// reported as [`OpenAIError`]. Every `Option` field is omitted from the serialized form when
+/// it is `None`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
+#[builder(name = "CreateAssistantRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateAssistantRequest {
+    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them.
+    pub model: String,
+    /// The name of the assistant. The maximum length is 256 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>,
+    /// The description of the assistant. The maximum length is 512 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<AssistantTools>>,
+    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_resources: Option<CreateAssistantToolResources>,
+    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    ///
+    /// We generally recommend altering this or temperature but not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+    /// The output format the model must use; see [`AssistantsApiResponseFormatOption`].
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<AssistantsApiResponseFormatOption>,
+}
+/// Request body for modifying an assistant.
+///
+/// A `ModifyAssistantRequestArgs` builder is derived for this type; its build errors are
+/// reported as [`OpenAIError`]. Every field is optional and is omitted from the serialized
+/// form when it is `None`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
+#[builder(name = "ModifyAssistantRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ModifyAssistantRequest {
+    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub model: Option<String>,
+    /// The name of the assistant. The maximum length is 256 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub name: Option<String>,
+    /// The description of the assistant. The maximum length is 512 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub description: Option<String>,
+    /// The system instructions that the assistant uses. The maximum length is 256,000 characters.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+    /// A list of tools enabled on the assistant. There can be a maximum of 128 tools per assistant. Tools can be of types `code_interpreter`, `file_search`, or `function`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tools: Option<Vec<AssistantTools>>,
+    /// A set of resources that are used by the assistant's tools. The resources are specific to the type of tool. For example, the `code_interpreter` tool requires a list of file IDs, while the `file_search` tool requires a list of vector store IDs.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub tool_resources: Option<AssistantToolResources>,
+    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub metadata: Option<HashMap<String, String>>,
+    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub temperature: Option<f32>,
+    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
+    ///
+    /// We generally recommend altering this or temperature but not both.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_p: Option<f32>,
+    /// The output format the model must use; see [`AssistantsApiResponseFormatOption`].
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<AssistantsApiResponseFormatOption>,
+}
+/// Deletion status of an assistant (presumably the body returned by the delete-assistant
+/// endpoint — confirm against the caller).
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct DeleteAssistantResponse {
+    /// Identifier of the assistant the response refers to.
+    pub id: String,
+    /// Whether the assistant was deleted.
+    pub deleted: bool,
+    /// The object type string reported by the API.
+    pub object: String,
+}
+/// A list of assistants together with paging information.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct ListAssistantsResponse {
+    /// The object type string reported by the API.
+    pub object: String,
+    /// The assistants contained in this response.
+    pub data: Vec<AssistantObject>,
+    /// Presumably the ID of the first entry in `data`, if any — confirm against the API reference.
+    pub first_id: Option<String>,
+    /// Presumably the ID of the last entry in `data`, if any — confirm against the API reference.
+    pub last_id: Option<String>,
+    /// Presumably whether further entries exist beyond this response — confirm against the API reference.
+    pub has_more: bool,
+}
+/// Controls which (if any) tool is called by the model.
+/// `none` means the model will not call any tools and instead generates a message.
+/// `auto` is the default value and means the model can pick between generating a message or calling one or more tools.
+/// `required` means the model must call one or more tools before responding to the user.
+/// Specifying a particular tool like `{"type": "file_search"}` or `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
+///
+/// Note: the Rust `Default` of this enum is [`Self::None`], which differs from the API-side
+/// default of `auto` described above.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum AssistantsApiToolChoiceOption {
+    /// Serialized as the string `none`.
+    #[default]
+    None,
+    /// Serialized as the string `auto`.
+    Auto,
+    /// Serialized as the string `required`.
+    Required,
+    /// A specific tool, serialized as the inner [`AssistantsNamedToolChoice`] object without any wrapper.
+    #[serde(untagged)]
+    Named(AssistantsNamedToolChoice),
+}
+/// Specifies a tool the model should use. Use to force the model to call a specific tool.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct AssistantsNamedToolChoice {
+    /// The type of the tool. If type is `function`, the function name must be set
+    pub r#type: AssistantToolType,
+    /// Name of the function to call; expected to be set when `type` is `function`.
+    pub function: Option<FunctionName>,
+}
+/// The kind of tool named in an [`AssistantsNamedToolChoice`].
+///
+/// Variants are serialized in snake_case: `function`, `code_interpreter`, `file_search`.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum AssistantToolType {
+    /// A function tool; this is the Rust `Default`.
+    #[default]
+    Function,
+    /// The code interpreter tool.
+    CodeInterpreter,
+    /// The file search tool.
+    FileSearch,
+}
--- a/lib/async-openai/src/types/assistant_impls.rs
+++ b/lib/async-openai/src/types/assistant_impls.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use super::{
+    AssistantToolCodeInterpreterResources, AssistantToolFileSearchResources,
+    AssistantToolResources, AssistantTools, AssistantToolsFileSearch, AssistantToolsFunction,
+    CreateAssistantToolFileSearchResources, CreateAssistantToolResources, FunctionObject,
+};
+/// Wraps file-search tool settings in [`AssistantTools::FileSearch`].
+impl From<AssistantToolsFileSearch> for AssistantTools {
+    fn from(value: AssistantToolsFileSearch) -> Self {
+        AssistantTools::FileSearch(value)
+    }
+}
+/// Wraps a function tool in [`AssistantTools::Function`].
+impl From<AssistantToolsFunction> for AssistantTools {
+    fn from(value: AssistantToolsFunction) -> Self {
+        AssistantTools::Function(value)
+    }
+}
+/// Builds a function tool whose `function` field is the given [`FunctionObject`].
+impl From<FunctionObject> for AssistantToolsFunction {
+    fn from(value: FunctionObject) -> Self {
+        AssistantToolsFunction { function: value }
+    }
+}
+/// Converts a [`FunctionObject`] into an [`AssistantToolsFunction`] and wraps it in
+/// [`AssistantTools::Function`].
+impl From<FunctionObject> for AssistantTools {
+    fn from(value: FunctionObject) -> Self {
+        let tool = AssistantToolsFunction::from(value);
+        AssistantTools::Function(tool)
+    }
+}
+/// Creation-time tool resources holding only file-search resources; `code_interpreter` is `None`.
+impl From<CreateAssistantToolFileSearchResources> for CreateAssistantToolResources {
+    fn from(value: CreateAssistantToolFileSearchResources) -> Self {
+        let file_search = Some(value);
+        CreateAssistantToolResources {
+            file_search,
+            code_interpreter: None,
+        }
+    }
+}
+/// Creation-time tool resources holding only code-interpreter resources; `file_search` is `None`.
+impl From<AssistantToolCodeInterpreterResources> for CreateAssistantToolResources {
+    fn from(value: AssistantToolCodeInterpreterResources) -> Self {
+        let code_interpreter = Some(value);
+        CreateAssistantToolResources {
+            file_search: None,
+            code_interpreter,
+        }
+    }
+}
+/// Tool resources holding only code-interpreter resources; `file_search` is `None`.
+impl From<AssistantToolCodeInterpreterResources> for AssistantToolResources {
+    fn from(value: AssistantToolCodeInterpreterResources) -> Self {
+        let code_interpreter = Some(value);
+        AssistantToolResources {
+            file_search: None,
+            code_interpreter,
+        }
+    }
+}
+/// Tool resources holding only file-search resources; `code_interpreter` is `None`.
+impl From<AssistantToolFileSearchResources> for AssistantToolResources {
+    fn from(value: AssistantToolFileSearchResources) -> Self {
+        let file_search = Some(value);
+        AssistantToolResources {
+            file_search,
+            code_interpreter: None,
+        }
+    }
+}
--- a/lib/async-openai/src/types/assistant_stream.rs
+++ b/lib/async-openai/src/types/assistant_stream.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use std::pin::Pin;
+use futures::Stream;
+use serde::Deserialize;
+use crate::error::{map_deserialization_error, ApiError, OpenAIError};
+use super::{
+    MessageDeltaObject, MessageObject, RunObject, RunStepDeltaObject, RunStepObject, ThreadObject,
+};
+/// Represents an event emitted when streaming a Run.
+///
+/// Each event in a server-sent events stream has an `event` and `data` property:
+///
+/// ```text
+/// event: thread.created
+/// data: {"id": "thread_123", "object": "thread", ...}
+/// ```
+///
+/// We emit events whenever a new object is created, transitions to a new state, or is being
+/// streamed in parts (deltas). For example, we emit `thread.run.created` when a new run
+/// is created, `thread.run.completed` when a run completes, and so on. When an Assistant chooses
+/// to create a message during a run, we emit a `thread.message.created` event, a
+/// `thread.message.in_progress` event, many `thread.message.delta` events, and finally a
+/// `thread.message.completed` event.
+///
+/// We may add additional events over time, so we recommend handling unknown events gracefully
+/// in your code. See the [Assistants API quickstart](https://platform.openai.com/docs/assistants/overview) to learn how to
+/// integrate the Assistants API with streaming.
+///
+/// When deserialized with serde, the variant is selected by the `event` field and its payload
+/// is read from the `data` field. The enum is `#[non_exhaustive]`, so matches outside this crate
+/// need a wildcard arm.
+#[derive(Debug, Deserialize, Clone)]
+#[serde(tag = "event", content = "data")]
+#[non_exhaustive]
+pub enum AssistantStreamEvent {
+    /// Occurs when a new [thread](https://platform.openai.com/docs/api-reference/threads/object) is created.
+    #[serde(rename = "thread.created")]
+    ThreadCreated(ThreadObject),
+    /// Occurs when a new [run](https://platform.openai.com/docs/api-reference/runs/object) is created.
+    #[serde(rename = "thread.run.created")]
+    ThreadRunCreated(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a `queued` status.
+    #[serde(rename = "thread.run.queued")]
+    ThreadRunQueued(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to an `in_progress` status.
+    #[serde(rename = "thread.run.in_progress")]
+    ThreadRunInProgress(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a `requires_action` status.
+    #[serde(rename = "thread.run.requires_action")]
+    ThreadRunRequiresAction(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) is completed.
+    #[serde(rename = "thread.run.completed")]
+    ThreadRunCompleted(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) ends with status `incomplete`.
+    #[serde(rename = "thread.run.incomplete")]
+    ThreadRunIncomplete(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) fails.
+    #[serde(rename = "thread.run.failed")]
+    ThreadRunFailed(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) moves to a `cancelling` status.
+    #[serde(rename = "thread.run.cancelling")]
+    ThreadRunCancelling(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) is cancelled.
+    #[serde(rename = "thread.run.cancelled")]
+    ThreadRunCancelled(RunObject),
+    /// Occurs when a [run](https://platform.openai.com/docs/api-reference/runs/object) expires.
+    #[serde(rename = "thread.run.expired")]
+    ThreadRunExpired(RunObject),
+    /// Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is created.
+    #[serde(rename = "thread.run.step.created")]
+    ThreadRunStepCreated(RunStepObject),
+    /// Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) moves to an `in_progress` state.
+    #[serde(rename = "thread.run.step.in_progress")]
+    ThreadRunStepInProgress(RunStepObject),
+    /// Occurs when parts of a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) are being streamed.
+    #[serde(rename = "thread.run.step.delta")]
+    ThreadRunStepDelta(RunStepDeltaObject),
+    /// Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is completed.
+    #[serde(rename = "thread.run.step.completed")]
+    ThreadRunStepCompleted(RunStepObject),
+    /// Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) fails.
+    #[serde(rename = "thread.run.step.failed")]
+    ThreadRunStepFailed(RunStepObject),
+    /// Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) is cancelled.
+    #[serde(rename = "thread.run.step.cancelled")]
+    ThreadRunStepCancelled(RunStepObject),
+    /// Occurs when a [run step](https://platform.openai.com/docs/api-reference/run-steps/step-object) expires.
+    #[serde(rename = "thread.run.step.expired")]
+    ThreadRunStepExpired(RunStepObject),
+    /// Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) is created.
+    #[serde(rename = "thread.message.created")]
+    ThreadMessageCreated(MessageObject),
+    /// Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) moves to an `in_progress` state.
+    #[serde(rename = "thread.message.in_progress")]
+    ThreadMessageInProgress(MessageObject),
+    /// Occurs when parts of a [Message](https://platform.openai.com/docs/api-reference/messages/object) are being streamed.
+    #[serde(rename = "thread.message.delta")]
+    ThreadMessageDelta(MessageDeltaObject),
+    /// Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) is completed.
+    #[serde(rename = "thread.message.completed")]
+    ThreadMessageCompleted(MessageObject),
+    /// Occurs when a [message](https://platform.openai.com/docs/api-reference/messages/object) ends before it is completed.
+    #[serde(rename = "thread.message.incomplete")]
+    ThreadMessageIncomplete(MessageObject),
+    /// Occurs when an [error](https://platform.openai.com/docs/guides/error-codes/api-errors) occurs. This can happen due to an internal server error or a timeout.
+    #[serde(rename = "error")]
+    ErrorEvent(ApiError),
+    /// Occurs when a stream ends.
+    #[serde(rename = "done")]
+    Done(String),
+}
+/// A pinned, boxed, `Send` stream whose items are either a parsed [`AssistantStreamEvent`]
+/// or the [`OpenAIError`] that occurred while producing it.
+pub type AssistantEventStream =
+    Pin<Box<dyn Stream<Item = Result<AssistantStreamEvent, OpenAIError>> + Send>>;
+/// Converts a raw server-sent event into a typed [`AssistantStreamEvent`].
+///
+/// The SSE `event` name selects the variant, and the `data` payload is parsed as JSON into that
+/// variant's object type. The `done` event is the exception: its `data` is carried through as
+/// the raw string without any JSON parsing.
+///
+/// # Errors
+///
+/// Returns the error produced by `map_deserialization_error` when `data` cannot be parsed into
+/// the expected type, and [`OpenAIError::StreamError`] (containing the debug representation of
+/// the offending event) when the event name is not recognized.
+impl TryFrom<eventsource_stream::Event> for AssistantStreamEvent {
+    type Error = OpenAIError;
+    fn try_from(value: eventsource_stream::Event) -> Result<Self, Self::Error> {
+        // Parses `data` as JSON into `T`. The target type is inferred from the variant
+        // constructor each arm maps the result into, so every arm shares one code path.
+        fn parse<T: serde::de::DeserializeOwned>(data: &str) -> Result<T, OpenAIError> {
+            serde_json::from_str(data).map_err(|e| map_deserialization_error(e, data.as_bytes()))
+        }
+
+        match value.event.as_str() {
+            "thread.created" => parse(&value.data).map(AssistantStreamEvent::ThreadCreated),
+            "thread.run.created" => parse(&value.data).map(AssistantStreamEvent::ThreadRunCreated),
+            "thread.run.queued" => parse(&value.data).map(AssistantStreamEvent::ThreadRunQueued),
+            "thread.run.in_progress" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunInProgress)
+            }
+            "thread.run.requires_action" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunRequiresAction)
+            }
+            "thread.run.completed" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunCompleted)
+            }
+            "thread.run.incomplete" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunIncomplete)
+            }
+            "thread.run.failed" => parse(&value.data).map(AssistantStreamEvent::ThreadRunFailed),
+            "thread.run.cancelling" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunCancelling)
+            }
+            "thread.run.cancelled" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunCancelled)
+            }
+            "thread.run.expired" => parse(&value.data).map(AssistantStreamEvent::ThreadRunExpired),
+            "thread.run.step.created" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunStepCreated)
+            }
+            "thread.run.step.in_progress" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunStepInProgress)
+            }
+            "thread.run.step.delta" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunStepDelta)
+            }
+            "thread.run.step.completed" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunStepCompleted)
+            }
+            "thread.run.step.failed" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunStepFailed)
+            }
+            "thread.run.step.cancelled" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunStepCancelled)
+            }
+            "thread.run.step.expired" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadRunStepExpired)
+            }
+            "thread.message.created" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadMessageCreated)
+            }
+            "thread.message.in_progress" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadMessageInProgress)
+            }
+            "thread.message.delta" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadMessageDelta)
+            }
+            "thread.message.completed" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadMessageCompleted)
+            }
+            "thread.message.incomplete" => {
+                parse(&value.data).map(AssistantStreamEvent::ThreadMessageIncomplete)
+            }
+            "error" => parse(&value.data).map(AssistantStreamEvent::ErrorEvent),
+            // The terminal event carries a plain marker string rather than a JSON object.
+            "done" => Ok(AssistantStreamEvent::Done(value.data)),
+            // Interpolate the event into the message; a bare string literal would emit the
+            // placeholder text verbatim and hide which event was rejected.
+            _ => Err(OpenAIError::StreamError(format!(
+                "Unrecognized event: {value:#?}"
+            ))),
+        }
+    }
+}
--- a/lib/async-openai/src/types/audio.rs
+++ b/lib/async-openai/src/types/audio.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use bytes::Bytes;
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use super::InputSource;
+use crate::error::OpenAIError;
+/// Audio payload attached to a transcription or translation request, wrapping the
+/// [`InputSource`] the audio is read from.
+#[derive(Debug, Default, Clone, PartialEq)]
+pub struct AudioInput {
+    /// Where the audio data comes from.
+    pub source: InputSource,
+}
+/// Output format of a transcript, as used by the transcription and translation requests.
+///
+/// Variants are (de)serialized in `snake_case` (for example `verbose_json`); the default is
+/// `json`.
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)]
+#[serde(rename_all = "snake_case")]
+pub enum AudioResponseFormat {
+    #[default]
+    Json,
+    Text,
+    Srt,
+    VerboseJson,
+    Vtt,
+}
+/// Audio format requested for generated speech.
+///
+/// Variants are (de)serialized in lowercase (for example `mp3`); the default is `mp3`.
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum SpeechResponseFormat {
+    #[default]
+    Mp3,
+    Opus,
+    Aac,
+    Flac,
+    Pcm,
+    Wav,
+}
+/// Voice used when generating speech.
+///
+/// Variants are (de)serialized in lowercase (for example `alloy`); the default is `alloy`.
+/// The enum is `#[non_exhaustive]`, so matches outside this crate need a wildcard arm.
+#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+#[non_exhaustive]
+pub enum Voice {
+    #[default]
+    Alloy,
+    Ash,
+    Ballad,
+    Coral,
+    Echo,
+    Fable,
+    Onyx,
+    Nova,
+    Sage,
+    Shimmer,
+}
+/// Text-to-speech model identifier.
+///
+/// The two named variants map to the strings `tts-1` (the default) and `tts-1-hd`.
+#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)]
+pub enum SpeechModel {
+    #[default]
+    #[serde(rename = "tts-1")]
+    Tts1,
+    #[serde(rename = "tts-1-hd")]
+    Tts1Hd,
+    /// Any other model name; being an untagged variant, it is (de)serialized as the bare string.
+    #[serde(untagged)]
+    Other(String),
+}
+/// Level at which timestamps are produced for a transcription.
+///
+/// Variants are (de)serialized in lowercase (`word`, `segment`); the default is `segment`.
+#[derive(Debug, Default, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum TimestampGranularity {
+    Word,
+    #[default]
+    Segment,
+}
+/// Parameters of a request to transcribe audio.
+///
+/// Construct it through the generated `CreateTranscriptionRequestArgs` builder, whose `build`
+/// reports failures as [`OpenAIError`]. Fields that are not set fall back to their `Default`
+/// value.
+#[derive(Clone, Default, Debug, Builder, PartialEq)]
+#[builder(name = "CreateTranscriptionRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateTranscriptionRequest {
+    /// The audio file to transcribe, in one of these formats: mp3, mp4, mpeg, mpga, m4a, wav, or webm.
+    pub file: AudioInput,
+    /// ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is currently available.
+    pub model: String,
+    /// An optional text to guide the model's style or continue a previous audio segment. The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should match the audio language.
+    pub prompt: Option<String>,
+    /// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
+    pub response_format: Option<AudioResponseFormat>,
+    /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit.
+    pub temperature: Option<f32>, // default: 0
+    /// The language of the input audio. Supplying the input language in [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will improve accuracy and latency.
+    pub language: Option<String>,
+    /// The timestamp granularities to populate for this transcription. `response_format` must be set to `verbose_json` to use timestamp granularities. Either or both of these options are supported: `word`, or `segment`. Note: There is no additional latency for segment timestamps, but generating word timestamps incurs additional latency.
+    pub timestamp_granularities: Option<Vec<TimestampGranularity>>,
+}
+/// Represents a transcription response returned by the model, based on the provided
+/// input.
+#[derive(Debug, Deserialize, Clone, Serialize)]
+pub struct CreateTranscriptionResponseJson {
+    /// The transcribed text.
+    pub text: String,
+}
+/// Represents a verbose JSON transcription response returned by the model, based on
+/// the provided input.
+#[derive(Debug, Deserialize, Clone, Serialize)]
+pub struct CreateTranscriptionResponseVerboseJson {
+    /// The language of the input audio.
+    pub language: String,
+    /// The duration of the input audio.
+    pub duration: f32,
+    /// The transcribed text.
+    pub text: String,
+    /// Extracted words and their corresponding timestamps. Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub words: Option<Vec<TranscriptionWord>>,
+    /// Segments of the transcribed text and their corresponding details. Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub segments: Option<Vec<TranscriptionSegment>>,
+}
+/// A single transcribed word together with its start and end timestamps.
+#[derive(Debug, Deserialize, Clone, Serialize)]
+pub struct TranscriptionWord {
+    /// The text content of the word.
+    pub word: String,
+    /// Start time of the word in seconds.
+    pub start: f32,
+    /// End time of the word in seconds.
+    pub end: f32,
+}
+/// A segment of transcribed (or translated) text together with its timing and decoding details.
+#[derive(Debug, Deserialize, Clone, Serialize)]
+pub struct TranscriptionSegment {
+    /// Unique identifier of the segment.
+    pub id: i32,
+    /// Seek offset of the segment.
+    pub seek: i32,
+    /// Start time of the segment in seconds.
+    pub start: f32,
+    /// End time of the segment in seconds.
+    pub end: f32,
+    /// Text content of the segment.
+    pub text: String,
+    /// Array of token IDs for the text content.
+    pub tokens: Vec<i32>,
+    /// Temperature parameter used for generating the segment.
+    pub temperature: f32,
+    /// Average logprob of the segment. If the value is lower than -1, consider
+    /// the logprobs failed.
+    pub avg_logprob: f32,
+    /// Compression ratio of the segment. If the value is greater than 2.4,
+    /// consider the compression failed.
+    pub compression_ratio: f32,
+    /// Probability of no speech in the segment. If the value is higher than 1.0
+    /// and the `avg_logprob` is below -1, consider this segment silent.
+    pub no_speech_prob: f32,
+}
+/// Parameters of a request to generate speech audio from text.
+///
+/// Construct it through the generated `CreateSpeechRequestArgs` builder, whose `build` reports
+/// failures as [`OpenAIError`]. Optional fields left as `None` are omitted from the serialized
+/// request.
+#[derive(Clone, Default, Debug, Builder, PartialEq, Serialize, Deserialize)]
+#[builder(name = "CreateSpeechRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateSpeechRequest {
+    /// The text to generate audio for. The maximum length is 4096 characters.
+    pub input: String,
+    /// One of the available [TTS models](https://platform.openai.com/docs/models/tts): `tts-1` or `tts-1-hd`
+    pub model: SpeechModel,
+    /// The voice to use when generating the audio. Supported voices are `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer` and `verse`.
+    /// Previews of the voices are available in the [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
+    // NOTE(review): the list above names `verse` and omits `ballad`, which does not match the
+    // variants of `Voice` — confirm which one is out of date.
+    pub voice: Voice,
+    /// Control the voice of your generated audio with additional instructions.
+    /// Does not work with `tts-1` or `tts-1-hd`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub instructions: Option<String>,
+    /// The format of the generated audio. Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub response_format: Option<SpeechResponseFormat>,
+    /// The speed of the generated audio. Select a value from 0.25 to 4.0. 1.0 is the default.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub speed: Option<f32>, // default: 1.0
+}
+/// Parameters of a request to translate audio.
+///
+/// Construct it through the generated `CreateTranslationRequestArgs` builder, whose `build`
+/// reports failures as [`OpenAIError`]. Fields that are not set fall back to their `Default`
+/// value.
+#[derive(Clone, Default, Debug, Builder, PartialEq)]
+#[builder(name = "CreateTranslationRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct CreateTranslationRequest {
+    /// The audio file object (not file name) to translate, in one of these
+    /// formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
+    pub file: AudioInput,
+    /// ID of the model to use. Only `whisper-1` (which is powered by our open source Whisper V2 model) is currently available.
+    pub model: String,
+    /// An optional text to guide the model's style or continue a previous audio segment. The [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting) should be in English.
+    pub prompt: Option<String>,
+    /// The format of the transcript output, in one of these options: json, text, srt, verbose_json, or vtt.
+    pub response_format: Option<AudioResponseFormat>,
+    /// The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic. If set to 0, the model will use [log probability](https://en.wikipedia.org/wiki/Log_probability) to automatically increase the temperature until certain thresholds are hit.
+    pub temperature: Option<f32>, // default: 0
+}
+/// JSON translation response carrying only the translated text.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct CreateTranslationResponseJson {
+    /// The translated text.
+    pub text: String,
+}
+/// Verbose JSON translation response, including language, duration and optional segments.
+#[derive(Debug, Deserialize, Clone, Serialize)]
+pub struct CreateTranslationResponseVerboseJson {
+    /// The language of the output translation (always `english`).
+    pub language: String,
+    /// The duration of the input audio.
+    // NOTE(review): this is a `String`, whereas `CreateTranscriptionResponseVerboseJson::duration`
+    // is an `f32` — confirm against the API which representation the translation endpoint returns.
+    pub duration: String,
+    /// The translated text.
+    pub text: String,
+    /// Segments of the translated text and their corresponding details. Omitted from serialized output when `None`.
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub segments: Option<Vec<TranscriptionSegment>>,
+}
+/// Response of a speech request, holding the returned payload as raw bytes.
+#[derive(Debug, Clone)]
+pub struct CreateSpeechResponse {
+    /// The raw response bytes (presumably the encoded audio — confirm against the caller).
+    pub bytes: Bytes,
+}
--- a/lib/async-openai/src/types/audit_log.rs
+++ b/lib/async-openai/src/types/audit_log.rs
--- a/lib/async-openai/src/types/batch.rs
+++ b/lib/async-openai/src/types/batch.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
+// Original Copyright (c) 2022 Himanshu Neema
+// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+//
+// Modifications Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
+// Licensed under Apache 2.0
+use std::collections::HashMap;
+use derive_builder::Builder;
+use serde::{Deserialize, Serialize};
+use crate::error::OpenAIError;
+/// Parameters of a request to create a batch.
+///
+/// Construct it through the generated `BatchRequestArgs` builder, whose `build` reports
+/// failures as [`OpenAIError`]. Fields that are not set fall back to their `Default` value.
+#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
+#[builder(name = "BatchRequestArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option), default)]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct BatchRequest {
+    /// The ID of an uploaded file that contains requests for the new batch.
+    ///
+    /// See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file.
+    ///
+    /// Your input file must be formatted as a [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 requests, and can be up to 100 MB in size.
+    pub input_file_id: String,
+    /// The endpoint to be used for all requests in the batch. Currently `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 embedding inputs across all requests in the batch.
+    pub endpoint: BatchEndpoint,
+    /// The time frame within which the batch should be processed. Currently only `24h` is supported.
+    pub completion_window: BatchCompletionWindow,
+    /// Optional custom metadata for the batch.
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+/// API endpoint a batch's requests are sent to.
+///
+/// Each variant is (de)serialized as the endpoint path given in its `rename` attribute; the
+/// default is `/v1/chat/completions`.
+#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Default)]
+pub enum BatchEndpoint {
+    #[default]
+    #[serde(rename = "/v1/chat/completions")]
+    V1ChatCompletions,
+    #[serde(rename = "/v1/embeddings")]
+    V1Embeddings,
+    #[serde(rename = "/v1/completions")]
+    V1Completions,
+}
+/// Time frame within which a batch should be processed.
+///
+/// The single variant is (de)serialized as the string `24h`.
+#[derive(Debug, Clone, PartialEq, Serialize, Default, Deserialize)]
+pub enum BatchCompletionWindow {
+    #[default]
+    #[serde(rename = "24h")]
+    W24H,
+}
+/// A batch of requests together with its processing status, lifecycle timestamps and result
+/// file references.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct Batch {
+    /// Identifier of the batch.
+    pub id: String,
+    /// The object type, which is always `batch`.
+    pub object: String,
+    /// The OpenAI API endpoint used by the batch.
+    pub endpoint: String,
+    /// Errors reported for the batch, if any.
+    pub errors: Option<BatchErrors>,
+    /// The ID of the input file for the batch.
+    pub input_file_id: String,
+    /// The time frame within which the batch should be processed.
+    pub completion_window: String,
+    /// The current status of the batch.
+    pub status: BatchStatus,
+    /// The ID of the file containing the outputs of successfully executed requests.
+    pub output_file_id: Option<String>,
+    /// The ID of the file containing the outputs of requests with errors.
+    pub error_file_id: Option<String>,
+    /// The Unix timestamp (in seconds) for when the batch was created.
+    pub created_at: u32,
+    /// The Unix timestamp (in seconds) for when the batch started processing.
+    pub in_progress_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the batch will expire.
+    pub expires_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the batch started finalizing.
+    pub finalizing_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the batch was completed.
+    pub completed_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the batch failed.
+    pub failed_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the batch expired.
+    pub expired_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the batch started cancelling.
+    pub cancelling_at: Option<u32>,
+    /// The Unix timestamp (in seconds) for when the batch was cancelled.
+    pub cancelled_at: Option<u32>,
+    /// The request counts for different statuses within the batch.
+    pub request_counts: Option<BatchRequestCounts>,
+    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
+    pub metadata: Option<HashMap<String, serde_json::Value>>,
+}
+/// List wrapper around the individual [`BatchError`] entries of a batch.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct BatchErrors {
+    /// The object type, which is always `list`.
+    pub object: String,
+    /// The individual errors.
+    pub data: Vec<BatchError>,
+}
+/// A single error reported for a batch.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct BatchError {
+    /// An error code identifying the error type.
+    pub code: String,
+    /// A human-readable message providing more details about the error.
+    pub message: String,
+    /// The name of the parameter that caused the error, if applicable.
+    pub param: Option<String>,
+    /// The line number of the input file where the error occurred, if applicable.
+    pub line: Option<u32>,
+}
+/// Status of a batch.
+///
+/// Variants are (de)serialized in `snake_case` (for example `in_progress`).
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[serde(rename_all = "snake_case")]
+pub enum BatchStatus {
+    Validating,
+    Failed,
+    InProgress,
+    Finalizing,
+    Completed,
+    Expired,
+    Cancelling,
+    Cancelled,
+}
+/// Counts of the requests in a batch, broken down by outcome.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct BatchRequestCounts {
+    /// Total number of requests in the batch.
+    pub total: u32,
+    /// Number of requests that have been completed successfully.
+    pub completed: u32,
+    /// Number of requests that have failed.
+    pub failed: u32,
+}
+/// Response containing a list of [`Batch`] objects plus pagination fields.
+// NOTE(review): the meaning of `first_id`, `last_id`, `has_more` and `object` is not documented
+// here; they look like standard list-pagination fields — confirm against the API reference.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct ListBatchesResponse {
+    /// The batches in this response.
+    pub data: Vec<Batch>,
+    pub first_id: Option<String>,
+    pub last_id: Option<String>,
+    pub has_more: bool,
+    pub object: String,
+}
+/// HTTP method of a batch input line; (de)serialized in uppercase (`POST`).
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[serde(rename_all = "UPPERCASE")]
+pub enum BatchRequestInputMethod {
+    POST,
+}
+/// The per-line object of the batch input file.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct BatchRequestInput {
+    /// A developer-provided per-request id that will be used to match outputs to inputs. Must be unique for each request in a batch.
+    pub custom_id: String,
+    /// The HTTP method to be used for the request. Currently only `POST` is supported.
+    pub method: BatchRequestInputMethod,
+    /// The OpenAI API relative URL to be used for the request. Currently `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported.
+    pub url: BatchEndpoint,
+    /// The JSON body of the request, if any.
+    pub body: Option<serde_json::Value>,
+}
+/// The response recorded for a single request of a batch.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct BatchRequestOutputResponse {
+    /// The HTTP status code of the response.
+    pub status_code: u16,
+    /// A unique identifier for the OpenAI API request. Please include this request ID when contacting support.
+    pub request_id: String,
+    /// The JSON body of the response.
+    pub body: serde_json::Value,
+}
+/// Error details recorded for a single request of a batch.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct BatchRequestOutputError {
+    /// A machine-readable error code.
+    pub code: String,
+    /// A human-readable error message.
+    pub message: String,
+}
+/// The per-line object of the batch output and error files.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+pub struct BatchRequestOutput {
+    /// Identifier of this output entry.
+    pub id: String,
+    /// A developer-provided per-request id that will be used to match outputs to inputs.
+    pub custom_id: String,
+    /// The response recorded for the request, if any.
+    pub response: Option<BatchRequestOutputResponse>,
+    /// For requests that failed with a non-HTTP error, this will contain more information on the cause of the failure.
+    pub error: Option<BatchRequestOutputError>,
+}