refactor(3/3): switch dynamo-protocols to upstream async-openai types (#7625)

Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>

refactor(3/3): switch dynamo-protocols to upstream async-openai types (#7625)
Co-authored-by: Dmitry Tokarev <dtokarev@nvidia.com>
fd5cc288 · ishandhanani · GitHub · d517fb80 · d517fb80 · d517fb80
Unverified commit fd5cc288, authored Apr 08, 2026 by ishandhanani; committed by GitHub on Apr 08, 2026
20 changed files
--- a/lib/protocols/src/types/audit_log.rs
+++ b/lib/protocols/src/types/audit_log.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use serde::{Deserialize, Serialize};
-
-/// The event type.
-#[derive(Debug, Serialize, Deserialize)]
-pub enum AuditLogEventType {
-    #[serde(rename = "api_key.created")]
-    ApiKeyCreated,
-    #[serde(rename = "api_key.updated")]
-    ApiKeyUpdated,
-    #[serde(rename = "api_key.deleted")]
-    ApiKeyDeleted,
-    #[serde(rename = "invite.sent")]
-    InviteSent,
-    #[serde(rename = "invite.accepted")]
-    InviteAccepted,
-    #[serde(rename = "invite.deleted")]
-    InviteDeleted,
-    #[serde(rename = "login.succeeded")]
-    LoginSucceeded,
-    #[serde(rename = "login.failed")]
-    LoginFailed,
-    #[serde(rename = "logout.succeeded")]
-    LogoutSucceeded,
-    #[serde(rename = "logout.failed")]
-    LogoutFailed,
-    #[serde(rename = "organization.updated")]
-    OrganizationUpdated,
-    #[serde(rename = "project.created")]
-    ProjectCreated,
-    #[serde(rename = "project.updated")]
-    ProjectUpdated,
-    #[serde(rename = "project.archived")]
-    ProjectArchived,
-    #[serde(rename = "service_account.created")]
-    ServiceAccountCreated,
-    #[serde(rename = "service_account.updated")]
-    ServiceAccountUpdated,
-    #[serde(rename = "service_account.deleted")]
-    ServiceAccountDeleted,
-    #[serde(rename = "user.added")]
-    UserAdded,
-    #[serde(rename = "user.updated")]
-    UserUpdated,
-    #[serde(rename = "user.deleted")]
-    UserDeleted,
-}
-
-/// Represents a list of audit logs.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ListAuditLogsResponse {
-    /// The object type, which is always `list`.
-    pub object: String,
-    /// A list of `AuditLog` objects.
-    pub data: Vec<AuditLog>,
-    /// The first `audit_log_id` in the retrieved `list`.
-    pub first_id: String,
-    /// The last `audit_log_id` in the retrieved `list`.
-    pub last_id: String,
-    /// The `has_more` property is used for pagination to indicate there are additional results.
-    pub has_more: bool,
-}
-
-/// The project that the action was scoped to. Absent for actions not scoped to projects.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogProject {
-    /// The project ID.
-    pub id: String,
-    /// The project title.
-    pub name: String,
-}
-
-/// The actor who performed the audit logged action.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogActor {
-    /// The type of actor. Is either `session` or `api_key`.
-    pub r#type: String,
-    /// The session in which the audit logged action was performed.
-    pub session: Option<AuditLogActorSession>,
-    /// The API Key used to perform the audit logged action.
-    pub api_key: Option<AuditLogActorApiKey>,
-}
-
-/// The session in which the audit logged action was performed.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogActorSession {
-    /// The user who performed the audit logged action.
-    pub user: AuditLogActorUser,
-    /// The IP address from which the action was performed.
-    pub ip_address: String,
-}
-
-/// The API Key used to perform the audit logged action.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogActorApiKey {
-    /// The tracking id of the API key.
-    pub id: String,
-    /// The type of API key. Can be either `user` or `service_account`.
-    pub r#type: AuditLogActorApiKeyType,
-    /// The user who performed the audit logged action, if applicable.
-    pub user: Option<AuditLogActorUser>,
-    /// The service account that performed the audit logged action, if applicable.
-    pub service_account: Option<AuditLogActorServiceAccount>,
-}
-
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum AuditLogActorApiKeyType {
-    User,
-    ServiceAccount,
-}
-
-/// The user who performed the audit logged action.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogActorUser {
-    /// The user id.
-    pub id: String,
-    /// The user email.
-    pub email: String,
-}
-
-/// The service account that performed the audit logged action.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogActorServiceAccount {
-    /// The service account id.
-    pub id: String,
-}
-
-/// A log of a user action or configuration change within this organization.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLog {
-    /// The ID of this log.
-    pub id: String,
-    /// The event type.
-    pub r#type: AuditLogEventType,
-    /// The Unix timestamp (in seconds) of the event.
-    pub effective_at: u32,
-    /// The project that the action was scoped to. Absent for actions not scoped to projects.
-    pub project: Option<AuditLogProject>,
-    /// The actor who performed the audit logged action.
-    pub actor: AuditLogActor,
-    /// The details for events with the type `api_key.created`.
-    #[serde(rename = "api_key.created")]
-    pub api_key_created: Option<AuditLogApiKeyCreated>,
-    /// The details for events with the type `api_key.updated`.
-    #[serde(rename = "api_key.updated")]
-    pub api_key_updated: Option<AuditLogApiKeyUpdated>,
-    /// The details for events with the type `api_key.deleted`.
-    #[serde(rename = "api_key.deleted")]
-    pub api_key_deleted: Option<AuditLogApiKeyDeleted>,
-    /// The details for events with the type `invite.sent`.
-    #[serde(rename = "invite.sent")]
-    pub invite_sent: Option<AuditLogInviteSent>,
-    /// The details for events with the type `invite.accepted`.
-    #[serde(rename = "invite.accepted")]
-    pub invite_accepted: Option<AuditLogInviteAccepted>,
-    /// The details for events with the type `invite.deleted`.
-    #[serde(rename = "invite.deleted")]
-    pub invite_deleted: Option<AuditLogInviteDeleted>,
-    /// The details for events with the type `login.failed`.
-    #[serde(rename = "login.failed")]
-    pub login_failed: Option<AuditLogLoginFailed>,
-    /// The details for events with the type `logout.failed`.
-    #[serde(rename = "logout.failed")]
-    pub logout_failed: Option<AuditLogLogoutFailed>,
-    /// The details for events with the type `organization.updated`.
-    #[serde(rename = "organization.updated")]
-    pub organization_updated: Option<AuditLogOrganizationUpdated>,
-    /// The details for events with the type `project.created`.
-    #[serde(rename = "project.created")]
-    pub project_created: Option<AuditLogProjectCreated>,
-    /// The details for events with the type `project.updated`.
-    #[serde(rename = "project.updated")]
-    pub project_updated: Option<AuditLogProjectUpdated>,
-    /// The details for events with the type `project.archived`.
-    #[serde(rename = "project.archived")]
-    pub project_archived: Option<AuditLogProjectArchived>,
-    /// The details for events with the type `service_account.created`.
-    #[serde(rename = "service_account.created")]
-    pub service_account_created: Option<AuditLogServiceAccountCreated>,
-    /// The details for events with the type `service_account.updated`.
-    #[serde(rename = "service_account.updated")]
-    pub service_account_updated: Option<AuditLogServiceAccountUpdated>,
-    /// The details for events with the type `service_account.deleted`.
-    #[serde(rename = "service_account.deleted")]
-    pub service_account_deleted: Option<AuditLogServiceAccountDeleted>,
-    /// The details for events with the type `user.added`.
-    #[serde(rename = "user.added")]
-    pub user_added: Option<AuditLogUserAdded>,
-    /// The details for events with the type `user.updated`.
-    #[serde(rename = "user.updated")]
-    pub user_updated: Option<AuditLogUserUpdated>,
-    /// The details for events with the type `user.deleted`.
-    #[serde(rename = "user.deleted")]
-    pub user_deleted: Option<AuditLogUserDeleted>,
-}
-
-/// The details for events with the type `api_key.created`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogApiKeyCreated {
-    /// The tracking ID of the API key.
-    pub id: String,
-    /// The payload used to create the API key.
-    pub data: Option<AuditLogApiKeyCreatedData>,
-}
-
-/// The payload used to create the API key.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogApiKeyCreatedData {
-    /// A list of scopes allowed for the API key, e.g. `["api.model.request"]`.
-    pub scopes: Option<Vec<String>>,
-}
-
-/// The details for events with the type `api_key.updated`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogApiKeyUpdated {
-    /// The tracking ID of the API key.
-    pub id: String,
-    /// The payload used to update the API key.
-    pub changes_requested: Option<AuditLogApiKeyUpdatedChangesRequested>,
-}
-
-/// The payload used to update the API key.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogApiKeyUpdatedChangesRequested {
-    /// A list of scopes allowed for the API key, e.g. `["api.model.request"]`.
-    pub scopes: Option<Vec<String>>,
-}
-
-/// The details for events with the type `api_key.deleted`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogApiKeyDeleted {
-    /// The tracking ID of the API key.
-    pub id: String,
-}
-
-/// The details for events with the type `invite.sent`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogInviteSent {
-    /// The ID of the invite.
-    pub id: String,
-    /// The payload used to create the invite.
-    pub data: Option<AuditLogInviteSentData>,
-}
-
-/// The payload used to create the invite.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogInviteSentData {
-    /// The email invited to the organization.
-    pub email: String,
-    /// The role the email was invited to be. Is either `owner` or `member`.
-    pub role: String,
-}
-
-/// The details for events with the type `invite.accepted`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogInviteAccepted {
-    /// The ID of the invite.
-    pub id: String,
-}
-
-/// The details for events with the type `invite.deleted`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogInviteDeleted {
-    /// The ID of the invite.
-    pub id: String,
-}
-
-/// The details for events with the type `login.failed`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogLoginFailed {
-    /// The error code of the failure.
-    pub error_code: String,
-    /// The error message of the failure.
-    pub error_message: String,
-}
-
-/// The details for events with the type `logout.failed`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogLogoutFailed {
-    /// The error code of the failure.
-    pub error_code: String,
-    /// The error message of the failure.
-    pub error_message: String,
-}
-
-/// The details for events with the type `organization.updated`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogOrganizationUpdated {
-    /// The organization ID.
-    pub id: String,
-    /// The payload used to update the organization settings.
-    pub changes_requested: Option<AuditLogOrganizationUpdatedChangesRequested>,
-}
-
-/// The payload used to update the organization settings.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogOrganizationUpdatedChangesRequested {
-    /// The organization title.
-    pub title: Option<String>,
-    /// The organization description.
-    pub description: Option<String>,
-    /// The organization name.
-    pub name: Option<String>,
-    /// The organization settings.
-    pub settings: Option<AuditLogOrganizationUpdatedChangesRequestedSettings>,
-}
-
-/// The organization settings.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogOrganizationUpdatedChangesRequestedSettings {
-    /// Visibility of the threads page which shows messages created with the Assistants API and Playground. One of `ANY_ROLE`, `OWNERS`, or `NONE`.
-    pub threads_ui_visibility: Option<String>,
-    /// Visibility of the usage dashboard which shows activity and costs for your organization. One of `ANY_ROLE` or `OWNERS`.
-    pub usage_dashboard_visibility: Option<String>,
-}
-
-/// The details for events with the type `project.created`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogProjectCreated {
-    /// The project ID.
-    pub id: String,
-    /// The payload used to create the project.
-    pub data: Option<AuditLogProjectCreatedData>,
-}
-
-/// The payload used to create the project.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogProjectCreatedData {
-    /// The project name.
-    pub name: String,
-    /// The title of the project as seen on the dashboard.
-    pub title: Option<String>,
-}
-
-/// The details for events with the type `project.updated`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogProjectUpdated {
-    /// The project ID.
-    pub id: String,
-    /// The payload used to update the project.
-    pub changes_requested: Option<AuditLogProjectUpdatedChangesRequested>,
-}
-
-/// The payload used to update the project.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogProjectUpdatedChangesRequested {
-    /// The title of the project as seen on the dashboard.
-    pub title: Option<String>,
-}
-
-/// The details for events with the type `project.archived`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogProjectArchived {
-    /// The project ID.
-    pub id: String,
-}
-
-/// The details for events with the type `service_account.created`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogServiceAccountCreated {
-    /// The service account ID.
-    pub id: String,
-    /// The payload used to create the service account.
-    pub data: Option<AuditLogServiceAccountCreatedData>,
-}
-
-/// The payload used to create the service account.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogServiceAccountCreatedData {
-    /// The role of the service account. Is either `owner` or `member`.
-    pub role: String,
-}
-
-/// The details for events with the type `service_account.updated`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogServiceAccountUpdated {
-    /// The service account ID.
-    pub id: String,
-    /// The payload used to updated the service account.
-    pub changes_requested: Option<AuditLogServiceAccountUpdatedChangesRequested>,
-}
-
-/// The payload used to updated the service account.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogServiceAccountUpdatedChangesRequested {
-    /// The role of the service account. Is either `owner` or `member`.
-    pub role: String,
-}
-
-/// The details for events with the type `service_account.deleted`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogServiceAccountDeleted {
-    /// The service account ID.
-    pub id: String,
-}
-
-/// The details for events with the type `user.added`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogUserAdded {
-    /// The user ID.
-    pub id: String,
-    /// The payload used to add the user to the project.
-    pub data: Option<AuditLogUserAddedData>,
-}
-
-/// The payload used to add the user to the project.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogUserAddedData {
-    /// The role of the user. Is either `owner` or `member`.
-    pub role: String,
-}
-
-/// The details for events with the type `user.updated`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogUserUpdated {
-    /// The project ID.
-    pub id: String,
-    /// The payload used to update the user.
-    pub changes_requested: Option<AuditLogUserUpdatedChangesRequested>,
-}
-
-/// The payload used to update the user.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogUserUpdatedChangesRequested {
-    /// The role of the user. Is either `owner` or `member`.
-    pub role: String,
-}
-
-/// The details for events with the type `user.deleted`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct AuditLogUserDeleted {
-    /// The user ID.
-    pub id: String,
-}
--- a/lib/protocols/src/types/batch.rs
+++ b/lib/protocols/src/types/batch.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use std::collections::HashMap;
-
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-
-use crate::error::OpenAIError;
-
-#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
-#[builder(name = "BatchRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct BatchRequest {
-    /// The ID of an uploaded file that contains requests for the new batch.
-    ///
-    /// See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file.
-    ///
-    /// Your input file must be formatted as a [JSONL file](https://platform.openai.com/docs/api-reference/batch/request-input), and must be uploaded with the purpose `batch`. The file can contain up to 50,000 requests, and can be up to 100 MB in size.
-    pub input_file_id: String,
-
-    /// The endpoint to be used for all requests in the batch. Currently `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported. Note that `/v1/embeddings` batches are also restricted to a maximum of 50,000 embedding inputs across all requests in the batch.
-    pub endpoint: BatchEndpoint,
-
-    /// The time frame within which the batch should be processed. Currently only `24h` is supported.
-    pub completion_window: BatchCompletionWindow,
-
-    /// Optional custom metadata for the batch.
-    pub metadata: Option<HashMap<String, serde_json::Value>>,
-}
-
-#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, Default)]
-pub enum BatchEndpoint {
-    #[default]
-    #[serde(rename = "/v1/chat/completions")]
-    V1ChatCompletions,
-    #[serde(rename = "/v1/embeddings")]
-    V1Embeddings,
-    #[serde(rename = "/v1/completions")]
-    V1Completions,
-}
-
-#[derive(Debug, Clone, PartialEq, Serialize, Default, Deserialize)]
-pub enum BatchCompletionWindow {
-    #[default]
-    #[serde(rename = "24h")]
-    W24H,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct Batch {
-    pub id: String,
-    /// The object type, which is always `batch`.
-    pub object: String,
-    /// The OpenAI API endpoint used by the batch.
-    pub endpoint: String,
-    pub errors: Option<BatchErrors>,
-    /// The ID of the input file for the batch.
-    pub input_file_id: String,
-    /// The time frame within which the batch should be processed.
-    pub completion_window: String,
-    /// The current status of the batch.
-    pub status: BatchStatus,
-    /// The ID of the file containing the outputs of successfully executed requests.
-    pub output_file_id: Option<String>,
-    /// The ID of the file containing the outputs of requests with errors.
-    pub error_file_id: Option<String>,
-    /// The Unix timestamp (in seconds) for when the batch was created.
-    pub created_at: u32,
-    /// The Unix timestamp (in seconds) for when the batch started processing.
-    pub in_progress_at: Option<u32>,
-    /// The Unix timestamp (in seconds) for when the batch will expire.
-    pub expires_at: Option<u32>,
-    /// The Unix timestamp (in seconds) for when the batch started finalizing.
-    pub finalizing_at: Option<u32>,
-    /// The Unix timestamp (in seconds) for when the batch was completed.
-    pub completed_at: Option<u32>,
-    /// The Unix timestamp (in seconds) for when the batch failed.
-    pub failed_at: Option<u32>,
-    /// he Unix timestamp (in seconds) for when the batch expired.
-    pub expired_at: Option<u32>,
-    /// The Unix timestamp (in seconds) for when the batch started cancelling.
-    pub cancelling_at: Option<u32>,
-    /// The Unix timestamp (in seconds) for when the batch was cancelled.
-    pub cancelled_at: Option<u32>,
-    /// The request counts for different statuses within the batch.
-    pub request_counts: Option<BatchRequestCounts>,
-    /// Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format. Keys can be a maximum of 64 characters long and values can be a maximum of 512 characters long.
-    pub metadata: Option<HashMap<String, serde_json::Value>>,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct BatchErrors {
-    /// The object type, which is always `list`.
-    pub object: String,
-    pub data: Vec<BatchError>,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct BatchError {
-    /// An error code identifying the error type.
-    pub code: String,
-    /// A human-readable message providing more details about the error.
-    pub message: String,
-    /// The name of the parameter that caused the error, if applicable.
-    pub param: Option<String>,
-    /// The line number of the input file where the error occurred, if applicable.
-    pub line: Option<u32>,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-#[serde(rename_all = "snake_case")]
-pub enum BatchStatus {
-    Validating,
-    Failed,
-    InProgress,
-    Finalizing,
-    Completed,
-    Expired,
-    Cancelling,
-    Cancelled,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct BatchRequestCounts {
-    /// Total number of requests in the batch.
-    pub total: u32,
-    /// Number of requests that have been completed successfully.
-    pub completed: u32,
-    /// Number of requests that have failed.
-    pub failed: u32,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct ListBatchesResponse {
-    pub data: Vec<Batch>,
-    pub first_id: Option<String>,
-    pub last_id: Option<String>,
-    pub has_more: bool,
-    pub object: String,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-#[serde(rename_all = "UPPERCASE")]
-pub enum BatchRequestInputMethod {
-    POST,
-}
-
-/// The per-line object of the batch input file
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct BatchRequestInput {
-    /// A developer-provided per-request id that will be used to match outputs to inputs. Must be unique for each request in a batch.
-    pub custom_id: String,
-    /// The HTTP method to be used for the request. Currently only `POST` is supported.
-    pub method: BatchRequestInputMethod,
-    /// The OpenAI API relative URL to be used for the request. Currently `/v1/chat/completions`, `/v1/embeddings`, and `/v1/completions` are supported.
-    pub url: BatchEndpoint,
-    pub body: Option<serde_json::Value>,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct BatchRequestOutputResponse {
-    /// The HTTP status code of the response
-    pub status_code: u16,
-    /// An unique identifier for the OpenAI API request. Please include this request ID when contacting support.
-    pub request_id: String,
-    /// The JSON body of the response
-    pub body: serde_json::Value,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct BatchRequestOutputError {
-    /// A machine-readable error code.
-    pub code: String,
-    /// A human-readable error message.
-    pub message: String,
-}
-
-/// The per-line object of the batch output and error files
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct BatchRequestOutput {
-    pub id: String,
-    /// A developer-provided per-request id that will be used to match outputs to inputs.
-    pub custom_id: String,
-    pub response: Option<BatchRequestOutputResponse>,
-    ///  For requests that failed with a non-HTTP error, this will contain more information on the cause of the failure.
-    pub error: Option<BatchRequestOutputError>,
-}
--- a/lib/protocols/src/types/chat.rs
+++ b/lib/protocols/src/types/chat.rs
 // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
+// Re-exports upstream async-openai chat types and defines inference-serving
+// extensions on top. Types prefixed with `Dynamo` or entirely absent from the
+// upstream spec are documented with the rationale for the extension.

-use std::{collections::HashMap, pin::Pin};
+use std::pin::Pin;

 use derive_builder::Builder;
 use futures::Stream;
 use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
-
 use url::Url;
-use uuid::{Uuid, uuid};
+use uuid::Uuid;

 use crate::error::OpenAIError;

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum Prompt {
-    String(String),
-    StringArray(Vec<String>),
-    // Minimum value is 0, maximum value is 4_294_967_295 (inclusive).
-    IntegerArray(Vec<u32>),
-    ArrayOfIntegerArray(Vec<Vec<u32>>),
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum Stop {
-    String(String),           // nullable: true
-    StringArray(Vec<String>), // minItems: 1; maxItems: 4
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct Logprobs {
-    pub tokens: Vec<String>,
-    pub token_logprobs: Vec<Option<f32>>, // Option is to account for null value in the list
-    pub top_logprobs: Vec<serde_json::Value>,
-    pub text_offset: Vec<u32>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum CompletionFinishReason {
-    Stop,
-    Length,
-    ContentFilter,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct Choice {
-    pub text: String,
-    pub index: u32,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub logprobs: Option<Logprobs>,
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub finish_reason: Option<CompletionFinishReason>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub enum ChatCompletionFunctionCall {
-    /// The model does not call a function, and responds to the end-user.
-    #[serde(rename = "none")]
-    None,
-    /// The model can pick between an end-user or calling a function.
-    #[serde(rename = "auto")]
-    Auto,
-
-    // In spec this is ChatCompletionFunctionCallOption
-    // based on feedback from @m1guelpf in https://github.com/64bit/async-openai/pull/118
-    // it is diverged from the spec
-    /// Forces the model to call the specified function.
-    #[serde(untagged)]
-    Function { name: String },
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum Role {
-    System,
-    #[default]
-    User,
-    Assistant,
-    Tool,
-    Function,
-}
-
-/// The name and arguments of a function that should be called, as generated by the model.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct FunctionCall {
-    /// The name of the function to call.
-    pub name: String,
-    /// The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function.
-    pub arguments: String,
-}
-
-/// Usage statistics for the completion request.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
-pub struct CompletionUsage {
-    /// Number of tokens in the prompt.
-    pub prompt_tokens: u32,
-    /// Number of tokens in the generated completion.
-    pub completion_tokens: u32,
-    /// Total number of tokens used in the request (prompt + completion).
-    pub total_tokens: u32,
-    /// Breakdown of tokens used in the prompt.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub prompt_tokens_details: Option<PromptTokensDetails>,
-    /// Breakdown of tokens used in a completion.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub completion_tokens_details: Option<CompletionTokensDetails>,
-}
-
-/// Breakdown of tokens used in a completion.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
-pub struct PromptTokensDetails {
-    /// Audio input tokens present in the prompt.
-    pub audio_tokens: Option<u32>,
-    /// Cached tokens present in the prompt.
-    pub cached_tokens: Option<u32>,
-}
-
-/// Breakdown of tokens used in a completion.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
-pub struct CompletionTokensDetails {
-    pub accepted_prediction_tokens: Option<u32>,
-    /// Audio input tokens generated by the model.
-    pub audio_tokens: Option<u32>,
-    /// Tokens generated by the model for reasoning.
-    pub reasoning_tokens: Option<u32>,
-    ///  When using Predicted Outputs, the number of tokens in the
-    /// prediction that did not appear in the completion. However, like
-    /// reasoning tokens, these tokens are still counted in the total
-    /// completion tokens for purposes of billing, output, and context
-    /// window limits.
-    pub rejected_prediction_tokens: Option<u32>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestDeveloperMessageArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestDeveloperMessage {
-    /// The contents of the developer message.
-    pub content: ChatCompletionRequestDeveloperMessageContent,
-
-    /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub name: Option<String>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum ChatCompletionRequestDeveloperMessageContent {
-    Text(String),
-    Array(Vec<ChatCompletionRequestMessageContentPartText>),
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestSystemMessageArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestSystemMessage {
-    /// The contents of the system message.
-    pub content: ChatCompletionRequestSystemMessageContent,
-    /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub name: Option<String>,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestMessageContentPartTextArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestMessageContentPartText {
-    pub text: String,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
-pub struct ChatCompletionRequestMessageContentPartRefusal {
-    /// The refusal message generated by the model.
-    pub refusal: String,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
+// ---------------------------------------------------------------------------
+// Re-exports from upstream async-openai (unchanged types)
+// ---------------------------------------------------------------------------
+// Our former local copies of these types were structurally identical to the
+// upstream definitions, so they are now re-exported instead of duplicated.
+// Consumers should keep using them via `dynamo_protocols::types::*` as before.
+
+pub use async_openai::types::chat::{
+    ChatChoiceLogprobs,
+    ChatCompletionAudio,
+    ChatCompletionAudioFormat,
+    ChatCompletionAudioVoice,
+    ChatCompletionFunctionCall,
+    ChatCompletionFunctions,
+    ChatCompletionFunctionsArgs,
+    ChatCompletionMessageToolCallChunk,
+    ChatCompletionRequestAssistantMessageAudio,
+    ChatCompletionRequestAssistantMessageContent,
+    ChatCompletionRequestAssistantMessageContentPart,
+    ChatCompletionRequestDeveloperMessage,
+    ChatCompletionRequestDeveloperMessageArgs,
+    ChatCompletionRequestDeveloperMessageContent,
+    ChatCompletionRequestFunctionMessage,
+    ChatCompletionRequestFunctionMessageArgs,
+    ChatCompletionRequestMessageContentPartAudio,
+    ChatCompletionRequestMessageContentPartRefusal,
+    ChatCompletionRequestMessageContentPartText,
+    ChatCompletionRequestSystemMessage,
+    // Note: the `*Args` names throughout this list are builder types (generated by derive_builder).
+    ChatCompletionRequestSystemMessageArgs,
+    ChatCompletionRequestSystemMessageContent,
+    ChatCompletionRequestSystemMessageContentPart,
+    ChatCompletionRequestToolMessage,
+    ChatCompletionRequestToolMessageArgs,
+    ChatCompletionRequestToolMessageContent,
+    ChatCompletionRequestToolMessageContentPart,
+    ChatCompletionResponseMessageAudio,
+    ChatCompletionTokenLogprob,
+    Choice,
+    CompletionFinishReason,
+    CompletionTokensDetails,
+    CompletionUsage,
+    FunctionCall,
+    FunctionCallStream,
+    FunctionObject,
+    FunctionObjectArgs,
+    InputAudio,
+    InputAudioFormat,
+    Logprobs,
+    PredictionContent,
+    PredictionContentContent,
+    Prompt,
+    PromptTokensDetails,
+    ReasoningEffort,
+    ResponseFormat,
+    ResponseFormatJsonSchema,
+    Role,
+    ServiceTier,
+    TopLogprobs,
+    WebSearchContextSize,
+    WebSearchLocation,
+    WebSearchOptions,
+    WebSearchUserLocation,
+    WebSearchUserLocationType,
+};
+
+// Upstream renamed Stop -> StopConfiguration; re-export under old name for compat
+pub use async_openai::types::chat::StopConfiguration as Stop;
+
+// Upstream renamed FinishReason (streaming) -- re-export
+pub use async_openai::types::chat::FinishReason;
+
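+// Upstream uses FunctionType where we used ChatCompletionToolType.
+// Only the upstream name is re-exported here; the legacy `ChatCompletionToolType`
+// enum is still defined locally further down for compatibility.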
+pub use async_openai::types::chat::FunctionType;
+
+// ---------------------------------------------------------------------------
+// Types with structural differences from upstream (kept locally)
+// ---------------------------------------------------------------------------
+
+/// Image detail level. Kept locally because upstream uses different field types in ImageUrl.
+#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ImageDetail {
    #[default]
@@ -203,231 +104,196 @@ pub enum ImageDetail {
    High,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
-#[builder(name = "ImageUrlArgs")]
+/// Image content part -- uses our extended `ImageUrl` with `url::Url` and `uuid`.
+#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[builder(name = "ChatCompletionRequestMessageContentPartImageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
 #[builder(derive(Debug))]
 #[builder(build_fn(error = "OpenAIError"))]
-pub struct ImageUrl {
-    /// Either a URL of the image or the base64 encoded image data.
-    pub url: url::Url,
-    /// Specifies the detail level of the image. Learn more in the [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding).
-    pub detail: Option<ImageDetail>,
-    /// Optional unique identifier for the image.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub uuid: Option<uuid::Uuid>,
+pub struct ChatCompletionRequestMessageContentPartImage {
+    pub image_url: ImageUrl,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
-#[builder(name = "VideoUrlArgs")]
+/// Image URL with `url::Url` type and optional UUID.
+///
+/// Differs from upstream: uses `url::Url` instead of `String`, adds `uuid` field
+/// for tracking multimodal assets through the pipeline.
+#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[builder(name = "ImageUrlArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
 #[builder(derive(Debug))]
 #[builder(build_fn(error = "OpenAIError"))]
-pub struct VideoUrl {
-    /// Either a URL of the video or the base64 encoded video data.
-    pub url: url::Url,
-    /// Specifies the detail level of the video processing.
+pub struct ImageUrl {
+    pub url: Url,
    pub detail: Option<ImageDetail>,
-    /// Optional unique identifier for the video.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub uuid: Option<uuid::Uuid>,
+    pub uuid: Option<Uuid>,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestMessageContentPartImageArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option))]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestMessageContentPartImage {
-    pub image_url: ImageUrl,
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ChatCompletionToolType {
+    #[default]
+    Function,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestMessageContentPartVideoArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option))]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestMessageContentPartVideo {
-    pub video_url: VideoUrl,
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct FunctionName {
+    pub name: String,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
-#[builder(name = "AudioUrlArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option))]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct AudioUrl {
-    /// URL of the audio file
-    pub url: url::Url,
-    /// Optional unique identifier for the audio.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub uuid: Option<uuid::Uuid>,
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+pub struct ChatCompletionNamedToolChoice {
+    pub r#type: ChatCompletionToolType,
+    pub function: FunctionName,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestMessageContentPartAudioUrlArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option))]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestMessageContentPartAudioUrl {
-    pub audio_url: AudioUrl,
+fn default_function_type() -> FunctionType {
+    FunctionType::Function
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum InputAudioFormat {
-    Wav,
-    #[default]
-    Mp3,
+/// Tool call kept locally to preserve `type: "function"` in unary request/response payloads.
+///
+/// Differs from upstream: `type` is serialized by default and also defaults to
+/// `function` when omitted during deserialization, preserving compatibility with
+/// both Dynamo's historical wire format and upstream spec-compliant inputs.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+pub struct ChatCompletionMessageToolCall {
+    pub id: String,
+    #[serde(default = "default_function_type")]
+    pub r#type: FunctionType,
+    pub function: FunctionCall,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-pub struct InputAudio {
-    /// Base64 encoded audio data.
-    pub data: String,
-    /// The format of the encoded audio data. Currently supports "wav" and "mp3".
-    pub format: InputAudioFormat,
+/// Tool choice enum kept locally because upstream changed variant names.
+#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ChatCompletionToolChoiceOption {
+    #[default]
+    None,
+    Auto,
+    Required,
+    #[serde(untagged)]
+    Named(ChatCompletionNamedToolChoice),
 }

-/// Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestMessageContentPartAudioArgs")]
+#[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
+#[builder(name = "ChatCompletionToolArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
 #[builder(derive(Debug))]
 #[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestMessageContentPartAudio {
-    pub input_audio: InputAudio,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum ChatCompletionRequestUserMessageContentPart {
-    Text(ChatCompletionRequestMessageContentPartText),
-    ImageUrl(ChatCompletionRequestMessageContentPartImage),
-    VideoUrl(ChatCompletionRequestMessageContentPartVideo),
-    AudioUrl(ChatCompletionRequestMessageContentPartAudioUrl),
-    InputAudio(ChatCompletionRequestMessageContentPartAudio),
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum ChatCompletionRequestSystemMessageContentPart {
-    Text(ChatCompletionRequestMessageContentPartText),
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum ChatCompletionRequestAssistantMessageContentPart {
-    Text(ChatCompletionRequestMessageContentPartText),
-    Refusal(ChatCompletionRequestMessageContentPartRefusal),
+pub struct ChatCompletionTool {
+    #[builder(default = "ChatCompletionToolType::Function")]
+    pub r#type: ChatCompletionToolType,
+    pub function: FunctionObject,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum ChatCompletionRequestToolMessageContentPart {
-    Text(ChatCompletionRequestMessageContentPartText),
-}
+// ---------------------------------------------------------------------------
+// Inference-serving extensions (not in upstream)
+// ---------------------------------------------------------------------------

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+/// Matched stop condition from the backend.
+///
+/// Inference backends (vLLM, SGLang) report which stop condition triggered:
+/// - `String`: a matched user-provided stop sequence
+/// - `Int`: a matched stop token ID
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
 #[serde(untagged)]
-pub enum ChatCompletionRequestSystemMessageContent {
-    /// The text contents of the system message.
-    Text(String),
-    /// An array of content parts with a defined type. For system messages, only type `text` is supported.
-    Array(Vec<ChatCompletionRequestSystemMessageContentPart>),
+pub enum StopReason {
+    String(String),
+    Int(i64),
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+/// Reasoning content from a previous assistant turn.
+///
+/// Deserializes from either:
+/// - A plain string: `"reasoning_content": "thinking..."` -> `Text("thinking...")`
+/// - An array of strings: `"reasoning_content": ["seg1", "seg2"]` -> `Segments(["seg1", "seg2"])`
+///
+/// The `Segments` variant preserves interleaved reasoning order needed for KV cache-correct
+/// context reconstruction. `segments[i]` is the reasoning that preceded `tool_calls[i]`;
+/// `segments[tool_calls.len()]` is any trailing reasoning after the last tool call.
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
 #[serde(untagged)]
-pub enum ChatCompletionRequestUserMessageContent {
-    /// The text contents of the message.
+pub enum ReasoningContent {
+    /// Flat string -- single reasoning block or legacy backward-compat form.
    Text(String),
-    /// An array of content parts with a defined type. Supported options differ based on the [model](https://platform.openai.com/docs/models) being used to generate the response. Can contain text, image, or audio inputs.
-    Array(Vec<ChatCompletionRequestUserMessageContentPart>),
+    /// Interleaved segments. segments[i] precedes tool_calls[i];
+    /// segments[N] is trailing reasoning after the last tool call.
+    Segments(Vec<String>),
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum ChatCompletionRequestAssistantMessageContent {
-    /// The text contents of the message.
-    Text(String),
-    /// An array of content parts with a defined type. Can be one or more of type `text`, or exactly one of type `refusal`.
-    Array(Vec<ChatCompletionRequestAssistantMessageContentPart>),
-}
+impl ReasoningContent {
+    /// Join all segments (or return text as-is) into a single flat string.
+    pub fn to_flat_string(&self) -> String {
+        match self {
+            ReasoningContent::Text(s) => s.clone(),
+            ReasoningContent::Segments(segs) => segs
+                .iter()
+                .filter(|s| !s.is_empty())
+                .cloned()
+                .collect::<Vec<_>>()
+                .join("\n"),
+        }
+    }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum ChatCompletionRequestToolMessageContent {
-    /// The text contents of the tool message.
-    Text(String),
-    /// An array of content parts with a defined type. For tool messages, only type `text` is supported.
-    Array(Vec<ChatCompletionRequestToolMessageContentPart>),
+    /// Returns the segments if this is the `Segments` variant, `None` for `Text`.
+    pub fn segments(&self) -> Option<&[String]> {
+        match self {
+            ReasoningContent::Segments(segs) => Some(segs),
+            ReasoningContent::Text(_) => None,
+        }
+    }
 }

-// Omni Specific Multimodal Content Types
-// These types are used for assistant message responses that contain multimodal content
+// -- Multimodal content types for responses (not in upstream) --

 /// Response content part for text in assistant messages
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct ChatCompletionResponseContentPartText {
    pub text: String,
 }

 /// Response content part for image URLs in assistant messages
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct ChatCompletionResponseContentPartImageUrl {
    pub image_url: ImageUrlResponse,
 }

 /// Response content part for video URLs in assistant messages
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct ChatCompletionResponseContentPartVideoUrl {
    pub video_url: VideoUrlResponse,
 }

 /// Response content part for audio URLs in assistant messages
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct ChatCompletionResponseContentPartAudioUrl {
    pub audio_url: AudioUrlResponse,
 }

-/// Image URL in response messages (supports data URLs with base64)
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct ImageUrlResponse {
-    /// The URL of the image, either a URL or a data URL (data:image/png;base64,...)
    pub url: String,
-    /// Optional detail level (for compatibility with OpenAI)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub detail: Option<String>,
 }

-/// Video URL in response messages
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct VideoUrlResponse {
-    /// The URL of the video, either a URL or a data URL
    pub url: String,
 }

-/// Audio URL in response messages
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct AudioUrlResponse {
-    /// The URL of the audio, either a URL or a data URL
    pub url: String,
 }

-/// Content parts for assistant responses supporting multiple modalities (text, images, videos, audio)
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+/// Content parts for assistant responses supporting multiple modalities
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ChatCompletionResponseContentPart {
    Text(ChatCompletionResponseContentPartText),
@@ -436,8 +302,12 @@ pub enum ChatCompletionResponseContentPart {
    AudioUrl(ChatCompletionResponseContentPartAudioUrl),
 }

-/// Assistant message content - can be a simple string or an array of content parts
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
+/// Assistant message content -- can be a simple string or multimodal content parts.
+///
+/// Upstream uses `Option<String>` for the content field. We extend this to
+/// support multimodal responses (text + images + video + audio) from backends
+/// like vLLM that can return non-text content.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(untagged)]
 pub enum ChatCompletionMessageContent {
    /// Simple text content (backward compatible)
@@ -446,132 +316,153 @@ pub enum ChatCompletionMessageContent {
    Parts(Vec<ChatCompletionResponseContentPart>),
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+// -- Multimodal input types (video/audio URL support, not in upstream) --
+
+#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[builder(name = "VideoUrlArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option))]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct VideoUrl {
+    pub url: Url,
+    pub detail: Option<ImageDetail>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub uuid: Option<Uuid>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[builder(name = "ChatCompletionRequestMessageContentPartVideoArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option))]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ChatCompletionRequestMessageContentPartVideo {
+    pub video_url: VideoUrl,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[builder(name = "AudioUrlArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option))]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct AudioUrl {
+    pub url: Url,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub uuid: Option<Uuid>,
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[builder(name = "ChatCompletionRequestMessageContentPartAudioUrlArgs")]
+#[builder(pattern = "mutable")]
+#[builder(setter(into, strip_option))]
+#[builder(derive(Debug))]
+#[builder(build_fn(error = "OpenAIError"))]
+pub struct ChatCompletionRequestMessageContentPartAudioUrl {
+    pub audio_url: AudioUrl,
+}
+
+// -- Extended request/response types --
+
+/// User message content -- references our extended content part enum.
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(untagged)]
+pub enum ChatCompletionRequestUserMessageContent {
+    Text(String),
+    Array(Vec<ChatCompletionRequestUserMessageContentPart>),
+}
+
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestUserMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
 #[builder(derive(Debug))]
 #[builder(build_fn(error = "OpenAIError"))]
 pub struct ChatCompletionRequestUserMessage {
-    /// The contents of the user message.
    pub content: ChatCompletionRequestUserMessageContent,
-    /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-pub struct ChatCompletionRequestAssistantMessageAudio {
-    /// Unique identifier for a previous audio response from the model.
-    pub id: String,
+impl Default for ChatCompletionRequestUserMessageContent {
+    fn default() -> Self {
+        Self::Text(String::new())
+    }
 }

-/// Reasoning content from a previous assistant turn.
-///
-/// This is an untagged enum that deserializes from either:
-/// - A plain string: `"reasoning_content": "thinking..."` -> `Text("thinking...")`
-/// - An array of strings: `"reasoning_content": ["seg1", "seg2"]` -> `Segments(["seg1", "seg2"])`
-///
-/// The `Segments` variant preserves interleaved reasoning order needed for KV cache–correct
-/// context reconstruction. `segments[i]` is the reasoning that preceded `tool_calls[i]`;
-/// `segments[tool_calls.len()]` is any trailing reasoning after the last tool call.
-/// `segments.len() == tool_calls.len() + 1` always when set.
-#[derive(ToSchema, Serialize, Deserialize, Clone, Debug, PartialEq)]
-#[serde(untagged)]
-pub enum ReasoningContent {
-    /// Flat string — single reasoning block or legacy backward-compat form.
-    Text(String),
-    /// Interleaved segments. segments[i] precedes tool_calls[i];
-    /// segments[N] is trailing reasoning after the last tool call.
-    /// segments.len() == tool_calls.len() + 1.
-    Segments(Vec<String>),
+impl From<&str> for ChatCompletionRequestUserMessageContent {
+    fn from(value: &str) -> Self {
+        Self::Text(value.into())
+    }
 }

-impl ReasoningContent {
-    /// Join all segments (or return text as-is) into a single flat string.
-    pub fn to_flat_string(&self) -> String {
-        match self {
-            ReasoningContent::Text(s) => s.clone(),
-            ReasoningContent::Segments(segs) => segs
-                .iter()
-                .filter(|s| !s.is_empty())
-                .cloned()
-                .collect::<Vec<_>>()
-                .join("\n"),
-        }
+impl From<String> for ChatCompletionRequestUserMessageContent {
+    fn from(value: String) -> Self {
+        Self::Text(value)
    }
+}

-    /// Returns the segments if this is the `Segments` variant, `None` for `Text`.
-    pub fn segments(&self) -> Option<&[String]> {
-        match self {
-            ReasoningContent::Segments(segs) => Some(segs),
-            ReasoningContent::Text(_) => None,
-        }
+impl From<Vec<ChatCompletionRequestUserMessageContentPart>>
+    for ChatCompletionRequestUserMessageContent
+{
+    fn from(value: Vec<ChatCompletionRequestUserMessageContentPart>) -> Self {
+        Self::Array(value)
    }
 }

-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+/// User message content part with video and audio URL support.
+///
+/// Extends upstream `ChatCompletionRequestUserMessageContentPart` with:
+/// - `VideoUrl`: video input for multimodal models
+/// - `AudioUrl`: audio URL input (distinct from base64 InputAudio)
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[serde(tag = "type")]
+#[serde(rename_all = "snake_case")]
+pub enum ChatCompletionRequestUserMessageContentPart {
+    Text(ChatCompletionRequestMessageContentPartText),
+    ImageUrl(ChatCompletionRequestMessageContentPartImage),
+    VideoUrl(ChatCompletionRequestMessageContentPartVideo),
+    AudioUrl(ChatCompletionRequestMessageContentPartAudioUrl),
+    InputAudio(ChatCompletionRequestMessageContentPartAudio),
+}
+
+/// Assistant message with reasoning content support.
+///
+/// Extends upstream `ChatCompletionRequestAssistantMessage` with:
+/// - `reasoning_content`: interleaved reasoning segments for KV cache correctness
+///   (DeepSeek-R1, QwQ models)
+#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestAssistantMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
 #[builder(derive(Debug))]
 #[builder(build_fn(error = "OpenAIError"))]
 pub struct ChatCompletionRequestAssistantMessage {
-    /// The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<ChatCompletionRequestAssistantMessageContent>,
    /// Reasoning content from a previous assistant turn.
-    ///
-    /// When serialized as a plain string, represents a flat reasoning block (backward-compatible
-    /// with Jinja chat templates). When serialized as an array of strings, represents
-    /// interleaved reasoning segments preserving per-position order for KV cache correctness.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_content: Option<ReasoningContent>,
-    /// The refusal message by the assistant.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub refusal: Option<String>,
-    /// An optional name for the participant. Provides the model information to differentiate between participants of the same role.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub name: Option<String>,
-    /// Data about a previous audio response from the model.
-    /// [Learn more](https://platform.openai.com/docs/guides/audio).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio: Option<ChatCompletionRequestAssistantMessageAudio>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ChatCompletionMessageToolCall>>,
-    /// Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.
    #[deprecated]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<FunctionCall>,
 }

-/// Tool message
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestToolMessageArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestToolMessage {
-    /// The contents of the tool message.
-    pub content: ChatCompletionRequestToolMessageContent,
-    pub tool_call_id: String,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
-#[builder(name = "ChatCompletionRequestFunctionMessageArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionRequestFunctionMessage {
-    /// The return value from the function call, to return to the model.
-    pub content: Option<String>,
-    /// The name of the function to call.
-    pub name: String,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
+/// Chat completion request message enum.
+///
+/// Redefined to use our extended `ChatCompletionRequestAssistantMessage`
+/// (with reasoning_content) and `ChatCompletionRequestUserMessage`
+/// (which references our extended content parts with video/audio).
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "role")]
 #[serde(rename_all = "lowercase")]
 pub enum ChatCompletionRequestMessage {
@@ -583,852 +474,252 @@ pub enum ChatCompletionRequestMessage {
    Function(ChatCompletionRequestFunctionMessage),
 }

-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct ChatCompletionMessageToolCall {
-    /// The ID of the tool call.
-    pub id: String,
-    /// The type of the tool. Currently, only `function` is supported.
-    pub r#type: ChatCompletionToolType,
-    /// The function that the model called.
-    pub function: FunctionCall,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-pub struct ChatCompletionResponseMessageAudio {
-    /// Unique identifier for this audio response.
-    pub id: String,
-    /// The Unix timestamp (in seconds) for when this audio response will no longer be accessible on the server for use in multi-turn conversations.
-    pub expires_at: u32,
-    /// Base64 encoded audio bytes generated by the model, in the format specified in the request.
-    pub data: String,
-    /// Transcript of the audio generated by the model.
-    pub transcript: String,
+/// Service tier reported in responses (distinct from the request-side `ServiceTier`).
+///
+/// Not in upstream -- backends report which tier actually served the request.
+#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[serde(rename_all = "lowercase")]
+pub enum ServiceTierResponse {
+    Scale,
+    Default,
+    Flex,
+    Priority,
 }

-/// A chat completion message generated by the model.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
+/// Chat completion response message with multimodal content and reasoning.
+///
+/// Extends upstream `ChatCompletionResponseMessage` with:
+/// - `content`: `Option<ChatCompletionMessageContent>` (multimodal) instead of `Option<String>`
+/// - `reasoning_content`: model reasoning output (DeepSeek-R1, QwQ)
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatCompletionResponseMessage {
-    /// The contents of the message - can be a string or array of content parts
    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<ChatCompletionMessageContent>,
-    /// The refusal message generated by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub refusal: Option<String>,
-    /// The tool calls generated by the model, such as function calls.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ChatCompletionMessageToolCall>>,
-
-    /// The role of the author of this message.
    pub role: Role,
-
-    /// Deprecated and replaced by `tool_calls`.
-    /// The name and arguments of a function that should be called, as generated by the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[deprecated]
    pub function_call: Option<FunctionCall>,
-
-    /// If the audio output modality is requested, this object contains data about the audio response from the model. [Learn more](https://platform.openai.com/docs/guides/audio).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio: Option<ChatCompletionResponseMessageAudio>,
-
-    /// NVIDIA-specific extensions for the chat completion response.
+    /// Reasoning content produced by the model (e.g. DeepSeek-R1, QwQ); unlike the fields above, not skipped when `None`.
    pub reasoning_content: Option<String>,
 }

-#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
-#[builder(name = "ChatCompletionFunctionsArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-#[deprecated]
-pub struct ChatCompletionFunctions {
-    /// The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
-    pub name: String,
-    /// A description of what the function does, used by the model to choose when and how to call the function.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-    /// The parameters the functions accepts, described as a JSON Schema object. See the [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
-    ///
-    /// Omitting `parameters` defines a function with an empty parameter list.
-    pub parameters: serde_json::Value,
-}
-
-#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
-#[builder(name = "FunctionObjectArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct FunctionObject {
-    /// The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
-    pub name: String,
-    /// A description of what the function does, used by the model to choose when and how to call the function.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-    /// The parameters the functions accepts, described as a JSON Schema object. See the [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.
-    ///
-    /// Omitting `parameters` defines a function with an empty parameter list.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub parameters: Option<serde_json::Value>,
-
-    /// Whether to enable strict schema adherence when generating the function call. If set to true, the model will follow the exact schema defined in the `parameters` field. Only a subset of JSON Schema is supported when `strict` is `true`. Learn more about Structured Outputs in the [function calling guide](https://platform.openai.com/docs/guides/function-calling).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub strict: Option<bool>,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "snake_case")]
-pub enum ResponseFormat {
-    /// The type of response format being defined: `text`
-    Text,
-    /// The type of response format being defined: `json_object`
-    JsonObject,
-    /// The type of response format being defined: `json_schema`
-    JsonSchema {
-        json_schema: ResponseFormatJsonSchema,
-    },
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct ResponseFormatJsonSchema {
-    /// A description of what the response format is for, used by the model to determine how to respond in the format.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-    /// The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64.
-    pub name: String,
-    /// The schema for the response format, described as a JSON Schema object.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub schema: Option<serde_json::Value>,
-    /// Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. To learn more, read the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub strict: Option<bool>,
-}
-
-#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ChatCompletionToolType {
-    #[default]
-    Function,
-}
-
-#[derive(ToSchema, Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
-#[builder(name = "ChatCompletionToolArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct ChatCompletionTool {
-    #[builder(default = "ChatCompletionToolType::Function")]
-    pub r#type: ChatCompletionToolType,
-    pub function: FunctionObject,
-}
-
-#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
-pub struct FunctionName {
-    /// The name of the function to call.
-    pub name: String,
-}
-
-/// Specifies a tool the model should use. Use to force the model to call a specific function.
-#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
-pub struct ChatCompletionNamedToolChoice {
-    /// The type of the tool. Currently, only `function` is supported.
-    pub r#type: ChatCompletionToolType,
-
-    pub function: FunctionName,
-}
-
-/// Controls which (if any) tool is called by the model.
-/// `none` means the model will not call any tool and instead generates a message.
-/// `auto` means the model can pick between generating a message or calling one or more tools.
-/// `required` means the model must call one or more tools.
-/// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
+/// Stream options with per-chunk usage reporting.
 ///
-/// `none` is the default when no tools are present. `auto` is the default if tools are present.
-#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ChatCompletionToolChoiceOption {
-    #[default]
-    None,
-    Auto,
-    Required,
-    #[serde(untagged)]
-    Named(ChatCompletionNamedToolChoice),
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
-#[serde(rename_all = "lowercase")]
-/// The amount of context window space to use for the search.
-pub enum WebSearchContextSize {
-    Low,
-    #[default]
-    Medium,
-    High,
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum WebSearchUserLocationType {
-    Approximate,
-}
-
-/// Approximate location parameters for the search.
-#[derive(ToSchema, Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
-pub struct WebSearchLocation {
-    ///  The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, e.g. `US`.
-    pub country: Option<String>,
-    /// Free text input for the region of the user, e.g. `California`.
-    pub region: Option<String>,
-    /// Free text input for the city of the user, e.g. `San Francisco`.
-    pub city: Option<String>,
-    /// The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. `America/Los_Angeles`.
-    pub timezone: Option<String>,
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct WebSearchUserLocation {
-    //  The type of location approximation. Always `approximate`.
-    pub r#type: WebSearchUserLocationType,
-
-    pub approximate: WebSearchLocation,
-}
-
-/// Options for the web search tool.
-#[derive(ToSchema, Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
-pub struct WebSearchOptions {
-    /// High level guidance for the amount of context window space to use for the search. One of `low`, `medium`, or `high`. `medium` is the default.
-    pub search_context_size: Option<WebSearchContextSize>,
-
-    /// Approximate location parameters for the search.
-    pub user_location: Option<WebSearchUserLocation>,
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ServiceTier {
-    Auto,
-    Default,
-    Flex,
-    Scale,
-    Priority,
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ServiceTierResponse {
-    Scale,
-    Default,
-    Flex,
-    Priority,
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ReasoningEffort {
-    Minimal,
-    Low,
-    Medium,
-    High,
+/// Extends upstream `ChatCompletionStreamOptions` with:
+/// - `continuous_usage_stats`: emit usage in every chunk, not just the final one
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+pub struct ChatCompletionStreamOptions {
+    pub include_usage: bool,
+    /// When true, usage statistics are included in every streaming chunk.
+    /// Backends like vLLM/SGLang support this for real-time token counting.
+    #[serde(default)]
+    pub continuous_usage_stats: bool,
 }

-/// Output types that you would like the model to generate for this request.
-///
-/// Most models are capable of generating text, which is the default: `["text"]`
+/// Chat completion request with multimodal processor support.
 ///
-/// The `gpt-4o-audio-preview` model can also be used to [generate
-/// audio](https://platform.openai.com/docs/guides/audio). To request that this model generate both text and audio responses, you can use: `["text", "audio"]`
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ChatCompletionModalities {
-    Text,
-    Audio,
-}
-
-/// The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(untagged)]
-pub enum PredictionContentContent {
-    /// The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
-    Text(String),
-    /// An array of content parts with a defined type. Supported options differ based on the [model](https://platform.openai.com/docs/models) being used to generate the response. Can contain text inputs.
-    Array(Vec<ChatCompletionRequestMessageContentPartText>),
-}
-
-/// Static predicted output content, such as the content of a text file that is being regenerated.
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(tag = "type", rename_all = "lowercase", content = "content")]
-pub enum PredictionContent {
-    /// The type of the predicted content you want to provide. This type is
-    /// currently always `content`.
-    Content(PredictionContentContent),
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ChatCompletionAudioVoice {
-    Alloy,
-    Ash,
-    Ballad,
-    Coral,
-    Echo,
-    Sage,
-    Shimmer,
-    Verse,
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ChatCompletionAudioFormat {
-    Wav,
-    Mp3,
-    Flac,
-    Opus,
-    Pcm16,
-}
-
-#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct ChatCompletionAudio {
-    /// The voice the model uses to respond. Supported voices are `ash`, `ballad`, `coral`, `sage`, and `verse` (also supported but not recommended are `alloy`, `echo`, and `shimmer`; these voices are less expressive).
-    pub voice: ChatCompletionAudioVoice,
-    /// Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`.
-    pub format: ChatCompletionAudioFormat,
-}
-
-#[derive(ToSchema, Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
+/// Differs from upstream `CreateChatCompletionRequest` as follows:
+/// - adds `mm_processor_kwargs`: multimodal processor configuration (vLLM-specific)
+/// - uses our extended `ChatCompletionRequestMessage` (with reasoning, video/audio)
+/// - uses our extended `ChatCompletionStreamOptions` (with `continuous_usage_stats`)
+#[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
 #[builder(name = "CreateChatCompletionRequestArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
 #[builder(derive(Debug))]
 #[builder(build_fn(error = "OpenAIError"))]
 pub struct CreateChatCompletionRequest {
-    /// A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message types (modalities) are supported, like [text](https://platform.openai.com/docs/guides/text-generation), [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio).
-    pub messages: Vec<ChatCompletionRequestMessage>, // min: 1
-
-    /// ID of the model to use.
-    /// See the [model endpoint compatibility](https://platform.openai.com/docs/models#model-endpoint-compatibility) table for details on which models work with the Chat API.
+    pub messages: Vec<ChatCompletionRequestMessage>,
    pub model: String,
-
-    /// Multimodal processor configuration parameters
+    /// Multimodal processor configuration (vLLM-specific)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub mm_processor_kwargs: Option<serde_json::Value>,
-
-    /// Whether or not to store the output of this chat completion request
-    ///
-    /// for use in our [model distillation](https://platform.openai.com/docs/guides/distillation) or [evals](https://platform.openai.com/docs/guides/evals) products.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub store: Option<bool>, // nullable: true, default: false
-
-    /// **o1 models only**
-    ///
-    /// Constrains effort on reasoning for
-    /// [reasoning models](https://platform.openai.com/docs/guides/reasoning).
-    ///
-    /// Currently supported values are `low`, `medium`, and `high`. Reducing
-    ///
-    /// reasoning effort can result in faster responses and fewer tokens
-    /// used on reasoning in a response.
+    pub store: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffort>,
-
-    ///  Developer-defined tags and values used for filtering completions in the [dashboard](https://platform.openai.com/chat-completions).
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<serde_json::Value>, // nullable: true
-
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
+    pub metadata: Option<serde_json::Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub frequency_penalty: Option<f32>, // min: -2.0, max: 2.0, default: 0
-
-    /// Modify the likelihood of specified tokens appearing in the completion.
-    ///
-    /// Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100.
-    /// Mathematically, the bias is added to the logits generated by the model prior to sampling.
-    /// The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection;
-    /// values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
+    pub frequency_penalty: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub logit_bias: Option<HashMap<String, serde_json::Value>>, // default: null
-
-    /// Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of `message`.
+    pub logit_bias: Option<std::collections::HashMap<String, serde_json::Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<bool>,
-
-    /// An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<u8>,
-
-    /// The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the chat completion.
-    ///
-    /// This value can be used to control [costs](https://openai.com/api/pricing/) for text generated via API.
-    /// This value is now deprecated in favor of `max_completion_tokens`, and is
-    /// not compatible with [o1 series models](https://platform.openai.com/docs/guides/reasoning).
    #[deprecated]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,
-
-    /// An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_completion_tokens: Option<u32>,
-
-    /// How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub n: Option<u8>, // min:1, max: 128, default: 1
-
+    pub n: Option<u8>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub modalities: Option<Vec<ChatCompletionModalities>>,
-
-    /// Configuration for a [Predicted Output](https://platform.openai.com/docs/guides/predicted-outputs),which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content.
+    pub modalities: Option<Vec<async_openai::types::chat::ResponseModalities>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prediction: Option<PredictionContent>,
-
-    /// Parameters for audio output. Required when audio output is requested with `modalities: ["audio"]`. [Learn more](https://platform.openai.com/docs/guides/audio).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub audio: Option<ChatCompletionAudio>,
-
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub presence_penalty: Option<f32>, // min: -2.0, max: 2.0, default 0
-
-    /// An object specifying the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini), [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-    ///
-    /// Setting to `{ "type": "json_schema", "json_schema": {...} }` enables Structured Outputs which guarantees the model will match your supplied JSON schema. Learn more in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).
-    ///
-    /// Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the message the model generates is valid JSON.
-    ///
-    /// **Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly "stuck" request. Also note that the message content may be partially cut off if `finish_reason="length"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length.
+    pub presence_penalty: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub response_format: Option<ResponseFormat>,
-
-    ///  This feature is in Beta.
-    /// If specified, our system will make a best effort to sample deterministically, such that repeated requests
-    /// with the same `seed` and parameters should return the same result.
-    /// Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i64>,
-
-    /// Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:
-    /// - If set to 'auto', the system will utilize scale tier credits until they are exhausted.
-    /// - If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee.
-    /// - When not set, the default behavior is 'auto'.
-    ///
-    /// When this parameter is set, the response body will include the `service_tier` utilized.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub service_tier: Option<ServiceTier>,
-
-    /// Up to 32 sequences where the API will stop generating further tokens.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Stop>,
-
-    /// If set, partial message deltas will be sent, like in ChatGPT.
-    /// Tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-    /// as they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-    #[serde(skip_serializing_if = "Option::is_none")]
+    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub stream: Option<bool>,
-
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream_options: Option<ChatCompletionStreamOptions>,
-
-    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,
-    /// while lower values like 0.2 will make it more focused and deterministic.
-    ///
-    /// We generally recommend altering this or `top_p` but not both.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>, // min: 0, max: 2, default: 1,
-
-    /// An alternative to sampling with temperature, called nucleus sampling,
-    /// where the model considers the results of the tokens with top_p probability mass.
-    /// So 0.1 means only the tokens comprising the top 10% probability mass are considered.
-    ///
-    ///  We generally recommend altering this or `temperature` but not both.
+    pub temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>, // min: 0, max: 1, default: 1
-
-    /// A list of tools the model may call. Currently, only functions are supported as a tool.
-    /// Use this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported.
+    pub top_p: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tools: Option<Vec<ChatCompletionTool>>,
-
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_choice: Option<ChatCompletionToolChoiceOption>,
-
-    /// Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub parallel_tool_calls: Option<bool>,
-
-    /// A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,
-
-    /// This tool searches the web for relevant results to use in a response.
-    /// Learn more about the [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub web_search_options: Option<WebSearchOptions>,
-
-    /// Deprecated in favor of `tool_choice`.
-    ///
-    /// Controls which (if any) function is called by the model.
-    /// `none` means the model will not call a function and instead generates a message.
-    /// `auto` means the model can pick between generating a message or calling a function.
-    /// Specifying a particular function via `{"name": "my_function"}` forces the model to call that function.
-    ///
-    /// `none` is the default when no functions are present. `auto` is the default if functions are present.
    #[deprecated]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub function_call: Option<ChatCompletionFunctionCall>,
-
-    /// Deprecated in favor of `tools`.
-    ///
-    /// A list of functions the model may generate JSON inputs for.
    #[deprecated]
    #[serde(skip_serializing_if = "Option::is_none")]
    pub functions: Option<Vec<ChatCompletionFunctions>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub web_search_options: Option<WebSearchOptions>,
 }

-/// Options for streaming response. Only set this when you set `stream: true`.
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-pub struct ChatCompletionStreamOptions {
-    /// If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
-    pub include_usage: bool,
-    /// NVIDIA-specific and industrial common extensions for per chunk usage reporting.
-    #[serde(default)]
-    pub continuous_usage_stats: bool,
-}
-
-#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum FinishReason {
-    Stop,
-    Length,
-    ToolCalls,
-    ContentFilter,
-    FunctionCall,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct TopLogprobs {
-    /// The token.
-    pub token: String,
-    /// The log probability of this token.
-    pub logprob: f32,
-    /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.
-    pub bytes: Option<Vec<u8>>,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct ChatCompletionTokenLogprob {
-    /// The token.
-    pub token: String,
-    /// The log probability of this token, if it is within the top 20 most likely tokens. Otherwise, the value `-9999.0` is used to signify that the token is very unlikely.
-    pub logprob: f32,
-    /// A list of integers representing the UTF-8 bytes representation of the token. Useful in instances where characters are represented by multiple tokens and their byte representations must be combined to generate the correct text representation. Can be `null` if there is no bytes representation for the token.
-    pub bytes: Option<Vec<u8>>,
-    ///  List of the most likely tokens and their log probability, at this token position. In rare cases, there may be fewer than the number of requested `top_logprobs` returned.
-    pub top_logprobs: Vec<TopLogprobs>,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct ChatChoiceLogprobs {
-    /// A list of message content tokens with log probability information.
-    pub content: Option<Vec<ChatCompletionTokenLogprob>>,
-    pub refusal: Option<Vec<ChatCompletionTokenLogprob>>,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum StopReason {
-    String(String), // matched user-provided stop sequence
-    Int(i64),       // matched stop token id (requires stop_token_id support)
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
+/// Chat choice with extended response message.
+///
+/// Uses our `ChatCompletionResponseMessage` (multimodal content + reasoning).
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatChoice {
-    /// The index of the choice in the list of choices.
    pub index: u32,
    pub message: ChatCompletionResponseMessage,
-    /// The reason the model stopped generating tokens. This will be `stop` if the model hit a natural stop point or a provided stop sequence,
-    /// `length` if the maximum number of tokens specified in the request was reached,
-    /// `content_filter` if content was omitted due to a flag from our content filters,
-    /// `tool_calls` if the model called a tool, or `function_call` (deprecated) if the model called a function.
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,
-    /// Which stop string matched (if any).
-    /// This is only set when `finish_reason` is `"stop"` because a user-provided stop sequence was hit.
+    /// Matched stop sequence (string) or stop token ID (integer) reported by the backend, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<StopReason>,
-    /// Log probability information for the choice.
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<ChatChoiceLogprobs>,
 }

-/// Represents a chat completion response returned by model, based on the provided input.
-#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
+/// Non-streaming chat completion response.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
 pub struct CreateChatCompletionResponse {
-    /// A unique identifier for the chat completion.
    pub id: String,
-    /// A list of chat completion choices. Can be more than one if `n` is greater than 1.
    pub choices: Vec<ChatChoice>,
-    /// The Unix timestamp (in seconds) of when the chat completion was created.
    pub created: u32,
-    /// The model used for the chat completion.
    pub model: String,
-    /// The service tier used for processing the request. This field is only included if the `service_tier` parameter is specified in the request.
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub service_tier: Option<ServiceTierResponse>,
-    /// This fingerprint represents the backend configuration that the model runs with.
-    ///
-    /// Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub system_fingerprint: Option<String>,
-
-    /// The object type, which is always `chat.completion`.
    pub object: String,
    pub usage: Option<CompletionUsage>,
 }

-/// Parsed server side events stream until an \[DONE\] is received from server.
 pub type ChatCompletionResponseStream =
    Pin<Box<dyn Stream<Item = Result<CreateChatCompletionStreamResponse, OpenAIError>> + Send>>;

-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct FunctionCallStream {
-    /// The name of the function to call.
-    pub name: Option<String>,
-    /// The arguments to call the function with, as generated by the model in JSON format.
-    /// Note that the model does not always generate valid JSON, and may hallucinate
-    /// parameters not defined by your function schema. Validate the arguments in your
-    /// code before calling your function.
-    pub arguments: Option<String>,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct ChatCompletionMessageToolCallChunk {
-    pub index: u32,
-    /// The ID of the tool call.
-    pub id: Option<String>,
-    /// The type of the tool. Currently, only `function` is supported.
-    pub r#type: Option<ChatCompletionToolType>,
-    pub function: Option<FunctionCallStream>,
-}
-
-/// A chat completion delta generated by streamed model responses.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
+/// Streaming delta with reasoning content.
+///
+/// Extends upstream `ChatCompletionStreamResponseDelta` with:
+/// - `content`: `Option<ChatCompletionMessageContent>` (multimodal) instead of `Option<String>`
+/// - `reasoning_content`: streaming reasoning tokens (DeepSeek-R1, QwQ)
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatCompletionStreamResponseDelta {
-    /// The contents of the chunk message - can be a string or array of content parts
+    #[serde(skip_serializing_if = "Option::is_none")]
    pub content: Option<ChatCompletionMessageContent>,
-    /// Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model.
-    #[deprecated]
-    pub function_call: Option<FunctionCallStream>,
-
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub function_call: Option<ChatCompletionStreamResponseDeltaFunctionCall>,
+    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ChatCompletionMessageToolCallChunk>>,
-    /// The role of the author of this message.
+    #[serde(skip_serializing_if = "Option::is_none")]
    pub role: Option<Role>,
-    /// The refusal message generated by the model.
+    #[serde(skip_serializing_if = "Option::is_none")]
    pub refusal: Option<String>,
-
-    /// NVIDIA-specific extensions for the chat completion response.
+    /// Streaming reasoning content (DeepSeek-R1, QwQ models).
+    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_content: Option<String>,
 }

-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+pub struct ChatCompletionStreamResponseDeltaFunctionCall {
+    pub name: Option<String>,
+    pub arguments: Option<String>,
+}
+
+/// Streaming chat choice with stop reason support.
+///
+/// Extends upstream `ChatChoiceStream` with:
+/// - `stop_reason`: the matched stop sequence (string) or stop token ID (integer)
+///   reported by inference backends
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
 pub struct ChatChoiceStream {
-    /// The index of the choice in the list of choices.
    pub index: u32,
    pub delta: ChatCompletionStreamResponseDelta,
-    /// The reason the model stopped generating tokens. This will be
-    /// `stop` if the model hit a natural stop point or a provided
-    /// stop sequence,
-    ///
-    /// `length` if the maximum number of tokens specified in the
-    /// request was reached,
-    /// `content_filter` if content was omitted due to a flag from our
-    /// content filters,
-    /// `tool_calls` if the model called a tool, or `function_call`
-    /// (deprecated) if the model called a function.
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,
-    /// Which stop string matched (if any).
-    /// This is only set when `finish_reason` is `"stop"` because a user-provided stop sequence was hit.
+    /// Matched stop sequence (string) or stop token ID (integer) reported by the backend, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop_reason: Option<StopReason>,
-    /// Log probability information for the choice.
-    #[serde(skip_serializing_if = "Option::is_none")]
    pub logprobs: Option<ChatChoiceLogprobs>,
 }

-#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
-/// Represents a streamed chunk of a chat completion response returned by model, based on the provided input.
+/// Streaming chat completion response with extended choices.
+#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
 pub struct CreateChatCompletionStreamResponse {
-    /// A unique identifier for the chat completion. Each chunk has the same ID.
    pub id: String,
-    /// A list of chat completion choices. Can contain more than one elements if `n` is greater than 1. Can also be empty for the last chunk if you set `stream_options: {"include_usage": true}`.
    pub choices: Vec<ChatChoiceStream>,
-
-    /// The Unix timestamp (in seconds) of when the chat completion was created. Each chunk has the same timestamp.
    pub created: u32,
-    /// The model to generate the completion.
    pub model: String,
-    /// The service tier used for processing the request. This field is only included if the `service_tier` parameter is specified in the request.
    pub service_tier: Option<ServiceTierResponse>,
-    /// This fingerprint represents the backend configuration that the model runs with.
-    /// Can be used in conjunction with the `seed` request parameter to understand when backend changes have been made that might impact determinism.
    pub system_fingerprint: Option<String>,
-    /// The object type, which is always `chat.completion.chunk`.
    pub object: String,
-
-    /// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request.
-    /// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.
    pub usage: Option<CompletionUsage>,
 }

 #[cfg(test)]
 mod tests {
    use super::*;
-    use serde_json;

    #[test]
-    fn test_audio_url_content_part_json() {
-        let json = r#"{"type": "audio_url", "audio_url": {"url": "https://example.com/audio.mp3", "uuid": "67e55044-10b1-426f-9247-bb680e5fe0c8"}}"#;
-        let content_part: ChatCompletionRequestUserMessageContentPart =
-            serde_json::from_str(json).unwrap();
-
-        match content_part {
-            ChatCompletionRequestUserMessageContentPart::AudioUrl(part) => {
-                assert_eq!(
-                    part.audio_url.url,
-                    "https://example.com/audio.mp3".parse().unwrap()
-                );
-                assert_eq!(
-                    part.audio_url.uuid,
-                    Some(uuid!("67e55044-10b1-426f-9247-bb680e5fe0c8"))
-                );
+    fn tool_call_defaults_type_on_deserialize() {
+        let tool_call: ChatCompletionMessageToolCall = serde_json::from_value(serde_json::json!({
+            "id": "call_123",
+            "function": {
+                "name": "get_weather",
+                "arguments": "{\"location\":\"SF\"}"
            }
-            _ => panic!("Expected AudioUrl variant"),
-        }
-    }
-
-    #[test]
-    fn test_mm_processor_kwargs() {
-        let request = CreateChatCompletionRequest {
-            messages: vec![],
-            model: "test-model".to_string(),
-            mm_processor_kwargs: Some(serde_json::json!({"max_pixels": 768})),
-            ..Default::default()
-        };
-
-        let json = serde_json::to_string(&request).unwrap();
-        assert!(json.contains("mm_processor_kwargs"));
-    }
-
-    #[test]
-    fn test_assistant_request_reasoning_content_text_roundtrip() {
-        let json = r#"{
-            "model": "deepseek-v3.2",
-            "messages": [
-                {"role": "user", "content": "test"},
-                {
-                    "role": "assistant",
-                    "reasoning_content": "thinking...",
-                    "tool_calls": [{
-                        "id": "call_1",
-                        "type": "function",
-                        "function": {
-                            "name": "f",
-                            "arguments": "{}"
-                        }
-                    }]
-                }
-            ]
-        }"#;
-
-        let request: CreateChatCompletionRequest = serde_json::from_str(json).unwrap();
-        let assistant = match &request.messages[1] {
-            ChatCompletionRequestMessage::Assistant(msg) => msg,
-            _ => panic!("expected assistant message"),
-        };
+        }))
+        .unwrap();

-        assert_eq!(
-            assistant.reasoning_content,
-            Some(ReasoningContent::Text("thinking...".into()))
-        );
-        assert_eq!(
-            assistant
-                .reasoning_content
-                .as_ref()
-                .unwrap()
-                .to_flat_string(),
-            "thinking..."
-        );
-        assert!(
-            assistant
-                .reasoning_content
-                .as_ref()
-                .unwrap()
-                .segments()
-                .is_none()
-        );
-
-        let serialized = serde_json::to_value(&request).unwrap();
-        assert_eq!(
-            serialized["messages"][1]["reasoning_content"],
-            serde_json::Value::String("thinking...".to_string())
-        );
+        assert_eq!(tool_call.r#type, FunctionType::Function);
    }

    #[test]
-    fn test_assistant_request_reasoning_content_segments_roundtrip() {
-        let json = r#"{
-            "model": "deepseek-v3.2",
-            "messages": [
-                {"role": "user", "content": "test"},
-                {
-                    "role": "assistant",
-                    "reasoning_content": ["seg1", "seg2", ""],
-                    "tool_calls": [{
-                        "id": "call_1",
-                        "type": "function",
-                        "function": {"name": "f1", "arguments": "{}"}
-                    }, {
-                        "id": "call_2",
-                        "type": "function",
-                        "function": {"name": "f2", "arguments": "{}"}
-                    }]
-                }
-            ]
-        }"#;
-
-        let request: CreateChatCompletionRequest = serde_json::from_str(json).unwrap();
-        let assistant = match &request.messages[1] {
-            ChatCompletionRequestMessage::Assistant(msg) => msg,
-            _ => panic!("expected assistant message"),
+    fn tool_call_serializes_type_for_wire_compat() {
+        let tool_call = ChatCompletionMessageToolCall {
+            id: "call_123".into(),
+            r#type: FunctionType::Function,
+            function: FunctionCall {
+                name: "get_weather".into(),
+                arguments: "{\"location\":\"SF\"}".into(),
+            },
        };

-        assert_eq!(
-            assistant.reasoning_content,
-            Some(ReasoningContent::Segments(vec![
-                "seg1".into(),
-                "seg2".into(),
-                "".into()
-            ]))
-        );
-        assert_eq!(
-            assistant
-                .reasoning_content
-                .as_ref()
-                .unwrap()
-                .to_flat_string(),
-            "seg1\nseg2"
-        );
-        let segs = assistant
-            .reasoning_content
-            .as_ref()
-            .unwrap()
-            .segments()
-            .expect("should be Segments");
-        assert_eq!(segs.len(), 3);
-
-        let serialized = serde_json::to_value(&request).unwrap();
-        assert_eq!(
-            serialized["messages"][1]["reasoning_content"],
-            serde_json::json!(["seg1", "seg2", ""])
-        );
+        let json = serde_json::to_value(tool_call).unwrap();
+        assert_eq!(json["type"], "function");
    }
 }
--- a/lib/protocols/src/types/common.rs
+++ b/lib/protocols/src/types/common.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use std::path::PathBuf;
-
-use bytes::Bytes;
-use serde::{Deserialize, Serialize};
-
-#[derive(Debug, Clone, PartialEq)]
-pub enum InputSource {
-    Path { path: PathBuf },
-    Bytes { filename: String, bytes: Bytes },
-    VecU8 { filename: String, vec: Vec<u8> },
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum OrganizationRole {
-    Owner,
-    Reader,
-}
--- a/lib/protocols/src/types/completion.rs
+++ b/lib/protocols/src/types/completion.rs
 // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
+// Re-exports upstream async-openai completion types and defines
+// inference-serving extensions.

-use std::{collections::HashMap, pin::Pin};
+use std::collections::HashMap;
+use std::pin::Pin;

 use derive_builder::Builder;
 use futures::Stream;
 use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;

 use crate::error::OpenAIError;

 use super::{ChatCompletionStreamOptions, Choice, CompletionUsage, Prompt, Stop};

+// Re-export the response type from upstream (identical to the local definition it replaces)
+pub use async_openai::types::completions::CreateCompletionResponse;
+
 /// Custom deserializer for the echo parameter that only accepts booleans.
 /// Rejects integers and strings with clear error messages.
 fn deserialize_echo_bool<'de, D>(deserializer: D) -> Result<Option<bool>, D::Error>
 where
    D: serde::Deserializer<'de>,
 {
-    // Outer visitor: handles Option semantics (Some/None/null)
    struct StrictBoolVisitor;

    impl<'de> serde::de::Visitor<'de> for StrictBoolVisitor {
        type Value = Option<bool>;

-        // Required by Visitor trait
        fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
            formatter.write_str("echo parameter to be a boolean (true or false) or null")
        }
@@ -58,7 +55,6 @@ where
        }
    }

-    // Inner visitor: validates type is boolean, rejects integers and strings
    struct BoolOnlyVisitor;

    impl<'de> serde::de::Visitor<'de> for BoolOnlyVisitor {
@@ -75,7 +71,6 @@ where
            Ok(Some(value))
        }

-        // Explicitly reject strings (including "null", "true", "false")
        fn visit_str<E>(self, value: &str) -> Result<Self::Value, E>
        where
            E: serde::de::Error,
@@ -90,142 +85,62 @@ where
    deserializer.deserialize_option(StrictBoolVisitor)
 }

-#[derive(ToSchema, Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)]
+/// Completion request with inference-serving extensions.
+///
+/// Extends upstream `CreateCompletionRequest` with:
+/// - `prompt_embeds`: base64-encoded PyTorch tensor for pre-computed embeddings
+/// - `echo`: strict bool validation (rejects integers/strings)
+/// - `stream_options`: uses our extended `ChatCompletionStreamOptions` (with `continuous_usage_stats`)
+#[derive(Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)]
 #[builder(name = "CreateCompletionRequestArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
 #[builder(derive(Debug))]
 #[builder(build_fn(error = "OpenAIError"))]
 pub struct CreateCompletionRequest {
-    /// ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them.
    pub model: String,
-
-    /// The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.
-    ///
-    /// Note that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document.
    pub prompt: Prompt,
-
    /// Base64-encoded PyTorch tensor containing pre-computed embeddings.
    /// At least one of prompt or prompt_embeds is required.
-    /// If both are provided, prompt_embeds takes precedence.
-    /// Maximum size: 10MB decoded.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_embeds: Option<String>,
-
-    /// The suffix that comes after a completion of inserted text.
-    ///
-    /// This parameter is only supported for `gpt-3.5-turbo-instruct`.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub suffix: Option<String>, // default: null
-
-    /// The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the completion.
-    ///
-    /// The token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.
+    pub suffix: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub max_tokens: Option<u32>,
-
-    /// What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.
-    ///
-    /// We generally recommend altering this or `top_p` but not both.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub temperature: Option<f32>, // min: 0, max: 2, default: 1,
-
-    /// An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.
-    ///
-    ///  We generally recommend altering this or `temperature` but not both.
+    pub temperature: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub top_p: Option<f32>, // min: 0, max: 1, default: 1
-
-    /// How many completions to generate for each prompt.
-
-    /// **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
-    ///
+    pub top_p: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub n: Option<u8>, // min:1 max: 128, default: 1
-
-    /// Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-    /// as they become available, with the stream terminated by a `data: [DONE]` message.
+    pub n: Option<u8>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub stream: Option<bool>, // nullable: true
-
+    pub stream: Option<bool>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stream_options: Option<ChatCompletionStreamOptions>,
-
-    /// Include the log probabilities on the `logprobs` most likely output tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response.
-    ///
-    /// The maximum value for `logprobs` is 5.
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub logprobs: Option<u8>, // min:0 , max: 5, default: null, nullable: true
-
-    /// Echo back the prompt in addition to the completion
+    pub logprobs: Option<u8>,
+    /// Echo back the prompt in addition to the completion.
+    /// Strict bool validation -- rejects integers and strings.
    #[serde(skip_serializing_if = "Option::is_none")]
    #[serde(default, deserialize_with = "deserialize_echo_bool")]
    pub echo: Option<bool>,
-
-    ///  Up to 32 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub stop: Option<Stop>,
-
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.
-    ///
-    /// [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub presence_penalty: Option<f32>, // min: -2.0, max: 2.0, default 0
-
-    /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.
-    ///
-    /// [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
+    pub presence_penalty: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub frequency_penalty: Option<f32>, // min: -2.0, max: 2.0, default: 0
-
-    /// Generates `best_of` completions server-side and returns the "best" (the one with the highest log probability per token). Results cannot be streamed.
-    ///
-    /// When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`.
-    ///
-    /// **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
+    pub frequency_penalty: Option<f32>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub best_of: Option<u8>, //min: 0, max: 20, default: 1
-
-    /// Modify the likelihood of specified tokens appearing in the completion.
-    ///
-    /// Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.
-    ///
-    /// As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from being generated.
+    pub best_of: Option<u8>,
    #[serde(skip_serializing_if = "Option::is_none")]
-    pub logit_bias: Option<HashMap<String, serde_json::Value>>, // default: null
-
-    /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids).
+    pub logit_bias: Option<HashMap<String, serde_json::Value>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user: Option<String>,
-
-    /// If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.
-    ///
-    /// Determinism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub seed: Option<i64>,
 }

-#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct CreateCompletionResponse {
-    /// A unique identifier for the completion.
-    pub id: String,
-    pub choices: Vec<Choice>,
-    /// The Unix timestamp (in seconds) of when the completion was created.
-    pub created: u32,
-
-    /// The model used for completion.
-    pub model: String,
-    /// This fingerprint represents the backend configuration that the model runs with.
-    ///
-    /// Can be used in conjunction with the `seed` request parameter to understand when backend changes have been
-    /// made that might impact determinism.
-    pub system_fingerprint: Option<String>,
-
-    /// The object type, which is always "text_completion"
-    pub object: String,
-    pub usage: Option<CompletionUsage>,
-}
-
 /// Parsed server side events stream until an \[DONE\] is received from server.
 pub type CompletionResponseStream =
    Pin<Box<dyn Stream<Item = Result<CreateCompletionResponse, OpenAIError>> + Send>>;

--- a/lib/protocols/src/types/embedding.rs
+++ b/lib/protocols/src/types/embedding.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use base64::engine::{Engine, general_purpose};
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
-
-use crate::error::OpenAIError;
-
-#[derive(ToSchema, Debug, Serialize, Clone, PartialEq, Deserialize)]
-#[serde(untagged)]
-pub enum EmbeddingInput {
-    String(String),
-    StringArray(Vec<String>),
-    // Minimum value is 0, maximum value is 100257 (inclusive).
-    IntegerArray(Vec<u32>),
-    ArrayOfIntegerArray(Vec<Vec<u32>>),
-}
-
-#[derive(ToSchema, Debug, Serialize, Default, Clone, PartialEq, Deserialize)]
-#[serde(rename_all = "lowercase")]
-pub enum EncodingFormat {
-    #[default]
-    Float,
-    Base64,
-}
-
-#[derive(ToSchema, Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
-#[builder(name = "CreateEmbeddingRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateEmbeddingRequest {
-    /// ID of the model to use. You can use the
-    /// [List models](https://platform.openai.com/docs/api-reference/models/list)
-    /// API to see all of your available models, or see our
-    /// [Model overview](https://platform.openai.com/docs/models/overview)
-    /// for descriptions of them.
-    pub model: String,
-
-    ///  Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.
-    pub input: EmbeddingInput,
-
-    /// The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). Defaults to float
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub encoding_format: Option<EncodingFormat>,
-
-    /// A unique identifier representing your end-user, which will help OpenAI
-    ///  to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-
-    /// The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub dimensions: Option<u32>,
-}
-
-/// Represents an embedding vector returned by embedding endpoint.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct Embedding {
-    /// The index of the embedding in the list of embeddings.
-    pub index: u32,
-    /// The object type, which is always "embedding".
-    pub object: String,
-    /// The embedding vector, which is a list of floats. The length of vector
-    /// depends on the model as listed in the [embedding guide](https://platform.openai.com/docs/guides/embeddings).
-    pub embedding: Vec<f32>,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct Base64EmbeddingVector(pub String);
-
-impl From<Base64EmbeddingVector> for Vec<f32> {
-    fn from(value: Base64EmbeddingVector) -> Self {
-        let bytes = general_purpose::STANDARD
-            .decode(value.0)
-            .expect("openai base64 encoding to be valid");
-        let chunks = bytes.chunks_exact(4);
-        chunks
-            .map(|chunk| f32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]))
-            .collect()
-    }
-}
-
-/// Represents an base64-encoded embedding vector returned by embedding endpoint.
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct Base64Embedding {
-    /// The index of the embedding in the list of embeddings.
-    pub index: u32,
-    /// The object type, which is always "embedding".
-    pub object: String,
-    /// The embedding vector, encoded in base64.
-    pub embedding: Base64EmbeddingVector,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct EmbeddingUsage {
-    /// The number of tokens used by the prompt.
-    pub prompt_tokens: u32,
-    /// The total number of tokens used by the request.
-    pub total_tokens: u32,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct CreateEmbeddingResponse {
-    pub object: String,
-    /// The name of the model used to generate the embedding.
-    pub model: String,
-    /// The list of embeddings generated by the model.
-    pub data: Vec<Embedding>,
-    /// The usage information for the request.
-    pub usage: EmbeddingUsage,
-}
-
-#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct CreateBase64EmbeddingResponse {
-    pub object: String,
-    /// The name of the model used to generate the embedding.
-    pub model: String,
-    /// The list of embeddings generated by the model.
-    pub data: Vec<Base64Embedding>,
-    /// The usage information for the request.
-    pub usage: EmbeddingUsage,
-}
--- a/lib/protocols/src/types/file.rs
+++ b/lib/protocols/src/types/file.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-
-use crate::error::OpenAIError;
-
-use super::InputSource;
-
-#[derive(Debug, Default, Clone, PartialEq)]
-pub struct FileInput {
-    pub source: InputSource,
-}
-
-#[derive(Debug, Default, Clone, PartialEq)]
-pub enum FilePurpose {
-    Assistants,
-    Batch,
-    #[default]
-    FineTune,
-    Vision,
-}
-
-#[derive(Debug, Default, Clone, Builder, PartialEq)]
-#[builder(name = "CreateFileRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateFileRequest {
-    /// The File object (not file name) to be uploaded.
-    pub file: FileInput,
-
-    /// The intended purpose of the uploaded file.
-    ///
-    /// Use "assistants" for [Assistants](https://platform.openai.com/docs/api-reference/assistants) and [Message](https://platform.openai.com/docs/api-reference/messages) files, "vision" for Assistants image file inputs, "batch" for [Batch API](https://platform.openai.com/docs/guides/batch), and "fine-tune" for [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning).
-    pub purpose: FilePurpose,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct ListFilesResponse {
-    pub object: String,
-    pub data: Vec<OpenAIFile>,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct DeleteFileResponse {
-    pub id: String,
-    pub object: String,
-    pub deleted: bool,
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub enum OpenAIFilePurpose {
-    #[serde(rename = "assistants")]
-    Assistants,
-    #[serde(rename = "assistants_output")]
-    AssistantsOutput,
-    #[serde(rename = "batch")]
-    Batch,
-    #[serde(rename = "batch_output")]
-    BatchOutput,
-    #[serde(rename = "fine-tune")]
-    FineTune,
-    #[serde(rename = "fine-tune-results")]
-    FineTuneResults,
-    #[serde(rename = "vision")]
-    Vision,
-}
-
-/// The `File` object represents a document that has been uploaded to OpenAI.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct OpenAIFile {
-    /// The file identifier, which can be referenced in the API endpoints.
-    pub id: String,
-    /// The object type, which is always "file".
-    pub object: String,
-    /// The size of the file in bytes.
-    pub bytes: u32,
-    /// The Unix timestamp (in seconds) for when the file was created.
-    pub created_at: u32,
-    /// The name of the file.
-    pub filename: String,
-    /// The intended purpose of the file. Supported values are `assistants`, `assistants_output`, `batch`, `batch_output`, `fine-tune`, `fine-tune-results` and `vision`.
-    pub purpose: OpenAIFilePurpose,
-    /// Deprecated. The current status of the file, which can be either `uploaded`, `processed`, or `error`.
-    #[deprecated]
-    pub status: Option<String>,
-    /// Deprecated. For details on why a fine-tuning training file failed validation, see the `error` field on `fine_tuning.job`.
-    #[deprecated]
-    pub status_details: Option<String>, // nullable: true
-}
--- a/lib/protocols/src/types/fine_tuning.rs
+++ b/lib/protocols/src/types/fine_tuning.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-
-use crate::error::OpenAIError;
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)]
-#[serde(untagged)]
-pub enum NEpochs {
-    NEpochs(u8),
-    #[default]
-    #[serde(rename = "auto")]
-    Auto,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)]
-#[serde(untagged)]
-pub enum BatchSize {
-    BatchSize(u16),
-    #[default]
-    #[serde(rename = "auto")]
-    Auto,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)]
-#[serde(untagged)]
-pub enum LearningRateMultiplier {
-    LearningRateMultiplier(f32),
-    #[default]
-    #[serde(rename = "auto")]
-    Auto,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)]
-pub struct Hyperparameters {
-    /// Number of examples in each batch. A larger batch size means that model parameters
-    /// are updated less frequently, but with lower variance.
-    pub batch_size: BatchSize,
-    /// Scaling factor for the learning rate. A smaller learning rate may be useful to avoid
-    /// overfitting.
-    pub learning_rate_multiplier: LearningRateMultiplier,
-    /// The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset.
-    pub n_epochs: NEpochs,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)]
-#[serde(untagged)]
-pub enum Beta {
-    Beta(f32),
-    #[default]
-    #[serde(rename = "auto")]
-    Auto,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default, PartialEq)]
-pub struct DPOHyperparameters {
-    /// The beta value for the DPO method. A higher beta value will increase the weight of the penalty between the policy and reference model.
-    pub beta: Beta,
-    /// Number of examples in each batch. A larger batch size means that model parameters
-    /// are updated less frequently, but with lower variance.
-    pub batch_size: BatchSize,
-    /// Scaling factor for the learning rate. A smaller learning rate may be useful to avoid
-    /// overfitting.
-    pub learning_rate_multiplier: LearningRateMultiplier,
-    /// The number of epochs to train the model for. An epoch refers to one full cycle through the training dataset.
-    pub n_epochs: NEpochs,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Default, Builder, PartialEq)]
-#[builder(name = "CreateFineTuningJobRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateFineTuningJobRequest {
-    /// The name of the model to fine-tune. You can select one of the
-    /// [supported models](https://platform.openai.com/docs/guides/fine-tuning#which-models-can-be-fine-tuned).
-    pub model: String,
-
-    /// The ID of an uploaded file that contains training data.
-    ///
-    /// See [upload file](https://platform.openai.com/docs/api-reference/files/create) for how to upload a file.
-    ///
-    /// Your dataset must be formatted as a JSONL file. Additionally, you must upload your file with the purpose `fine-tune`.
-    ///
-    /// The contents of the file should differ depending on if the model uses the [chat](https://platform.openai.com/docs/api-reference/fine-tuning/chat-input), [completions](https://platform.openai.com/docs/api-reference/fine-tuning/completions-input) format, or if the fine-tuning method uses the [preference](https://platform.openai.com/docs/api-reference/fine-tuning/preference-input) format.
-    ///
-    /// See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details.
-    pub training_file: String,
-
-    /// The hyperparameters used for the fine-tuning job.
-    /// This value is now deprecated in favor of `method`, and should be passed in under the `method` parameter.
-    #[deprecated]
-    pub hyperparameters: Option<Hyperparameters>,
-
-    /// A string of up to 64 characters that will be added to your fine-tuned model name.
-    ///
-    /// For example, a `suffix` of "custom-model-name" would produce a model name like `ft:gpt-4o-mini:openai:custom-model-name:7p4lURel`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub suffix: Option<String>, // default: null, minLength:1, maxLength:40
-
-    /// The ID of an uploaded file that contains validation data.
-    ///
-    /// If you provide this file, the data is used to generate validation
-    /// metrics periodically during fine-tuning. These metrics can be viewed in
-    /// the fine-tuning results file.
-    /// The same data should not be present in both train and validation files.
-    ///
-    /// Your dataset must be formatted as a JSONL file. You must upload your file with the purpose `fine-tune`.
-    ///
-    /// See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning) for more details.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub validation_file: Option<String>,
-
-    /// A list of integrations to enable for your fine-tuning job.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub integrations: Option<Vec<FineTuningIntegration>>,
-
-    /// The seed controls the reproducibility of the job. Passing in the same seed and job parameters should produce the same results, but may differ in rare cases.
-    /// If a seed is not specified, one will be generated for you.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub seed: Option<u32>, // min:0, max: 2147483647
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub method: Option<FineTuneMethod>,
-}
-
-/// The method used for fine-tuning.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-#[serde(tag = "type", rename_all = "lowercase")]
-pub enum FineTuneMethod {
-    Supervised {
-        supervised: FineTuneSupervisedMethod,
-    },
-    DPO {
-        dpo: FineTuneDPOMethod,
-    },
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct FineTuneSupervisedMethod {
-    pub hyperparameters: Hyperparameters,
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct FineTuneDPOMethod {
-    pub hyperparameters: DPOHyperparameters,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize, Default)]
-#[serde(rename_all = "lowercase")]
-pub enum FineTuningJobIntegrationType {
-    #[default]
-    Wandb,
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct FineTuningIntegration {
-    /// The type of integration to enable. Currently, only "wandb" (Weights and Biases) is supported.
-    pub r#type: FineTuningJobIntegrationType,
-
-    /// The settings for your integration with Weights and Biases. This payload specifies the project that
-    /// metrics will be sent to. Optionally, you can set an explicit display name for your run, add tags
-    /// to your run, and set a default entity (team, username, etc) to be associated with your run.
-    pub wandb: WandB,
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct WandB {
-    /// The name of the project that the new run will be created under.
-    pub project: String,
-    /// A display name to set for the run. If not set, we will use the Job ID as the name.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub name: Option<String>,
-    /// The entity to use for the run. This allows you to set the team or username of the WandB user that you would
-    /// like associated with the run. If not set, the default entity for the registered WandB API key is used.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub entity: Option<String>,
-    /// A list of tags to be attached to the newly created run. These tags are passed through directly to WandB. Some
-    /// default tags are generated by OpenAI: "openai/finetune", "openai/{base-model}", "openai/{ftjob-abcdef}".
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tags: Option<Vec<String>>,
-}
-
-/// For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct FineTuneJobError {
-    ///  A machine-readable error code.
-    pub code: String,
-    ///  A human-readable error message.
-    pub message: String,
-    /// The parameter that was invalid, usually `training_file` or `validation_file`.
-    /// This field will be null if the failure was not parameter-specific.
-    pub param: Option<String>, // nullable true
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum FineTuningJobStatus {
-    ValidatingFiles,
-    Queued,
-    Running,
-    Succeeded,
-    Failed,
-    Cancelled,
-}
-
-/// The `fine_tuning.job` object represents a fine-tuning job that has been created through the API.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct FineTuningJob {
-    /// The object identifier, which can be referenced in the API endpoints.
-    pub id: String,
-    /// The Unix timestamp (in seconds) for when the fine-tuning job was created.
-    pub created_at: u32,
-    /// For fine-tuning jobs that have `failed`, this will contain more information on the cause of the failure.
-    pub error: Option<FineTuneJobError>,
-    /// The name of the fine-tuned model that is being created.
-    /// The value will be null if the fine-tuning job is still running.
-    pub fine_tuned_model: Option<String>, // nullable: true
-    /// The Unix timestamp (in seconds) for when the fine-tuning job was finished.
-    /// The value will be null if the fine-tuning job is still running.
-    pub finished_at: Option<u32>, // nullable true
-
-    /// The hyperparameters used for the fine-tuning job.
-    /// See the [fine-tuning guide](/docs/guides/fine-tuning) for more details.
-    pub hyperparameters: Hyperparameters,
-
-    ///  The base model that is being fine-tuned.
-    pub model: String,
-
-    /// The object type, which is always "fine_tuning.job".
-    pub object: String,
-    /// The organization that owns the fine-tuning job.
-    pub organization_id: String,
-
-    /// The compiled results file ID(s) for the fine-tuning job.
-    /// You can retrieve the results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
-    pub result_files: Vec<String>,
-
-    /// The current status of the fine-tuning job, which can be either
-    /// `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.
-    pub status: FineTuningJobStatus,
-
-    /// The total number of billable tokens processed by this fine-tuning job. The value will be null if the fine-tuning job is still running.
-    pub trained_tokens: Option<u32>,
-
-    /// The file ID used for training. You can retrieve the training data with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
-    pub training_file: String,
-
-    ///  The file ID used for validation. You can retrieve the validation results with the [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
-    pub validation_file: Option<String>,
-
-    /// A list of integrations to enable for this fine-tuning job.
-    pub integrations: Option<Vec<FineTuningIntegration>>, // maxItems: 5
-
-    /// The seed used for the fine-tuning job.
-    pub seed: u32,
-
-    /// The Unix timestamp (in seconds) for when the fine-tuning job is estimated to finish. The value will be null if the fine-tuning job is not running.
-    pub estimated_finish: Option<u32>,
-
-    pub method: Option<FineTuneMethod>,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ListPaginatedFineTuningJobsResponse {
-    pub data: Vec<FineTuningJob>,
-    pub has_more: bool,
-    pub object: String,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ListFineTuningJobEventsResponse {
-    pub data: Vec<FineTuningJobEvent>,
-    pub object: String,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ListFineTuningJobCheckpointsResponse {
-    pub data: Vec<FineTuningJobCheckpoint>,
-    pub object: String,
-    pub first_id: Option<String>,
-    pub last_id: Option<String>,
-    pub has_more: bool,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum Level {
-    Info,
-    Warn,
-    Error,
-}
-
-///Fine-tuning job event object
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FineTuningJobEvent {
-    /// The object identifier.
-    pub id: String,
-    /// The Unix timestamp (in seconds) for when the fine-tuning job event was created.
-    pub created_at: u32,
-    /// The log level of the event.
-    pub level: Level,
-    /// The message of the event.
-    pub message: String,
-    /// The object type, which is always "fine_tuning.job.event".
-    pub object: String,
-    /// The type of event.
-    pub r#type: Option<FineTuningJobEventType>,
-    /// The data associated with the event.
-    pub data: Option<serde_json::Value>,
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum FineTuningJobEventType {
-    Message,
-    Metrics,
-}
-
-/// The `fine_tuning.job.checkpoint` object represents a model checkpoint for a fine-tuning job that is ready to use.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FineTuningJobCheckpoint {
-    /// The checkpoint identifier, which can be referenced in the API endpoints.
-    pub id: String,
-    /// The Unix timestamp (in seconds) for when the checkpoint was created.
-    pub created_at: u32,
-    /// The name of the fine-tuned checkpoint model that is created.
-    pub fine_tuned_model_checkpoint: String,
-    /// The step number that the checkpoint was created at.
-    pub step_number: u32,
-    /// Metrics at the step number during the fine-tuning job.
-    pub metrics: FineTuningJobCheckpointMetrics,
-    /// The name of the fine-tuning job that this checkpoint was created from.
-    pub fine_tuning_job_id: String,
-    /// The object type, which is always "fine_tuning.job.checkpoint".
-    pub object: String,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct FineTuningJobCheckpointMetrics {
-    pub step: u32,
-    pub train_loss: f32,
-    pub train_mean_token_accuracy: f32,
-    pub valid_loss: f32,
-    pub valid_mean_token_accuracy: f32,
-    pub full_valid_loss: f32,
-    pub full_valid_mean_token_accuracy: f32,
-}
--- a/lib/protocols/src/types/image.rs
+++ b/lib/protocols/src/types/image.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-
-use crate::error::OpenAIError;
-
-use super::InputSource;
-
-#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-pub enum ImageSize {
-    #[serde(rename = "256x256")]
-    S256x256,
-    #[serde(rename = "512x512")]
-    S512x512,
-    #[default]
-    #[serde(rename = "1024x1024")]
-    S1024x1024,
-    #[serde(rename = "1792x1024")]
-    S1792x1024,
-    #[serde(rename = "1024x1792")]
-    S1024x1792,
-}
-
-#[derive(Default, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-pub enum DallE2ImageSize {
-    #[serde(rename = "256x256")]
-    S256x256,
-    #[serde(rename = "512x512")]
-    S512x512,
-    #[default]
-    #[serde(rename = "1024x1024")]
-    S1024x1024,
-}
-
-#[derive(Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageResponseFormat {
-    #[default]
-    Url,
-    #[serde(rename = "b64_json")]
-    B64Json,
-}
-
-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-pub enum ImageModel {
-    #[default]
-    #[serde(rename = "dall-e-2")]
-    DallE2,
-    #[serde(rename = "dall-e-3")]
-    DallE3,
-    #[serde(untagged)]
-    Other(String),
-}
-
-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageQuality {
-    #[default]
-    Standard,
-    HD,
-    High,
-    Medium,
-    Low,
-    Auto,
-}
-
-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageStyle {
-    #[default]
-    Vivid,
-    Natural,
-}
-
-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ImageModeration {
-    #[default]
-    Auto,
-    Low,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, Default, Builder, PartialEq)]
-#[builder(name = "CreateImageRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateImageRequest {
-    /// A text description of the desired image(s).
-    pub prompt: String,
-
-    /// The model to use for image generation.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub model: Option<ImageModel>,
-
-    /// The number of images to generate. Must be between 1 and 10.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub n: Option<u8>, // min:1 max:10 default:1
-
-    /// The quality of the image that will be generated. `hd` creates images with finer details and greater
-    /// consistency across the image. This param is only supported for `dall-e-3`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub quality: Option<ImageQuality>,
-
-    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the image has been generated.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub response_format: Option<ImageResponseFormat>,
-
-    /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`.
-    /// Must be one of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub size: Option<ImageSize>,
-
-    /// The style of the generated images. Must be one of `vivid` or `natural`.
-    /// Vivid causes the model to lean towards generating hyper-real and dramatic images.
-    /// Natural causes the model to produce more natural, less hyper-real looking images.
-    /// This param is only supported for `dall-e-3`.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub style: Option<ImageStyle>,
-
-    /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub user: Option<String>,
-
-    /// Control the content-moderation level for images generated by gpt-image-1.
-    /// Must be either `low` for less restrictive filtering or `auto` (default value).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub moderation: Option<ImageModeration>,
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-#[serde(untagged)]
-pub enum Image {
-    /// The URL of the generated image, if `response_format` is `url` (default).
-    Url {
-        url: String,
-        revised_prompt: Option<String>,
-    },
-    /// The base64-encoded JSON of the generated image, if `response_format` is `b64_json`.
-    B64Json {
-        b64_json: std::sync::Arc<String>,
-        revised_prompt: Option<String>,
-    },
-}
-
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct ImagesResponse {
-    pub created: u32,
-    pub data: Vec<std::sync::Arc<Image>>,
-}
-
-#[derive(Debug, Default, Clone, PartialEq)]
-pub struct ImageInput {
-    pub source: InputSource,
-}
-
-#[derive(Debug, Clone, Default, Builder, PartialEq)]
-#[builder(name = "CreateImageEditRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateImageEditRequest {
-    /// The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask is not provided, image must have transparency, which will be used as the mask.
-    pub image: ImageInput,
-
-    /// A text description of the desired image(s). The maximum length is 1000 characters.
-    pub prompt: String,
-
-    /// An additional image whose fully transparent areas (e.g. where alpha is zero) indicate where `image` should be edited. Must be a valid PNG file, less than 4MB, and have the same dimensions as `image`.
-    pub mask: Option<ImageInput>,
-
-    /// The model to use for image generation. Only `dall-e-2` is supported at this time.
-    pub model: Option<ImageModel>,
-
-    /// The number of images to generate. Must be between 1 and 10.
-    pub n: Option<u8>, // min:1 max:10 default:1
-
-    /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.
-    pub size: Option<DallE2ImageSize>,
-
-    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`.
-    pub response_format: Option<ImageResponseFormat>,
-
-    /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids).
-    pub user: Option<String>,
-}
-
-#[derive(Debug, Default, Clone, Builder, PartialEq)]
-#[builder(name = "CreateImageVariationRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateImageVariationRequest {
-    /// The image to use as the basis for the variation(s). Must be a valid PNG file, less than 4MB, and square.
-    pub image: ImageInput,
-
-    /// The model to use for image generation. Only `dall-e-2` is supported at this time.
-    pub model: Option<ImageModel>,
-
-    /// The number of images to generate. Must be between 1 and 10.
-    pub n: Option<u8>, // min:1 max:10 default:1
-
-    /// The size of the generated images. Must be one of `256x256`, `512x512`, or `1024x1024`.
-    pub size: Option<DallE2ImageSize>,
-
-    /// The format in which the generated images are returned. Must be one of `url` or `b64_json`.
-    pub response_format: Option<ImageResponseFormat>,
-
-    /// A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids).
-    pub user: Option<String>,
-}
--- a/lib/protocols/src/types/impls.rs
+++ b/lib/protocols/src/types/impls.rs
 // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use std::{
-    fmt::Display,
-    path::{Path, PathBuf},
-};
+// Convenience trait implementations for locally-defined types.
+// Types re-exported from upstream async-openai already have their own impls.

-use crate::{
-    download::{download_url, save_b64},
-    error::OpenAIError,
-    traits::AsyncTryFrom,
-    types::InputSource,
-    util::{create_all_dir, create_file_part},
-};
-
-use bytes::Bytes;
+use std::fmt::Display;

 use super::{
-    AddUploadPartRequest,
-    AudioInput,
-    AudioResponseFormat,
-    AudioUrl,
-    ChatCompletionFunctionCall,
-    ChatCompletionFunctions,
-    ChatCompletionNamedToolChoice,
-    ChatCompletionRequestAssistantMessage,
-    ChatCompletionRequestAssistantMessageContent,
-    ChatCompletionRequestDeveloperMessage,
-    ChatCompletionRequestDeveloperMessageContent,
-    ChatCompletionRequestFunctionMessage,
-    ChatCompletionRequestMessage,
-    ChatCompletionRequestMessageContentPartAudio,
-    ChatCompletionRequestMessageContentPartAudioUrl,
-    ChatCompletionRequestMessageContentPartImage,
-    ChatCompletionRequestMessageContentPartText,
-    ChatCompletionRequestMessageContentPartVideo,
-    ChatCompletionRequestSystemMessage,
-    ChatCompletionRequestSystemMessageContent,
-    ChatCompletionRequestToolMessage,
-    ChatCompletionRequestToolMessageContent,
-    ChatCompletionRequestUserMessage,
-    ChatCompletionRequestUserMessageContent,
-    ChatCompletionRequestUserMessageContentPart,
-    ChatCompletionToolChoiceOption,
-    CreateFileRequest,
-    CreateImageEditRequest,
-    CreateImageVariationRequest,
-    CreateMessageRequestContent,
-    CreateSpeechResponse,
-    CreateTranscriptionRequest,
-    CreateTranslationRequest,
-    DallE2ImageSize,
-    EmbeddingInput,
-    FileInput,
-    FilePurpose,
-    FunctionName,
-    Image,
-    ImageInput,
-    ImageModel,
-    ImageResponseFormat,
-    ImageSize,
-    ImageUrl,
-    ImagesResponse,
-    ModerationInput,
-    Prompt,
-    Role,
-    Stop,
-    TimestampGranularity,
-    VideoUrl,
-    // responses types now have their own impls in responses/impls.rs
+    AudioUrl, ChatCompletionNamedToolChoice, ChatCompletionRequestAssistantMessage,
+    ChatCompletionRequestAssistantMessageContent, ChatCompletionRequestMessage,
+    ChatCompletionRequestMessageContentPartAudio, ChatCompletionRequestMessageContentPartAudioUrl,
+    ChatCompletionRequestMessageContentPartImage, ChatCompletionRequestMessageContentPartText,
+    ChatCompletionRequestMessageContentPartVideo, ChatCompletionRequestUserMessageContentPart,
+    ChatCompletionToolChoiceOption, ChatCompletionToolType, FunctionName, ImageUrl, VideoUrl,
 };

-/// for `impl_from!(T, Enum)`, implements
-/// - `From<T>`
-/// - `From<Vec<T>>`
-/// - `From<&Vec<T>>`
-/// - `From<[T; N]>`
-/// - `From<&[T; N]>`
-///
-/// for `T: Into<String>` and `Enum` having variants `String(String)` and `StringArray(Vec<String>)`
-macro_rules! impl_from {
-    ($from_typ:ty, $to_typ:ty) => {
-        // From<T> -> String variant
-        impl From<$from_typ> for $to_typ {
-            fn from(value: $from_typ) -> Self {
-                <$to_typ>::String(value.into())
-            }
-        }
-
-        // From<Vec<T>> -> StringArray variant
-        impl From<Vec<$from_typ>> for $to_typ {
-            fn from(value: Vec<$from_typ>) -> Self {
-                <$to_typ>::StringArray(value.iter().map(|v| v.to_string()).collect())
-            }
-        }
-
-        // From<&Vec<T>> -> StringArray variant
-        impl From<&Vec<$from_typ>> for $to_typ {
-            fn from(value: &Vec<$from_typ>) -> Self {
-                <$to_typ>::StringArray(value.iter().map(|v| v.to_string()).collect())
-            }
-        }
-
-        // From<[T; N]> -> StringArray variant
-        impl<const N: usize> From<[$from_typ; N]> for $to_typ {
-            fn from(value: [$from_typ; N]) -> Self {
-                <$to_typ>::StringArray(value.into_iter().map(|v| v.to_string()).collect())
-            }
-        }
-
-        // From<&[T; N]> -> StringArray variant
-        impl<const N: usize> From<&[$from_typ; N]> for $to_typ {
-            fn from(value: &[$from_typ; N]) -> Self {
-                <$to_typ>::StringArray(value.into_iter().map(|v| v.to_string()).collect())
-            }
-        }
-    };
-}
-
-// From String "family" to Prompt
-impl_from!(&str, Prompt);
-impl_from!(String, Prompt);
-impl_from!(&String, Prompt);
-
-// From String "family" to Stop
-impl_from!(&str, Stop);
-impl_from!(String, Stop);
-impl_from!(&String, Stop);
-
-// From String "family" to ModerationInput
-impl_from!(&str, ModerationInput);
-impl_from!(String, ModerationInput);
-impl_from!(&String, ModerationInput);
-
-// From String "family" to EmbeddingInput
-impl_from!(&str, EmbeddingInput);
-impl_from!(String, EmbeddingInput);
-impl_from!(&String, EmbeddingInput);
-
-/// for `impl_default!(Enum)`, implements `Default` for `Enum` as `Enum::String("")` where `Enum` has `String` variant
-macro_rules! impl_default {
-    ($for_typ:ty) => {
-        impl Default for $for_typ {
-            fn default() -> Self {
-                Self::String("".into())
-            }
-        }
-    };
-}
-
-impl_default!(Prompt);
-impl_default!(ModerationInput);
-impl_default!(EmbeddingInput);
-
-impl Default for InputSource {
-    fn default() -> Self {
-        InputSource::Path {
-            path: PathBuf::new(),
-        }
-    }
-}
-
-/// for `impl_input!(Struct)` where
-/// ```text
-/// Struct {
-///     source: InputSource
-/// }
-/// ```
-/// implements methods `from_bytes` and `from_vec_u8`,
-/// and `From<P>` for `P: AsRef<Path>`
-macro_rules! impl_input {
-    ($for_typ:ty) => {
-        impl $for_typ {
-            pub fn from_bytes(filename: String, bytes: Bytes) -> Self {
-                Self {
-                    source: InputSource::Bytes { filename, bytes },
-                }
-            }
-
-            pub fn from_vec_u8(filename: String, vec: Vec<u8>) -> Self {
-                Self {
-                    source: InputSource::VecU8 { filename, vec },
-                }
-            }
-        }
-
-        impl<P: AsRef<Path>> From<P> for $for_typ {
-            fn from(path: P) -> Self {
-                let path_buf = path.as_ref().to_path_buf();
-                Self {
-                    source: InputSource::Path { path: path_buf },
-                }
-            }
-        }
-    };
-}
-
-impl_input!(AudioInput);
-impl_input!(FileInput);
-impl_input!(ImageInput);
-
-impl Display for ImageSize {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                Self::S256x256 => "256x256",
-                Self::S512x512 => "512x512",
-                Self::S1024x1024 => "1024x1024",
-                Self::S1792x1024 => "1792x1024",
-                Self::S1024x1792 => "1024x1792",
-            }
-        )
-    }
-}
-
-impl Display for DallE2ImageSize {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                Self::S256x256 => "256x256",
-                Self::S512x512 => "512x512",
-                Self::S1024x1024 => "1024x1024",
-            }
-        )
-    }
-}
-
-impl Display for ImageModel {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                Self::DallE2 => "dall-e-2",
-                Self::DallE3 => "dall-e-3",
-                Self::Other(other) => other,
-            }
-        )
-    }
-}
-
-impl Display for ImageResponseFormat {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                Self::Url => "url",
-                Self::B64Json => "b64_json",
-            }
-        )
-    }
-}
-
-impl Display for AudioResponseFormat {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                AudioResponseFormat::Json => "json",
-                AudioResponseFormat::Srt => "srt",
-                AudioResponseFormat::Text => "text",
-                AudioResponseFormat::VerboseJson => "verbose_json",
-                AudioResponseFormat::Vtt => "vtt",
-            }
-        )
-    }
-}
-
-impl Display for TimestampGranularity {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                TimestampGranularity::Word => "word",
-                TimestampGranularity::Segment => "segment",
-            }
-        )
-    }
-}
-
-impl Display for Role {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                Role::User => "user",
-                Role::System => "system",
-                Role::Assistant => "assistant",
-                Role::Function => "function",
-                Role::Tool => "tool",
-            }
-        )
-    }
-}
-
-impl Display for FilePurpose {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(
-            f,
-            "{}",
-            match self {
-                Self::Assistants => "assistants",
-                Self::Batch => "batch",
-                Self::FineTune => "fine-tune",
-                Self::Vision => "vision",
-            }
-        )
-    }
-}
-
-impl ImagesResponse {
-    /// Save each image in a dedicated Tokio task and return paths to saved files.
-    /// For [ResponseFormat::Url] each file is downloaded in dedicated Tokio task.
-    pub async fn save<P: AsRef<Path>>(&self, dir: P) -> Result<Vec<PathBuf>, OpenAIError> {
-        create_all_dir(dir.as_ref())?;
-
-        let mut handles = vec![];
-        for id in self.data.clone() {
-            let dir_buf = PathBuf::from(dir.as_ref());
-            handles.push(tokio::spawn(async move { id.save(dir_buf).await }));
-        }
-
-        let results = futures::future::join_all(handles).await;
-        let mut errors = vec![];
-        let mut paths = vec![];
-
-        for result in results {
-            match result {
-                Ok(inner) => match inner {
-                    Ok(path) => paths.push(path),
-                    Err(e) => errors.push(e),
-                },
-                Err(e) => errors.push(OpenAIError::FileSaveError(e.to_string())),
-            }
-        }
-
-        if errors.is_empty() {
-            Ok(paths)
-        } else {
-            Err(OpenAIError::FileSaveError(
-                errors
-                    .into_iter()
-                    .map(|e| e.to_string())
-                    .collect::<Vec<String>>()
-                    .join("; "),
-            ))
-        }
-    }
-}
-
-impl CreateSpeechResponse {
-    pub async fn save<P: AsRef<Path>>(&self, file_path: P) -> Result<(), OpenAIError> {
-        let dir = file_path.as_ref().parent();
-
-        if let Some(dir) = dir {
-            create_all_dir(dir)?;
-        }
-
-        tokio::fs::write(file_path, &self.bytes)
-            .await
-            .map_err(|e| OpenAIError::FileSaveError(e.to_string()))?;
-
-        Ok(())
-    }
-}
-
-impl Image {
-    async fn save<P: AsRef<Path>>(&self, dir: P) -> Result<PathBuf, OpenAIError> {
-        match self {
-            Image::Url { url, .. } => download_url(url, dir).await,
-            Image::B64Json { b64_json, .. } => save_b64(b64_json, dir).await,
-        }
-    }
-}
-
-macro_rules! impl_from_for_integer_array {
-    ($from_typ:ty, $to_typ:ty) => {
-        impl<const N: usize> From<[$from_typ; N]> for $to_typ {
-            fn from(value: [$from_typ; N]) -> Self {
-                Self::IntegerArray(value.to_vec())
-            }
-        }
-
-        impl<const N: usize> From<&[$from_typ; N]> for $to_typ {
-            fn from(value: &[$from_typ; N]) -> Self {
-                Self::IntegerArray(value.to_vec())
-            }
-        }
-
-        impl From<Vec<$from_typ>> for $to_typ {
-            fn from(value: Vec<$from_typ>) -> Self {
-                Self::IntegerArray(value)
-            }
-        }
-
-        impl From<&Vec<$from_typ>> for $to_typ {
-            fn from(value: &Vec<$from_typ>) -> Self {
-                Self::IntegerArray(value.clone())
-            }
-        }
-    };
-}
-
-impl_from_for_integer_array!(u32, EmbeddingInput);
-impl_from_for_integer_array!(u32, Prompt);
-
-macro_rules! impl_from_for_array_of_integer_array {
-    ($from_typ:ty, $to_typ:ty) => {
-        impl From<Vec<Vec<$from_typ>>> for $to_typ {
-            fn from(value: Vec<Vec<$from_typ>>) -> Self {
-                Self::ArrayOfIntegerArray(value)
-            }
-        }
-
-        impl From<&Vec<Vec<$from_typ>>> for $to_typ {
-            fn from(value: &Vec<Vec<$from_typ>>) -> Self {
-                Self::ArrayOfIntegerArray(value.clone())
-            }
-        }
-
-        impl<const M: usize, const N: usize> From<[[$from_typ; N]; M]> for $to_typ {
-            fn from(value: [[$from_typ; N]; M]) -> Self {
-                Self::ArrayOfIntegerArray(value.iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-
-        impl<const M: usize, const N: usize> From<[&[$from_typ; N]; M]> for $to_typ {
-            fn from(value: [&[$from_typ; N]; M]) -> Self {
-                Self::ArrayOfIntegerArray(value.iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-
-        impl<const M: usize, const N: usize> From<&[[$from_typ; N]; M]> for $to_typ {
-            fn from(value: &[[$from_typ; N]; M]) -> Self {
-                Self::ArrayOfIntegerArray(value.iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-
-        impl<const M: usize, const N: usize> From<&[&[$from_typ; N]; M]> for $to_typ {
-            fn from(value: &[&[$from_typ; N]; M]) -> Self {
-                Self::ArrayOfIntegerArray(value.iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-
-        impl<const N: usize> From<[Vec<$from_typ>; N]> for $to_typ {
-            fn from(value: [Vec<$from_typ>; N]) -> Self {
-                Self::ArrayOfIntegerArray(value.to_vec())
-            }
-        }
+use crate::error::OpenAIError;

-        impl<const N: usize> From<&[Vec<$from_typ>; N]> for $to_typ {
-            fn from(value: &[Vec<$from_typ>; N]) -> Self {
-                Self::ArrayOfIntegerArray(value.to_vec())
-            }
-        }
-
-        impl<const N: usize> From<[&Vec<$from_typ>; N]> for $to_typ {
-            fn from(value: [&Vec<$from_typ>; N]) -> Self {
-                Self::ArrayOfIntegerArray(value.into_iter().map(|inner| inner.clone()).collect())
-            }
-        }
-
-        impl<const N: usize> From<&[&Vec<$from_typ>; N]> for $to_typ {
-            fn from(value: &[&Vec<$from_typ>; N]) -> Self {
-                Self::ArrayOfIntegerArray(
-                    value
-                        .to_vec()
-                        .into_iter()
-                        .map(|inner| inner.clone())
-                        .collect(),
-                )
-            }
-        }
-
-        impl<const N: usize> From<Vec<[$from_typ; N]>> for $to_typ {
-            fn from(value: Vec<[$from_typ; N]>) -> Self {
-                Self::ArrayOfIntegerArray(value.into_iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-
-        impl<const N: usize> From<&Vec<[$from_typ; N]>> for $to_typ {
-            fn from(value: &Vec<[$from_typ; N]>) -> Self {
-                Self::ArrayOfIntegerArray(value.into_iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-
-        impl<const N: usize> From<Vec<&[$from_typ; N]>> for $to_typ {
-            fn from(value: Vec<&[$from_typ; N]>) -> Self {
-                Self::ArrayOfIntegerArray(value.into_iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-
-        impl<const N: usize> From<&Vec<&[$from_typ; N]>> for $to_typ {
-            fn from(value: &Vec<&[$from_typ; N]>) -> Self {
-                Self::ArrayOfIntegerArray(value.into_iter().map(|inner| inner.to_vec()).collect())
-            }
-        }
-    };
-}
-
-impl_from_for_array_of_integer_array!(u32, EmbeddingInput);
-impl_from_for_array_of_integer_array!(u32, Prompt);
-
-impl From<&str> for ChatCompletionFunctionCall {
-    fn from(value: &str) -> Self {
-        match value {
-            "auto" => Self::Auto,
-            "none" => Self::None,
-            _ => Self::Function { name: value.into() },
-        }
-    }
-}
+// --- From impls for locally-defined types ---

 impl From<&str> for FunctionName {
    fn from(value: &str) -> Self {
@@ -542,7 +34,7 @@ impl From<String> for FunctionName {
 impl From<&str> for ChatCompletionNamedToolChoice {
    fn from(value: &str) -> Self {
        Self {
-            r#type: super::ChatCompletionToolType::Function,
+            r#type: ChatCompletionToolType::Function,
            function: value.into(),
        }
    }
@@ -551,7 +43,7 @@ impl From<&str> for ChatCompletionNamedToolChoice {
 impl From<String> for ChatCompletionNamedToolChoice {
    fn from(value: String) -> Self {
        Self {
-            r#type: super::ChatCompletionToolType::Function,
+            r#type: ChatCompletionToolType::Function,
            function: value.into(),
        }
    }
@@ -577,78 +69,51 @@ impl From<String> for ChatCompletionToolChoiceOption {
    }
 }

-impl From<(String, serde_json::Value)> for ChatCompletionFunctions {
-    fn from(value: (String, serde_json::Value)) -> Self {
-        Self {
-            name: value.0,
-            description: None,
-            parameters: value.1,
-        }
-    }
-}
-
-// todo: write macro for bunch of same looking From trait implementations below
+// From message types into ChatCompletionRequestMessage enum
+// Note: types from upstream (SystemMessage, ToolMessage, etc.) need From impls
+// on our local ChatCompletionRequestMessage enum.

-impl From<ChatCompletionRequestUserMessage> for ChatCompletionRequestMessage {
-    fn from(value: ChatCompletionRequestUserMessage) -> Self {
+impl From<super::ChatCompletionRequestUserMessage> for ChatCompletionRequestMessage {
+    fn from(value: super::ChatCompletionRequestUserMessage) -> Self {
        Self::User(value)
    }
 }

-impl From<ChatCompletionRequestSystemMessage> for ChatCompletionRequestMessage {
-    fn from(value: ChatCompletionRequestSystemMessage) -> Self {
+impl From<async_openai::types::chat::ChatCompletionRequestSystemMessage>
+    for ChatCompletionRequestMessage
+{
+    fn from(value: async_openai::types::chat::ChatCompletionRequestSystemMessage) -> Self {
        Self::System(value)
    }
 }

-impl From<ChatCompletionRequestDeveloperMessage> for ChatCompletionRequestMessage {
-    fn from(value: ChatCompletionRequestDeveloperMessage) -> Self {
+impl From<async_openai::types::chat::ChatCompletionRequestDeveloperMessage>
+    for ChatCompletionRequestMessage
+{
+    fn from(value: async_openai::types::chat::ChatCompletionRequestDeveloperMessage) -> Self {
        Self::Developer(value)
    }
 }

-impl From<ChatCompletionRequestAssistantMessage> for ChatCompletionRequestMessage {
-    fn from(value: ChatCompletionRequestAssistantMessage) -> Self {
-        Self::Assistant(value)
-    }
-}
-
-impl From<ChatCompletionRequestFunctionMessage> for ChatCompletionRequestMessage {
-    fn from(value: ChatCompletionRequestFunctionMessage) -> Self {
-        Self::Function(value)
-    }
-}
-
-impl From<ChatCompletionRequestToolMessage> for ChatCompletionRequestMessage {
-    fn from(value: ChatCompletionRequestToolMessage) -> Self {
+impl From<async_openai::types::chat::ChatCompletionRequestToolMessage>
+    for ChatCompletionRequestMessage
+{
+    fn from(value: async_openai::types::chat::ChatCompletionRequestToolMessage) -> Self {
        Self::Tool(value)
    }
 }

-impl From<ChatCompletionRequestUserMessageContent> for ChatCompletionRequestUserMessage {
-    fn from(value: ChatCompletionRequestUserMessageContent) -> Self {
-        Self {
-            content: value,
-            name: None,
-        }
-    }
-}
-
-impl From<ChatCompletionRequestSystemMessageContent> for ChatCompletionRequestSystemMessage {
-    fn from(value: ChatCompletionRequestSystemMessageContent) -> Self {
-        Self {
-            content: value,
-            name: None,
-        }
+impl From<async_openai::types::chat::ChatCompletionRequestFunctionMessage>
+    for ChatCompletionRequestMessage
+{
+    fn from(value: async_openai::types::chat::ChatCompletionRequestFunctionMessage) -> Self {
+        Self::Function(value)
    }
 }

-impl From<ChatCompletionRequestDeveloperMessageContent> for ChatCompletionRequestDeveloperMessage {
-    fn from(value: ChatCompletionRequestDeveloperMessageContent) -> Self {
-        Self {
-            content: value,
-            name: None,
-        }
+impl From<ChatCompletionRequestAssistantMessage> for ChatCompletionRequestMessage {
+    fn from(value: ChatCompletionRequestAssistantMessage) -> Self {
+        Self::Assistant(value)
    }
 }

@@ -661,102 +126,6 @@ impl From<ChatCompletionRequestAssistantMessageContent> for ChatCompletionReques
    }
 }

-impl From<&str> for ChatCompletionRequestUserMessageContent {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestUserMessageContent::Text(value.into())
-    }
-}
-
-impl From<String> for ChatCompletionRequestUserMessageContent {
-    fn from(value: String) -> Self {
-        ChatCompletionRequestUserMessageContent::Text(value)
-    }
-}
-
-impl From<&str> for ChatCompletionRequestSystemMessageContent {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestSystemMessageContent::Text(value.into())
-    }
-}
-
-impl From<String> for ChatCompletionRequestSystemMessageContent {
-    fn from(value: String) -> Self {
-        ChatCompletionRequestSystemMessageContent::Text(value)
-    }
-}
-
-impl From<&str> for ChatCompletionRequestDeveloperMessageContent {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestDeveloperMessageContent::Text(value.into())
-    }
-}
-
-impl From<String> for ChatCompletionRequestDeveloperMessageContent {
-    fn from(value: String) -> Self {
-        ChatCompletionRequestDeveloperMessageContent::Text(value)
-    }
-}
-
-impl From<&str> for ChatCompletionRequestAssistantMessageContent {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestAssistantMessageContent::Text(value.into())
-    }
-}
-
-impl From<String> for ChatCompletionRequestAssistantMessageContent {
-    fn from(value: String) -> Self {
-        ChatCompletionRequestAssistantMessageContent::Text(value)
-    }
-}
-
-impl From<&str> for ChatCompletionRequestToolMessageContent {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestToolMessageContent::Text(value.into())
-    }
-}
-
-impl From<String> for ChatCompletionRequestToolMessageContent {
-    fn from(value: String) -> Self {
-        ChatCompletionRequestToolMessageContent::Text(value)
-    }
-}
-
-impl From<&str> for ChatCompletionRequestUserMessage {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestUserMessageContent::Text(value.into()).into()
-    }
-}
-
-impl From<String> for ChatCompletionRequestUserMessage {
-    fn from(value: String) -> Self {
-        value.as_str().into()
-    }
-}
-
-impl From<&str> for ChatCompletionRequestSystemMessage {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestSystemMessageContent::Text(value.into()).into()
-    }
-}
-
-impl From<&str> for ChatCompletionRequestDeveloperMessage {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestDeveloperMessageContent::Text(value.into()).into()
-    }
-}
-
-impl From<String> for ChatCompletionRequestSystemMessage {
-    fn from(value: String) -> Self {
-        value.as_str().into()
-    }
-}
-
-impl From<String> for ChatCompletionRequestDeveloperMessage {
-    fn from(value: String) -> Self {
-        value.as_str().into()
-    }
-}
-
 impl From<&str> for ChatCompletionRequestAssistantMessage {
    fn from(value: &str) -> Self {
        ChatCompletionRequestAssistantMessageContent::Text(value.into()).into()
@@ -769,13 +138,7 @@ impl From<String> for ChatCompletionRequestAssistantMessage {
    }
 }

-impl From<Vec<ChatCompletionRequestUserMessageContentPart>>
-    for ChatCompletionRequestUserMessageContent
-{
-    fn from(value: Vec<ChatCompletionRequestUserMessageContentPart>) -> Self {
-        ChatCompletionRequestUserMessageContent::Array(value)
-    }
-}
+// From content parts into UserMessageContentPart enum

 impl From<ChatCompletionRequestMessageContentPartText>
    for ChatCompletionRequestUserMessageContentPart
@@ -817,17 +180,7 @@ impl From<ChatCompletionRequestMessageContentPartAudioUrl>
    }
 }

-impl From<&str> for ChatCompletionRequestMessageContentPartText {
-    fn from(value: &str) -> Self {
-        ChatCompletionRequestMessageContentPartText { text: value.into() }
-    }
-}
-
-impl From<String> for ChatCompletionRequestMessageContentPartText {
-    fn from(value: String) -> Self {
-        ChatCompletionRequestMessageContentPartText { text: value }
-    }
-}
+// URL type conversions

 impl From<&str> for ImageUrl {
    fn from(value: &str) -> Self {
@@ -886,209 +239,3 @@ impl From<String> for AudioUrl {
        }
    }
 }
-
-impl From<String> for CreateMessageRequestContent {
-    fn from(value: String) -> Self {
-        Self::Content(value)
-    }
-}
-
-impl From<&str> for CreateMessageRequestContent {
-    fn from(value: &str) -> Self {
-        Self::Content(value.to_string())
-    }
-}
-
-impl Default for ChatCompletionRequestUserMessageContent {
-    fn default() -> Self {
-        ChatCompletionRequestUserMessageContent::Text("".into())
-    }
-}
-
-impl Default for CreateMessageRequestContent {
-    fn default() -> Self {
-        Self::Content("".into())
-    }
-}
-
-impl Default for ChatCompletionRequestDeveloperMessageContent {
-    fn default() -> Self {
-        ChatCompletionRequestDeveloperMessageContent::Text("".into())
-    }
-}
-
-impl Default for ChatCompletionRequestSystemMessageContent {
-    fn default() -> Self {
-        ChatCompletionRequestSystemMessageContent::Text("".into())
-    }
-}
-
-impl Default for ChatCompletionRequestToolMessageContent {
-    fn default() -> Self {
-        ChatCompletionRequestToolMessageContent::Text("".into())
-    }
-}
-
-// start: types to multipart from
-
-impl AsyncTryFrom<CreateTranscriptionRequest> for reqwest::multipart::Form {
-    type Error = OpenAIError;
-
-    async fn try_from(request: CreateTranscriptionRequest) -> Result<Self, Self::Error> {
-        let audio_part = create_file_part(request.file.source).await?;
-
-        let mut form = reqwest::multipart::Form::new()
-            .part("file", audio_part)
-            .text("model", request.model);
-
-        if let Some(prompt) = request.prompt {
-            form = form.text("prompt", prompt);
-        }
-
-        if let Some(response_format) = request.response_format {
-            form = form.text("response_format", response_format.to_string())
-        }
-
-        if let Some(temperature) = request.temperature {
-            form = form.text("temperature", temperature.to_string())
-        }
-
-        if let Some(language) = request.language {
-            form = form.text("language", language);
-        }
-
-        if let Some(timestamp_granularities) = request.timestamp_granularities {
-            for tg in timestamp_granularities {
-                form = form.text("timestamp_granularities[]", tg.to_string());
-            }
-        }
-
-        Ok(form)
-    }
-}
-
-impl AsyncTryFrom<CreateTranslationRequest> for reqwest::multipart::Form {
-    type Error = OpenAIError;
-
-    async fn try_from(request: CreateTranslationRequest) -> Result<Self, Self::Error> {
-        let audio_part = create_file_part(request.file.source).await?;
-
-        let mut form = reqwest::multipart::Form::new()
-            .part("file", audio_part)
-            .text("model", request.model);
-
-        if let Some(prompt) = request.prompt {
-            form = form.text("prompt", prompt);
-        }
-
-        if let Some(response_format) = request.response_format {
-            form = form.text("response_format", response_format.to_string())
-        }
-
-        if let Some(temperature) = request.temperature {
-            form = form.text("temperature", temperature.to_string())
-        }
-        Ok(form)
-    }
-}
-
-impl AsyncTryFrom<CreateImageEditRequest> for reqwest::multipart::Form {
-    type Error = OpenAIError;
-
-    async fn try_from(request: CreateImageEditRequest) -> Result<Self, Self::Error> {
-        let image_part = create_file_part(request.image.source).await?;
-
-        let mut form = reqwest::multipart::Form::new()
-            .part("image", image_part)
-            .text("prompt", request.prompt);
-
-        if let Some(mask) = request.mask {
-            let mask_part = create_file_part(mask.source).await?;
-            form = form.part("mask", mask_part);
-        }
-
-        if let Some(model) = request.model {
-            form = form.text("model", model.to_string())
-        }
-
-        if request.n.is_some() {
-            form = form.text("n", request.n.unwrap().to_string())
-        }
-
-        if request.size.is_some() {
-            form = form.text("size", request.size.unwrap().to_string())
-        }
-
-        if request.response_format.is_some() {
-            form = form.text(
-                "response_format",
-                request.response_format.unwrap().to_string(),
-            )
-        }
-
-        if request.user.is_some() {
-            form = form.text("user", request.user.unwrap())
-        }
-        Ok(form)
-    }
-}
-
-impl AsyncTryFrom<CreateImageVariationRequest> for reqwest::multipart::Form {
-    type Error = OpenAIError;
-
-    async fn try_from(request: CreateImageVariationRequest) -> Result<Self, Self::Error> {
-        let image_part = create_file_part(request.image.source).await?;
-
-        let mut form = reqwest::multipart::Form::new().part("image", image_part);
-
-        if let Some(model) = request.model {
-            form = form.text("model", model.to_string())
-        }
-
-        if request.n.is_some() {
-            form = form.text("n", request.n.unwrap().to_string())
-        }
-
-        if request.size.is_some() {
-            form = form.text("size", request.size.unwrap().to_string())
-        }
-
-        if request.response_format.is_some() {
-            form = form.text(
-                "response_format",
-                request.response_format.unwrap().to_string(),
-            )
-        }
-
-        if request.user.is_some() {
-            form = form.text("user", request.user.unwrap())
-        }
-        Ok(form)
-    }
-}
-
-impl AsyncTryFrom<CreateFileRequest> for reqwest::multipart::Form {
-    type Error = OpenAIError;
-
-    async fn try_from(request: CreateFileRequest) -> Result<Self, Self::Error> {
-        let file_part = create_file_part(request.file.source).await?;
-        let form = reqwest::multipart::Form::new()
-            .part("file", file_part)
-            .text("purpose", request.purpose.to_string());
-        Ok(form)
-    }
-}
-
-impl AsyncTryFrom<AddUploadPartRequest> for reqwest::multipart::Form {
-    type Error = OpenAIError;
-
-    async fn try_from(request: AddUploadPartRequest) -> Result<Self, Self::Error> {
-        let file_part = create_file_part(request.data).await?;
-        let form = reqwest::multipart::Form::new().part("data", file_part);
-        Ok(form)
-    }
-}
-
-// end: types to multipart form
-
-// Responses API impls are now in responses/impls.rs
--- a/lib/protocols/src/types/invites.rs
+++ b/lib/protocols/src/types/invites.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use crate::types::OpenAIError;
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-
-use super::OrganizationRole;
-
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum InviteStatus {
-    Accepted,
-    Expired,
-    Pending,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Builder)]
-#[builder(name = "InviteRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option))]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct InviteRequest {
-    pub email: String,
-    pub role: OrganizationRole,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct InviteListResponse {
-    pub object: String,
-    pub data: Vec<Invite>,
-    pub first_id: Option<String>,
-    pub last_id: Option<String>,
-    pub has_more: Option<bool>,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct InviteDeleteResponse {
-    /// The object type, which is always `organization.invite.deleted`
-    pub object: String,
-    pub id: String,
-    pub deleted: bool,
-}
-
-/// Represents an individual `invite` to the organization.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct Invite {
-    /// The object type, which is always `organization.invite`
-    pub object: String,
-    /// The identifier, which can be referenced in API endpoints
-    pub id: String,
-    /// The email address of the individual to whom the invite was sent
-    pub email: String,
-    /// `owner` or `reader`
-    pub role: OrganizationRole,
-    /// `accepted`, `expired`, or `pending`
-    pub status: InviteStatus,
-    /// The Unix timestamp (in seconds) of when the invite was sent.
-    pub invited_at: u32,
-    /// The Unix timestamp (in seconds) of when the invite expires.
-    pub expires_at: u32,
-    /// The Unix timestamp (in seconds) of when the invite was accepted.
-    pub accepted_at: Option<u32>,
-}
--- a/lib/protocols/src/types/mcp/impls.rs
+++ b/lib/protocols/src/types/mcp/impls.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-use crate::types::mcp::{
-    MCPToolAllowedTools, MCPToolApprovalFilter, MCPToolApprovalSetting, MCPToolFilter,
-    MCPToolRequireApproval,
-};
-
-// MCPToolRequireApproval ergonomics
-
-impl From<MCPToolApprovalSetting> for MCPToolRequireApproval {
-    fn from(setting: MCPToolApprovalSetting) -> Self {
-        MCPToolRequireApproval::ApprovalSetting(setting)
-    }
-}
-
-impl From<MCPToolApprovalFilter> for MCPToolRequireApproval {
-    fn from(filter: MCPToolApprovalFilter) -> Self {
-        MCPToolRequireApproval::Filter(filter)
-    }
-}
-
-// MCPToolAllowedTools ergonomics
-
-impl From<MCPToolFilter> for MCPToolAllowedTools {
-    fn from(filter: MCPToolFilter) -> Self {
-        MCPToolAllowedTools::Filter(filter)
-    }
-}
-
-impl From<Vec<String>> for MCPToolAllowedTools {
-    fn from(tools: Vec<String>) -> Self {
-        MCPToolAllowedTools::List(tools)
-    }
-}
-
-impl From<Vec<&str>> for MCPToolAllowedTools {
-    fn from(tools: Vec<&str>) -> Self {
-        MCPToolAllowedTools::List(tools.into_iter().map(|s| s.to_string()).collect())
-    }
-}
-
-impl From<&[&str]> for MCPToolAllowedTools {
-    fn from(tools: &[&str]) -> Self {
-        MCPToolAllowedTools::List(tools.iter().map(|s| s.to_string()).collect())
-    }
-}
-
-impl<const N: usize> From<[&str; N]> for MCPToolAllowedTools {
-    fn from(tools: [&str; N]) -> Self {
-        MCPToolAllowedTools::List(tools.iter().map(|s| s.to_string()).collect())
-    }
-}
-
-impl From<&Vec<String>> for MCPToolAllowedTools {
-    fn from(tools: &Vec<String>) -> Self {
-        MCPToolAllowedTools::List(tools.clone())
-    }
-}
-
-impl From<&Vec<&str>> for MCPToolAllowedTools {
-    fn from(tools: &Vec<&str>) -> Self {
-        MCPToolAllowedTools::List(tools.iter().map(|s| s.to_string()).collect())
-    }
-}
-
-impl From<&str> for MCPToolAllowedTools {
-    fn from(tool: &str) -> Self {
-        MCPToolAllowedTools::List(vec![tool.to_string()])
-    }
-}
-
-impl From<String> for MCPToolAllowedTools {
-    fn from(tool: String) -> Self {
-        MCPToolAllowedTools::List(vec![tool])
-    }
-}
--- a/lib/protocols/src/types/mcp/mcp_.rs
+++ b/lib/protocols/src/types/mcp/mcp_.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-use utoipa::ToSchema;
-
-use crate::error::OpenAIError;
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, ToSchema)]
-#[serde(rename_all = "snake_case")]
-pub enum McpToolConnectorId {
-    ConnectorDropbox,
-    ConnectorGmail,
-    ConnectorGooglecalendar,
-    ConnectorGoogledrive,
-    ConnectorMicrosoftteams,
-    ConnectorOutlookcalendar,
-    ConnectorOutlookemail,
-    ConnectorSharepoint,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq, Default, ToSchema)]
-#[builder(
-    name = "MCPToolArgs",
-    pattern = "mutable",
-    setter(into, strip_option),
-    default
-)]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct MCPTool {
-    /// A label for this MCP server, used to identify it in tool calls.
-    pub server_label: String,
-
-    /// List of allowed tool names or a filter object.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub allowed_tools: Option<MCPToolAllowedTools>,
-
-    /// An OAuth access token that can be used with a remote MCP server, either with a custom MCP
-    /// server URL or a service connector. Your application must handle the OAuth authorization
-    /// flow and provide the token here.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub authorization: Option<String>,
-
-    /// Identifier for service connectors, like those available in ChatGPT. One of `server_url` or
-    /// `connector_id` must be provided. Learn more about service connectors [here](https://platform.openai.com/docs/guides/tools-remote-mcp#connectors).
-    ///
-    /// Currently supported `connector_id` values are:
-    /// - Dropbox: `connector_dropbox`
-    /// - Gmail: `connector_gmail`
-    /// - Google Calendar: `connector_googlecalendar`
-    /// - Google Drive: `connector_googledrive`
-    /// - Microsoft Teams: `connector_microsoftteams`
-    /// - Outlook Calendar: `connector_outlookcalendar`
-    /// - Outlook Email: `connector_outlookemail`
-    /// - SharePoint: `connector_sharepoint`
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub connector_id: Option<McpToolConnectorId>,
-
-    /// Optional HTTP headers to send to the MCP server. Use for authentication or other purposes.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub headers: Option<serde_json::Value>,
-
-    /// Specify which of the MCP server's tools require approval.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub require_approval: Option<MCPToolRequireApproval>,
-
-    /// Optional description of the MCP server, used to provide more context.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub server_description: Option<String>,
-
-    /// The URL for the MCP server. One of `server_url` or `connector_id` must be provided.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub server_url: Option<String>,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
-#[serde(untagged)]
-pub enum MCPToolAllowedTools {
-    /// A string array of allowed tool names
-    List(Vec<String>),
-    /// A filter object to specify which tools are allowed.
-    Filter(MCPToolFilter),
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
-pub struct MCPToolFilter {
-    /// Indicates whether or not a tool modifies data or is read-only.
-    /// If an MCP server is annotated with [readOnlyHint](https://modelcontextprotocol.io/specification/2025-06-18/schema#toolannotations-readonlyhint),
-    /// it will match this filter.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub read_only: Option<bool>,
-    /// List of allowed tool names.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub tool_names: Option<Vec<String>>,
-}
-
-/// Approval policy or filter for MCP tools.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
-#[serde(untagged)]
-pub enum MCPToolRequireApproval {
-    /// Specify which of the MCP server's tools require approval. Can be
-    /// `always`, `never`, or a filter object associated with tools
-    /// that require approval.
-    Filter(MCPToolApprovalFilter),
-    /// Specify a single approval policy for all tools. One of `always` or
-    /// `never`. When set to `always`, all tools will require approval. When
-    /// set to `never`, all tools will not require approval.
-    ApprovalSetting(MCPToolApprovalSetting),
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
-#[serde(rename_all = "lowercase")]
-pub enum MCPToolApprovalSetting {
-    Always,
-    Never,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
-pub struct MCPToolApprovalFilter {
-    /// A list of tools that always require approval.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub always: Option<MCPToolFilter>,
-    /// A list of tools that never require approval.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub never: Option<MCPToolFilter>,
-}
-
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, ToSchema)]
-pub struct MCPListToolsTool {
-    /// The JSON schema describing the tool's input.
-    pub input_schema: serde_json::Value,
-    /// The name of the tool.
-    pub name: String,
-    /// Additional annotations about the tool.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub annotations: Option<serde_json::Value>,
-    /// The description of the tool.
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub description: Option<String>,
-}
--- a/lib/protocols/src/types/mcp/mod.rs
+++ b/lib/protocols/src/types/mcp/mod.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-
-mod impls;
-mod mcp_;
-pub use mcp_::*;
--- a/lib/protocols/src/types/message.rs
+++ b/lib/protocols/src/types/message.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use std::collections::HashMap;
-
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-
-use crate::error::OpenAIError;
-
-use super::{AudioUrl, ImageDetail, ImageUrl, VideoUrl};
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
-#[serde(rename_all = "lowercase")]
-pub enum MessageRole {
-    #[default]
-    User,
-    Assistant,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum MessageStatus {
-    InProgress,
-    Incomplete,
-    Completed,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(rename_all = "snake_case")]
-pub enum MessageIncompleteDetailsType {
-    ContentFilter,
-    MaxTokens,
-    RunCancelled,
-    RunExpired,
-    RunFailed,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageIncompleteDetails {
-    /// The reason the message is incomplete.
-    pub reason: MessageIncompleteDetailsType,
-}
-
-///  Represents a message within a [thread](https://platform.openai.com/docs/api-reference/threads).
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageObject {
-    /// The identifier, which can be referenced in API endpoints.
-    pub id: String,
-    /// The object type, which is always `thread.message`.
-    pub object: String,
-    /// The Unix timestamp (in seconds) for when the message was created.
-    pub created_at: i32,
-    /// The [thread](https://platform.openai.com/docs/api-reference/threads) ID that this message belongs to.
-    pub thread_id: String,
-
-    /// The status of the message, which can be either `in_progress`, `incomplete`, or `completed`.
-    pub status: Option<MessageStatus>,
-
-    /// On an incomplete message, details about why the message is incomplete.
-    pub incomplete_details: Option<MessageIncompleteDetails>,
-
-    /// The Unix timestamp (in seconds) for when the message was completed.
-    pub completed_at: Option<u32>,
-
-    /// The Unix timestamp (in seconds) for when the message was marked as incomplete.
-    pub incomplete_at: Option<u32>,
-
-    /// The entity that produced the message. One of `user` or `assistant`.
-    pub role: MessageRole,
-
-    /// The content of the message in array of text and/or images.
-    pub content: Vec<MessageContent>,
-
-    /// If applicable, the ID of the [assistant](https://platform.openai.com/docs/api-reference/assistants) that authored this message.
-    pub assistant_id: Option<String>,
-
-    /// The ID of the [run](https://platform.openai.com/docs/api-reference/runs) associated with the creation of this message. Value is `null` when messages are created manually using the create message or create thread endpoints.
-    pub run_id: Option<String>,
-
-    /// A list of files attached to the message, and the tools they were added to.
-    pub attachments: Option<Vec<MessageAttachment>>,
-
-    pub metadata: Option<HashMap<String, serde_json::Value>>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageAttachment {
-    /// The ID of the file to attach to the message.
-    pub file_id: String,
-    /// The tools to add this file to.
-    pub tools: Vec<MessageAttachmentTool>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum MessageAttachmentTool {
-    CodeInterpreter,
-    FileSearch,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum MessageContent {
-    Text(MessageContentTextObject),
-    ImageFile(MessageContentImageFileObject),
-    ImageUrl(MessageContentImageUrlObject),
-    VideoUrl(MessageContentVideoUrlObject),
-    AudioUrl(MessageContentAudioUrlObject),
-    Refusal(MessageContentRefusalObject),
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentRefusalObject {
-    pub refusal: String,
-}
-
-/// The text content that is part of a message.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentTextObject {
-    pub text: TextData,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct TextData {
-    /// The data that makes up the text.
-    pub value: String,
-    pub annotations: Vec<MessageContentTextAnnotations>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum MessageContentTextAnnotations {
-    /// A citation within the message that points to a specific quote from a specific File associated with the assistant or the message. Generated when the assistant uses the "retrieval" tool to search files.
-    FileCitation(MessageContentTextAnnotationsFileCitationObject),
-    /// A URL for the file that's generated when the assistant used the `code_interpreter` tool to generate a file.
-    FilePath(MessageContentTextAnnotationsFilePathObject),
-}
-
-/// A citation within the message that points to a specific quote from a specific File associated with the assistant or the message. Generated when the assistant uses the "file_search" tool to search files.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentTextAnnotationsFileCitationObject {
-    /// The text in the message content that needs to be replaced.
-    pub text: String,
-    pub file_citation: FileCitation,
-    pub start_index: u32,
-    pub end_index: u32,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct FileCitation {
-    /// The ID of the specific File the citation is from.
-    pub file_id: String,
-    /// The specific quote in the file.
-    pub quote: Option<String>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentTextAnnotationsFilePathObject {
-    /// The text in the message content that needs to be replaced.
-    pub text: String,
-    pub file_path: FilePath,
-    pub start_index: u32,
-    pub end_index: u32,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct FilePath {
-    /// The ID of the file that was generated.
-    pub file_id: String,
-}
-
-/// References an image [File](https://platform.openai.com/docs/api-reference/files) in the content of a message.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentImageFileObject {
-    pub image_file: ImageFile,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct ImageFile {
-    /// The [File](https://platform.openai.com/docs/api-reference/files) ID of the image in the message content. Set `purpose="vision"` when uploading the File if you need to later display the file content.
-    pub file_id: String,
-    /// Specifies the detail level of the image if specified by the user. `low` uses fewer tokens, you can opt in to high resolution using `high`.
-    pub detail: Option<ImageDetail>,
-}
-
-/// References an image URL in the content of a message.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentImageUrlObject {
-    pub image_url: ImageUrl,
-}
-
-/// References a video URL in the content of a message.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentVideoUrlObject {
-    pub video_url: VideoUrl,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageContentAudioUrlObject {
-    pub audio_url: AudioUrl,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageRequestContentTextObject {
-    /// Text content to be sent to the model
-    pub text: String,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(untagged)]
-pub enum CreateMessageRequestContent {
-    /// The text contents of the message.
-    Content(String),
-    /// An array of content parts with a defined type, each can be of type `text` or images can be passed with `image_url` or `image_file`. Image types are only supported on [Vision-compatible models](https://platform.openai.com/docs/models/overview).
-    ContentArray(Vec<MessageContentInput>),
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum MessageContentInput {
-    Text(MessageRequestContentTextObject),
-    ImageFile(MessageContentImageFileObject),
-    ImageUrl(MessageContentImageUrlObject),
-    VideoUrl(MessageContentVideoUrlObject),
-    AudioUrl(MessageContentAudioUrlObject),
-}
-#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
-#[builder(name = "CreateMessageRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateMessageRequest {
-    /// The role of the entity that is creating the message. Allowed values include:
-    /// - `user`: Indicates the message is sent by an actual user and should be used in most cases to represent user-generated messages.
-    /// - `assistant`: Indicates the message is generated by the assistant. Use this value to insert messages from the assistant into the conversation.
-    pub role: MessageRole,
-    /// The content of the message.
-    pub content: CreateMessageRequestContent,
-
-    /// A list of files attached to the message, and the tools they should be added to.
-    pub attachments: Option<Vec<MessageAttachment>>,
-
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<HashMap<String, serde_json::Value>>,
-}
-
-#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
-pub struct ModifyMessageRequest {
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub metadata: Option<HashMap<String, serde_json::Value>>,
-}
-
-#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
-pub struct DeleteMessageResponse {
-    pub id: String,
-    pub deleted: bool,
-    pub object: String,
-}
-
-#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
-pub struct ListMessagesResponse {
-    pub object: String,
-    pub data: Vec<MessageObject>,
-    pub first_id: Option<String>,
-    pub last_id: Option<String>,
-    pub has_more: bool,
-}
-
-/// Represents a message delta i.e. any changed fields on a message during streaming.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaObject {
-    /// The identifier of the message, which can be referenced in API endpoints.
-    pub id: String,
-    /// The object type, which is always `thread.message.delta`.
-    pub object: String,
-    /// The delta containing the fields that have changed on the Message.
-    pub delta: MessageDelta,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDelta {
-    /// The entity that produced the message. One of `user` or `assistant`.
-    pub role: Option<MessageRole>,
-    ///  The content of the message in array of text and/or images.
-    pub content: Option<Vec<MessageDeltaContent>>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum MessageDeltaContent {
-    ImageFile(MessageDeltaContentImageFileObject),
-    ImageUrl(MessageDeltaContentImageUrlObject),
-    VideoUrl(MessageDeltaContentVideoUrlObject),
-    AudioUrl(MessageDeltaContentAudioUrlObject),
-    Text(MessageDeltaContentTextObject),
-    Refusal(MessageDeltaContentRefusalObject),
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentRefusalObject {
-    /// The index of the refusal part in the message.
-    pub index: i32,
-    pub refusal: Option<String>,
-}
-
-/// The text content that is part of a message.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentTextObject {
-    /// The index of the content part in the message.
-    pub index: u32,
-    pub text: Option<MessageDeltaContentText>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentText {
-    /// The data that makes up the text.
-    pub value: Option<String>,
-    pub annotations: Option<Vec<MessageDeltaContentTextAnnotations>>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-#[serde(tag = "type")]
-#[serde(rename_all = "snake_case")]
-pub enum MessageDeltaContentTextAnnotations {
-    FileCitation(MessageDeltaContentTextAnnotationsFileCitationObject),
-    FilePath(MessageDeltaContentTextAnnotationsFilePathObject),
-}
-
-/// A citation within the message that points to a specific quote from a specific File associated with the assistant or the message. Generated when the assistant uses the "file_search" tool to search files.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentTextAnnotationsFileCitationObject {
-    /// The index of the annotation in the text content part.
-    pub index: u32,
-    /// The text in the message content that needs to be replaced.
-    pub text: Option<String>,
-    pub file_citation: Option<FileCitation>,
-    pub start_index: Option<u32>,
-    pub end_index: Option<u32>,
-}
-
-/// A URL for the file that's generated when the assistant used the `code_interpreter` tool to generate a file.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentTextAnnotationsFilePathObject {
-    /// The index of the annotation in the text content part.
-    pub index: u32,
-    /// The text in the message content that needs to be replaced.
-    pub text: Option<String>,
-    pub file_path: Option<FilePath>,
-    pub start_index: Option<u32>,
-    pub end_index: Option<u32>,
-}
-
-/// References an image [File](https://platform.openai.com/docs/api-reference/files) in the content of a message.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentImageFileObject {
-    /// The index of the content part in the message.
-    pub index: u32,
-
-    pub image_file: Option<ImageFile>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentImageUrlObject {
-    /// The index of the content part in the message.
-    pub index: u32,
-
-    pub image_url: Option<ImageUrl>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentVideoUrlObject {
-    /// The index of the content part in the message.
-    pub index: u32,
-
-    pub video_url: Option<VideoUrl>,
-}
-
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
-pub struct MessageDeltaContentAudioUrlObject {
-    /// The index of the content part in the message.
-    pub index: u32,
-
-    pub audio_url: Option<AudioUrl>,
-}
--- a/lib/protocols/src/types/mod.rs
+++ b/lib/protocols/src/types/mod.rs
 // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 //
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
+// Types used in inference API requests and responses.
 //
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
+// Base OpenAI types are re-exported from upstream async-openai.
+// Inference-serving extensions and Anthropic types are locally defined.

-//! Types used in OpenAI API requests and responses.
-//! These types are created from component schemas in the [OpenAPI spec](https://github.com/openai/openai-openapi)
+// --- Locally defined modules ---
 pub mod anthropic;
-mod assistant;
-mod assistant_impls;
-mod assistant_stream;
-mod audio;
-mod audit_log;
-mod batch;
 mod chat;
-mod common;
 mod completion;
-mod embedding;
-mod file;
-mod fine_tuning;
-mod image;
-mod invites;
-pub mod mcp;
-mod message;
-mod model;
-mod moderation;
-mod project_api_key;
-mod project_service_account;
-mod project_users;
-mod projects;
-#[cfg_attr(docsrs, doc(cfg(feature = "realtime")))]
-#[cfg(feature = "realtime")]
-pub mod realtime;
 pub mod responses;
-mod run;
-pub mod shared;
-mod step;
-mod thread;
-mod upload;
-mod users;
-mod vector_store;

-pub use assistant::*;
-pub use assistant_stream::*;
-pub use audio::*;
-pub use audit_log::*;
-pub use batch::*;
+// --- Local type re-exports ---
 pub use chat::*;
-pub use common::*;
 pub use completion::*;
-pub use embedding::*;
-pub use file::*;
-pub use fine_tuning::*;
-pub use image::*;
-pub use invites::*;
-pub use message::*;
-pub use model::*;
-pub use moderation::*;
-pub use project_api_key::*;
-pub use project_service_account::*;
-pub use project_users::*;
-pub use projects::*;
-pub use run::*;
-pub use step::*;
-pub use thread::*;
-pub use upload::*;
-pub use users::*;
-pub use vector_store::*;

+// --- Upstream re-exports (types-only, no HTTP client) ---
+
+// Embeddings (full re-export)
+pub use async_openai::types::embeddings::*;
+
+// Images
+pub use async_openai::types::images::*;
+
+// --- Convenience impls for locally-defined types ---
 mod impls;
-use derive_builder::UninitializedFieldError;

 use crate::error::OpenAIError;
+use derive_builder::UninitializedFieldError;

 impl From<UninitializedFieldError> for OpenAIError {
    fn from(value: UninitializedFieldError) -> Self {

--- a/lib/protocols/src/types/model.rs
+++ b/lib/protocols/src/types/model.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use serde::{Deserialize, Serialize};
-
-/// Describes an OpenAI model offering that can be used with the API.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct Model {
-    /// The model identifier, which can be referenced in the API endpoints.
-    pub id: String,
-    /// The object type, which is always "model".
-    pub object: String,
-    /// The Unix timestamp (in seconds) when the model was created.
-    pub created: u32,
-    /// The organization that owns the model.
-    pub owned_by: String,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct ListModelResponse {
-    pub object: String,
-    pub data: Vec<Model>,
-}
-
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
-pub struct DeleteModelResponse {
-    pub id: String,
-    pub object: String,
-    pub deleted: bool,
-}
--- a/lib/protocols/src/types/moderation.rs
+++ b/lib/protocols/src/types/moderation.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use derive_builder::Builder;
-use serde::{Deserialize, Serialize};
-
-use crate::error::OpenAIError;
-
-#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)]
-#[serde(untagged)]
-pub enum ModerationInput {
-    /// A single string of text to classify for moderation
-    String(String),
-
-    /// An array of strings to classify for moderation
-    StringArray(Vec<String>),
-
-    /// An array of multi-modal inputs to the moderation model
-    MultiModal(Vec<ModerationContentPart>),
-}
-
-/// Content part for multi-modal moderation input
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(tag = "type")]
-pub enum ModerationContentPart {
-    /// An object describing text to classify
-    #[serde(rename = "text")]
-    Text {
-        /// A string of text to classify
-        text: String,
-    },
-
-    /// An object describing an image to classify
-    #[serde(rename = "image_url")]
-    ImageUrl {
-        /// Contains either an image URL or a data URL for a base64 encoded image
-        image_url: ModerationImageUrl,
-    },
-}
-
-/// Image URL configuration for image moderation
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct ModerationImageUrl {
-    /// Either a URL of the image or the base64 encoded image data
-    pub url: String,
-}
-
-#[derive(Debug, Default, Clone, Serialize, Builder, PartialEq, Deserialize)]
-#[builder(name = "CreateModerationRequestArgs")]
-#[builder(pattern = "mutable")]
-#[builder(setter(into, strip_option), default)]
-#[builder(derive(Debug))]
-#[builder(build_fn(error = "OpenAIError"))]
-pub struct CreateModerationRequest {
-    /// Input (or inputs) to classify. Can be a single string, an array of strings, or
-    /// an array of multi-modal input objects similar to other models.
-    pub input: ModerationInput,
-
-    /// The content moderation model you would like to use. Learn more in the
-    /// [moderation guide](https://platform.openai.com/docs/guides/moderation), and learn about
-    /// available models [here](https://platform.openai.com/docs/models/moderation).
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub model: Option<String>,
-}
-
-/// Per-category boolean flags of a moderation result.
-///
-/// Categories whose name contains `/` or `-` are mapped to their JSON key with an
-/// explicit `#[serde(rename = "...")]`; the others use the field name as-is.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct Category {
-    /// Content that expresses, incites, or promotes hate based on race, gender,
-    /// ethnicity, religion, nationality, sexual orientation, disability status, or
-    /// caste. Hateful content aimed at non-protected groups (e.g., chess players)
-    /// is harassment.
-    pub hate: bool,
-    #[serde(rename = "hate/threatening")]
-    /// Hateful content that also includes violence or serious harm towards the
-    /// targeted group based on race, gender, ethnicity, religion, nationality,
-    /// sexual orientation, disability status, or caste.
-    pub hate_threatening: bool,
-    /// Content that expresses, incites, or promotes harassing language towards any target.
-    pub harassment: bool,
-    /// Harassment content that also includes violence or serious harm towards any target.
-    #[serde(rename = "harassment/threatening")]
-    pub harassment_threatening: bool,
-    /// Content that includes instructions or advice that facilitate the planning or execution of wrongdoing, or that gives advice or instruction on how to commit illicit acts. For example, "how to shoplift" would fit this category.
-    pub illicit: bool,
-    /// Content that includes instructions or advice that facilitate the planning or execution of wrongdoing that also includes violence, or that gives advice or instruction on the procurement of any weapon.
-    #[serde(rename = "illicit/violent")]
-    pub illicit_violent: bool,
-    /// Content that promotes, encourages, or depicts acts of self-harm, such as suicide, cutting, and eating disorders.
-    #[serde(rename = "self-harm")]
-    pub self_harm: bool,
-    /// Content where the speaker expresses that they are engaging or intend to engage in acts of self-harm, such as suicide, cutting, and eating disorders.
-    #[serde(rename = "self-harm/intent")]
-    pub self_harm_intent: bool,
-    /// Content that encourages performing acts of self-harm, such as suicide, cutting, and eating disorders, or that gives instructions or advice on how to commit such acts.
-    #[serde(rename = "self-harm/instructions")]
-    pub self_harm_instructions: bool,
-    /// Content meant to arouse sexual excitement, such as the description of sexual activity, or that promotes sexual services (excluding sex education and wellness).
-    pub sexual: bool,
-    /// Sexual content that includes an individual who is under 18 years old.
-    #[serde(rename = "sexual/minors")]
-    pub sexual_minors: bool,
-    /// Content that depicts death, violence, or physical injury.
-    pub violence: bool,
-    /// Content that depicts death, violence, or physical injury in graphic detail.
-    #[serde(rename = "violence/graphic")]
-    pub violence_graphic: bool,
-}
-
-/// Per-category scores as predicted by the model.
-///
-/// One `f32` field per category; every field is required when deserializing.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct CategoryScore {
-    /// The score for the category 'hate'.
-    pub hate: f32,
-    /// The score for the category 'hate/threatening'.
-    #[serde(rename = "hate/threatening")]
-    pub hate_threatening: f32,
-    /// The score for the category 'harassment'.
-    pub harassment: f32,
-    /// The score for the category 'harassment/threatening'.
-    #[serde(rename = "harassment/threatening")]
-    pub harassment_threatening: f32,
-    /// The score for the category 'illicit'.
-    pub illicit: f32,
-    /// The score for the category 'illicit/violent'.
-    #[serde(rename = "illicit/violent")]
-    pub illicit_violent: f32,
-    /// The score for the category 'self-harm'.
-    #[serde(rename = "self-harm")]
-    pub self_harm: f32,
-    /// The score for the category 'self-harm/intent'.
-    #[serde(rename = "self-harm/intent")]
-    pub self_harm_intent: f32,
-    /// The score for the category 'self-harm/instructions'.
-    #[serde(rename = "self-harm/instructions")]
-    pub self_harm_instructions: f32,
-    /// The score for the category 'sexual'.
-    pub sexual: f32,
-    /// The score for the category 'sexual/minors'.
-    #[serde(rename = "sexual/minors")]
-    pub sexual_minors: f32,
-    /// The score for the category 'violence'.
-    pub violence: f32,
-    /// The score for the category 'violence/graphic'.
-    #[serde(rename = "violence/graphic")]
-    pub violence_graphic: f32,
-}
-
-/// A single moderation object: an overall flag plus the per-category flags,
-/// scores, and applied input types.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct ContentModerationResult {
-    /// Whether any of the below categories are flagged.
-    pub flagged: bool,
-    /// The categories, and whether each one is flagged or not.
-    pub categories: Category,
-    /// The categories along with their scores as predicted by the model.
-    pub category_scores: CategoryScore,
-    /// The categories along with the input type(s) that the score applies to.
-    pub category_applied_input_types: CategoryAppliedInputTypes,
-}
-
-/// Represents whether a given input is potentially harmful.
-///
-/// Carries the request identifier, the model that produced the results, and the
-/// moderation objects themselves in `results`.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
-pub struct CreateModerationResponse {
-    /// The unique identifier for the moderation request.
-    pub id: String,
-    /// The model used to generate the moderation results.
-    pub model: String,
-    /// A list of moderation objects.
-    pub results: Vec<ContentModerationResult>,
-}
-
-/// The categories along with the input type(s) that each score applies to.
-///
-/// Every category maps to a list of [`ModInputType`] values.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-pub struct CategoryAppliedInputTypes {
-    /// The applied input type(s) for the category 'hate'.
-    pub hate: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'hate/threatening'.
-    #[serde(rename = "hate/threatening")]
-    pub hate_threatening: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'harassment'.
-    pub harassment: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'harassment/threatening'.
-    #[serde(rename = "harassment/threatening")]
-    pub harassment_threatening: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'illicit'.
-    pub illicit: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'illicit/violent'.
-    #[serde(rename = "illicit/violent")]
-    pub illicit_violent: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'self-harm'.
-    #[serde(rename = "self-harm")]
-    pub self_harm: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'self-harm/intent'.
-    #[serde(rename = "self-harm/intent")]
-    pub self_harm_intent: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'self-harm/instructions'.
-    #[serde(rename = "self-harm/instructions")]
-    pub self_harm_instructions: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'sexual'.
-    pub sexual: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'sexual/minors'.
-    #[serde(rename = "sexual/minors")]
-    pub sexual_minors: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'violence'.
-    pub violence: Vec<ModInputType>,
-
-    /// The applied input type(s) for the category 'violence/graphic'.
-    #[serde(rename = "violence/graphic")]
-    pub violence_graphic: Vec<ModInputType>,
-}
-
-/// The type of input that was moderated.
-///
-/// Variants are (de)serialized in lowercase (`text`, `image`) because of
-/// `rename_all = "lowercase"`.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
-#[serde(rename_all = "lowercase")]
-pub enum ModInputType {
-    /// Text content that was moderated.
-    Text,
-    /// Image content that was moderated.
-    Image,
-}
--- a/lib/protocols/src/types/project_api_key.rs
+++ b/lib/protocols/src/types/project_api_key.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use serde::{Deserialize, Serialize};
-
-use super::{ProjectServiceAccount, ProjectUser};
-
-/// Represents an individual API key in a project.
-///
-/// Only the redacted form of the key is carried here (`redacted_value`); the
-/// owning principal is described by the nested [`ProjectApiKeyOwner`].
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectApiKey {
-    /// The object type, which is always `organization.project.api_key`.
-    pub object: String,
-    /// The redacted value of the API key.
-    pub redacted_value: String,
-    /// The name of the API key.
-    pub name: String,
-    /// The Unix timestamp (in seconds) of when the API key was created.
-    pub created_at: u32,
-    /// The identifier, which can be referenced in API endpoints.
-    pub id: String,
-    /// The owner of the API key.
-    pub owner: ProjectApiKeyOwner,
-}
-
-/// The kind of principal that owns a project API key.
-///
-/// Variants are (de)serialized in snake_case (`user`, `service_account`), which
-/// is what the `type` field of `ProjectApiKeyOwner` is documented to contain.
-/// This requires `rename_all`: a container-level `rename` would only rename the
-/// enum itself and leave the variants as `User` / `ServiceAccount` on the wire.
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case")]
-pub enum ProjectApiKeyOwnerType {
-    /// The key is owned by a user.
-    User,
-    /// The key is owned by a service account.
-    ServiceAccount,
-}
-
-/// Represents the owner of a project API key.
-///
-/// NOTE(review): presumably exactly one of `user` / `service_account` is
-/// populated, selected by `type` — the types do not enforce this; confirm
-/// against the API.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectApiKeyOwner {
-    /// The type of owner, which is either `user` or `service_account`.
-    pub r#type: ProjectApiKeyOwnerType,
-    /// The user owner of the API key, if applicable.
-    pub user: Option<ProjectUser>,
-    /// The service account owner of the API key, if applicable.
-    pub service_account: Option<ProjectServiceAccount>,
-}
-
-/// Represents the response object for listing project API keys.
-///
-/// `first_id` and `last_id` are required strings, so a payload that omits them
-/// or sends `null` fails to deserialize.
-/// NOTE(review): confirm the API never does that for an empty page.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectApiKeyListResponse {
-    /// The object type, which is always `list`.
-    pub object: String,
-    /// The list of project API keys.
-    pub data: Vec<ProjectApiKey>,
-    /// The ID of the first project API key in the list.
-    pub first_id: String,
-    /// The ID of the last project API key in the list.
-    pub last_id: String,
-    /// Indicates if there are more project API keys available.
-    pub has_more: bool,
-}
-
-/// Represents the response object for deleting a project API key.
-///
-/// Echoes the `id` of the key together with a `deleted` success flag.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectApiKeyDeleteResponse {
-    /// The object type, which is always `organization.project.api_key.deleted`.
-    pub object: String,
-    /// The ID of the deleted API key.
-    pub id: String,
-    /// Indicates if the API key was successfully deleted.
-    pub deleted: bool,
-}
--- a/lib/protocols/src/types/project_service_account.rs
+++ b/lib/protocols/src/types/project_service_account.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//
-// Based on https://github.com/64bit/async-openai/ by Himanshu Neema
-// Original Copyright (c) 2022 Himanshu Neema
-// Licensed under MIT License (see ATTRIBUTIONS-Rust.md)
-//
-// Modifications Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES.
-// Licensed under Apache 2.0
-
-use serde::{Deserialize, Serialize};
-
-use super::ProjectUserRole;
-
-/// Represents an individual service account in a project.
-///
-/// The role is typed as `ProjectUserRole`, imported from the parent module.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectServiceAccount {
-    /// The object type, which is always `organization.project.service_account`.
-    pub object: String,
-    /// The identifier, which can be referenced in API endpoints.
-    pub id: String,
-    /// The name of the service account.
-    pub name: String,
-    /// The role of the service account: `owner` or `member`.
-    pub role: ProjectUserRole,
-    /// The Unix timestamp (in seconds) of when the service account was created.
-    pub created_at: u32,
-}
-
-/// Represents the response object for listing project service accounts.
-///
-/// `first_id` and `last_id` are required strings, so a payload that omits them
-/// or sends `null` fails to deserialize.
-/// NOTE(review): confirm the API never does that for an empty page.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectServiceAccountListResponse {
-    /// The object type, which is always `list`.
-    pub object: String,
-    /// The list of project service accounts.
-    pub data: Vec<ProjectServiceAccount>,
-    /// The ID of the first project service account in the list.
-    pub first_id: String,
-    /// The ID of the last project service account in the list.
-    pub last_id: String,
-    /// Indicates if there are more project service accounts available.
-    pub has_more: bool,
-}
-
-/// Represents the request object for creating a project service account.
-///
-/// The name is the only field this request carries.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectServiceAccountCreateRequest {
-    /// The name of the service account being created.
-    pub name: String,
-}
-
-/// Represents the response object for creating a project service account.
-///
-/// Besides the account fields, the response embeds the account's API key in
-/// `api_key`.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectServiceAccountCreateResponse {
-    /// The object type, which is always `organization.project.service_account`.
-    pub object: String,
-    /// The ID of the created service account.
-    pub id: String,
-    /// The name of the created service account.
-    pub name: String,
-    /// Service accounts can only have one role of type `member`.
-    /// Kept as a plain `String` here, unlike `ProjectServiceAccount::role`,
-    /// which is typed as `ProjectUserRole`.
-    pub role: String,
-    /// The Unix timestamp (in seconds) of when the service account was created.
-    pub created_at: u32,
-    /// The API key associated with the created service account.
-    pub api_key: ProjectServiceAccountApiKey,
-}
-
-/// Represents the API key associated with a project service account.
-///
-/// `Debug` is derived, so formatting this struct with `{:?}` prints `value`
-/// verbatim; avoid logging it.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectServiceAccountApiKey {
-    /// The object type, which is always `organization.project.service_account.api_key`.
-    pub object: String,
-    /// The value of the API key.
-    pub value: String,
-    /// The name of the API key.
-    pub name: String,
-    /// The Unix timestamp (in seconds) of when the API key was created.
-    pub created_at: u32,
-    /// The ID of the API key.
-    pub id: String,
-}
-
-/// Represents the response object for deleting a project service account.
-///
-/// Echoes the `id` of the account together with a `deleted` success flag.
-#[derive(Debug, Serialize, Deserialize)]
-pub struct ProjectServiceAccountDeleteResponse {
-    /// The object type, which is always `organization.project.service_account.deleted`.
-    pub object: String,
-    /// The ID of the deleted service account.
-    pub id: String,
-    /// Indicates if the service account was successfully deleted.
-    pub deleted: bool,
-}