feat: add auto-generated frontend OpenAPI spec and helper binary (#4802)

Signed-off-by: Satvik Matta <smatta@nvidia.com>

feat: add auto-generated frontend OpenAPI spec and helper binary (#4802)
Signed-off-by: Satvik Matta <smatta@nvidia.com>
f63e273c · smatta-star · GitHub · ac8c9023 · f63e273c · f63e273c
Unverified Commit f63e273c authored Dec 22, 2025 by smatta-star Committed by GitHub Dec 22, 2025
20 changed files
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2575,6 +2575,7 @@ dependencies = [
 "tokio-util",
 "tracing",
 "url",
+ "utoipa",
 "uuid 1.18.1",
 ]

@@ -11781,6 +11782,8 @@ dependencies = [
 "quote",
 "regex",
 "syn 2.0.111",
+ "url",
+ "uuid 1.18.1",
 ]

 [[package]]

--- a/README.md
+++ b/README.md
@@ -181,6 +181,17 @@ Dynamo provides comprehensive benchmarking tools to evaluate and optimize your d
 - **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using AIPerf
 - **[SLA-Driven Dynamo Deployments](docs/planner/sla_planner_quickstart.md)** – Optimize your deployment to meet SLA requirements

+## Frontend OpenAPI specification
+
+The OpenAI-compatible HTTP frontend exposes an OpenAPI 3 specification at `/openapi.json`.
+To generate and persist the same specification without running the server (for example, for CI, documentation, or NIM integration), run:
+
+```bash
+cargo run -p dynamo-llm --bin generate-frontend-openapi
+```
+
+This writes the current frontend spec to `docs/frontends/openapi.json` at the repository root.
+
 # Engines

 Dynamo is designed to be inference engine agnostic. To use any engine with Dynamo, NATS and etcd need to be installed, along with a Dynamo frontend (`python -m dynamo.frontend [--interactive]`).

--- a/docs/frontends/openapi.json
+++ b/docs/frontends/openapi.json
+{
+  "openapi": "3.1.0",
+  "info": {
+    "title": "NVIDIA Dynamo OpenAI Frontend",
+    "description": "OpenAI-compatible HTTP API for NVIDIA Dynamo.",
+    "contact": {
+      "name": "NVIDIA Dynamo",
+      "url": "https://github.com/ai-dynamo/dynamo"
+    },
+    "license": {
+      "name": "Apache-2.0"
+    },
+    "version": "0.7.0"
+  },
+  "servers": [
+    {
+      "url": "/",
+      "description": "Current server"
+    }
+  ],
+  "paths": {
+    "/busy_threshold": {
+      "get": {
+        "summary": "Endpoint: /busy_threshold",
+        "description": "Endpoint for path: /busy_threshold",
+        "operationId": "get_busy_threshold",
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/docs": {
+      "get": {
+        "summary": "API documentation",
+        "description": "Interactive API documentation powered by Swagger UI.",
+        "operationId": "get_docs",
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/health": {
+      "get": {
+        "summary": "Health check",
+        "description": "Returns the health status of the service. Used for readiness probes.",
+        "operationId": "get_health",
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/live": {
+      "get": {
+        "summary": "Liveness check",
+        "description": "Returns the liveness status of the service. Used for liveness probes.",
+        "operationId": "get_live",
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/metrics": {
+      "get": {
+        "summary": "Prometheus metrics",
+        "description": "Returns Prometheus metrics for monitoring the service.",
+        "operationId": "get_metrics",
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/openapi.json": {
+      "get": {
+        "summary": "OpenAPI specification",
+        "description": "Returns the OpenAPI 3.1 specification for this API in JSON format.",
+        "operationId": "get_openapi.json",
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/v1/chat/completions": {
+      "post": {
+        "summary": "Create chat completion",
+        "description": "Creates a completion for a chat conversation. Supports both streaming and non-streaming modes. Compatible with OpenAI's chat completions API.",
+        "operationId": "post_v1_chat_completions",
+        "requestBody": {
+          "description": "Chat completion request with model, messages, and optional parameters",
+          "content": {
+            "application/json": {
+              "schema": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/CreateChatCompletionRequest"
+                  },
+                  {
+                    "$ref": "#/components/schemas/CommonExt"
+                  },
+                  {
+                    "type": "object",
+                    "properties": {
+                      "chat_template_args": {
+                        "type": [
+                          "object",
+                          "null"
+                        ],
+                        "description": "Extra args to pass to the chat template rendering context",
+                        "additionalProperties": {},
+                        "propertyNames": {
+                          "type": "string"
+                        }
+                      },
+                      "nvext": {
+                        "oneOf": [
+                          {
+                            "type": "null"
+                          },
+                          {
+                            "$ref": "#/components/schemas/NvExt"
+                          }
+                        ]
+                      }
+                    },
+                    "additionalProperties": {
+                      "description": "Catch-all for unsupported fields - checked during validation"
+                    }
+                  }
+                ],
+                "description": "A request structure for creating a chat completion, extending OpenAI's\n`CreateChatCompletionRequest` with [`NvExt`] extensions and common fields.\n\n# Fields\n- `inner`: The base OpenAI chat completion request, embedded using `serde(flatten)`.\n- `common`: Common extension fields (ignore_eos, min_tokens) at root level, embedded using `serde(flatten)`.\n- `nvext`: The optional NVIDIA extension field. See [`NvExt`] for more details.\n  Note: If ignore_eos is specified in both common and nvext, the common (root-level) value takes precedence."
+              },
+              "example": {
+                "model": "Qwen/Qwen3-0.6B",
+                "messages": [
+                  {
+                    "role": "system",
+                    "content": "You are a helpful assistant."
+                  },
+                  {
+                    "role": "user",
+                    "content": "Hello! Can you help me understand what this API does?"
+                  }
+                ],
+                "temperature": 0.7,
+                "max_tokens": 50,
+                "stream": false
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/v1/completions": {
+      "post": {
+        "summary": "Create text completion",
+        "description": "Creates a completion for a given prompt. Supports both streaming and non-streaming modes. Compatible with OpenAI's completions API.",
+        "operationId": "post_v1_completions",
+        "requestBody": {
+          "description": "Text completion request with model, prompt, and optional parameters",
+          "content": {
+            "application/json": {
+              "schema": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/CreateCompletionRequest"
+                  },
+                  {
+                    "$ref": "#/components/schemas/CommonExt"
+                  },
+                  {
+                    "type": "object",
+                    "properties": {
+                      "metadata": {},
+                      "nvext": {
+                        "oneOf": [
+                          {
+                            "type": "null"
+                          },
+                          {
+                            "$ref": "#/components/schemas/NvExt"
+                          }
+                        ]
+                      }
+                    },
+                    "additionalProperties": {
+                      "description": "Catch-all for unsupported fields - checked during validation"
+                    }
+                  }
+                ]
+              },
+              "example": {
+                "model": "Qwen/Qwen3-0.6B",
+                "prompt": "Once upon a time",
+                "temperature": 0.7,
+                "max_tokens": 50,
+                "stream": false
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/v1/embeddings": {
+      "post": {
+        "summary": "Create embeddings",
+        "description": "Creates an embedding vector representing the input text. Compatible with OpenAI's embeddings API.",
+        "operationId": "post_v1_embeddings",
+        "requestBody": {
+          "description": "Embedding request with model and input text",
+          "content": {
+            "application/json": {
+              "schema": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/CreateEmbeddingRequest"
+                  },
+                  {
+                    "type": "object",
+                    "properties": {
+                      "nvext": {
+                        "oneOf": [
+                          {
+                            "type": "null"
+                          },
+                          {
+                            "$ref": "#/components/schemas/NvExt"
+                          }
+                        ]
+                      }
+                    }
+                  }
+                ]
+              },
+              "example": {
+                "model": "Qwen/Qwen3-Embedding-4B",
+                "input": "The quick brown fox jumps over the lazy dog"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/v1/models": {
+      "get": {
+        "summary": "List available models",
+        "description": "Lists the currently available models and provides basic information about each.",
+        "operationId": "get_v1_models",
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    },
+    "/v1/responses": {
+      "post": {
+        "summary": "Create response",
+        "description": "Creates a response for a given input. Compatible with OpenAI's responses API.",
+        "operationId": "post_v1_responses",
+        "requestBody": {
+          "description": "Response request with model and input",
+          "content": {
+            "application/json": {
+              "schema": {
+                "allOf": [
+                  {
+                    "$ref": "#/components/schemas/CreateResponse",
+                    "description": "Flattened CreateResponse fields (model, input, temperature, etc.)"
+                  },
+                  {
+                    "type": "object",
+                    "properties": {
+                      "nvext": {
+                        "oneOf": [
+                          {
+                            "type": "null"
+                          },
+                          {
+                            "$ref": "#/components/schemas/NvExt"
+                          }
+                        ]
+                      }
+                    }
+                  }
+                ]
+              },
+              "example": {
+                "model": "Qwen/Qwen3-0.6B",
+                "input": "What is the capital of France?"
+              }
+            }
+          },
+          "required": true
+        },
+        "responses": {
+          "200": {
+            "description": "Successful response"
+          },
+          "400": {
+            "description": "Bad request - invalid input"
+          },
+          "404": {
+            "description": "Model not found"
+          },
+          "503": {
+            "description": "Service unavailable"
+          }
+        }
+      }
+    }
+  },
+  "components": {
+    "schemas": {
+      "AudioUrl": {
+        "type": "object",
+        "required": [
+          "url"
+        ],
+        "properties": {
+          "url": {
+            "type": "string",
+            "format": "uri",
+            "description": "URL of the audio file"
+          },
+          "uuid": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "format": "uuid",
+            "description": "Optional unique identifier for the audio."
+          }
+        }
+      },
+      "ChatCompletionAudio": {
+        "type": "object",
+        "required": [
+          "voice",
+          "format"
+        ],
+        "properties": {
+          "format": {
+            "$ref": "#/components/schemas/ChatCompletionAudioFormat",
+            "description": "Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`."
+          },
+          "voice": {
+            "$ref": "#/components/schemas/ChatCompletionAudioVoice",
+            "description": "The voice the model uses to respond. Supported voices are `ash`, `ballad`, `coral`, `sage`, and `verse` (also supported but not recommended are `alloy`, `echo`, and `shimmer`; these voices are less expressive)."
+          }
+        }
+      },
+      "ChatCompletionAudioFormat": {
+        "type": "string",
+        "enum": [
+          "wav",
+          "mp3",
+          "flac",
+          "opus",
+          "pcm16"
+        ]
+      },
+      "ChatCompletionAudioVoice": {
+        "type": "string",
+        "enum": [
+          "alloy",
+          "ash",
+          "ballad",
+          "coral",
+          "echo",
+          "sage",
+          "shimmer",
+          "verse"
+        ]
+      },
+      "ChatCompletionFunctionCall": {
+        "oneOf": [
+          {
+            "type": "string",
+            "description": "The model does not call a function, and responds to the end-user.",
+            "enum": [
+              "none"
+            ]
+          },
+          {
+            "type": "string",
+            "description": "The model can pick between responding to the end-user or calling a function.",
+            "enum": [
+              "auto"
+            ]
+          },
+          {
+            "type": "object",
+            "description": "Forces the model to call the specified function.",
+            "required": [
+              "Function"
+            ],
+            "properties": {
+              "Function": {
+                "type": "object",
+                "description": "Forces the model to call the specified function.",
+                "required": [
+                  "name"
+                ],
+                "properties": {
+                  "name": {
+                    "type": "string"
+                  }
+                }
+              }
+            }
+          }
+        ]
+      },
+      "ChatCompletionFunctions": {
+        "type": "object",
+        "required": [
+          "name",
+          "parameters"
+        ],
+        "properties": {
+          "description": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "A description of what the function does, used by the model to choose when and how to call the function."
+          },
+          "name": {
+            "type": "string",
+            "description": "The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
+          },
+          "parameters": {
+            "description": "The parameters the function accepts, described as a JSON Schema object. See the [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.\n\nOmitting `parameters` defines a function with an empty parameter list."
+          }
+        },
+        "deprecated": true
+      },
+      "ChatCompletionMessageToolCall": {
+        "type": "object",
+        "required": [
+          "id",
+          "type",
+          "function"
+        ],
+        "properties": {
+          "function": {
+            "$ref": "#/components/schemas/FunctionCall",
+            "description": "The function that the model called."
+          },
+          "id": {
+            "type": "string",
+            "description": "The ID of the tool call."
+          },
+          "type": {
+            "$ref": "#/components/schemas/ChatCompletionToolType",
+            "description": "The type of the tool. Currently, only `function` is supported."
+          }
+        }
+      },
+      "ChatCompletionModalities": {
+        "type": "string",
+        "description": "Output types that you would like the model to generate for this request.\n\nMost models are capable of generating text, which is the default: `[\"text\"]`\n\nThe `gpt-4o-audio-preview` model can also be used to [generate\naudio](https://platform.openai.com/docs/guides/audio). To request that this model generate both text and audio responses, you can use: `[\"text\", \"audio\"]`",
+        "enum": [
+          "text",
+          "audio"
+        ]
+      },
+      "ChatCompletionNamedToolChoice": {
+        "type": "object",
+        "description": "Specifies a tool the model should use. Use to force the model to call a specific function.",
+        "required": [
+          "type",
+          "function"
+        ],
+        "properties": {
+          "function": {
+            "$ref": "#/components/schemas/FunctionName"
+          },
+          "type": {
+            "$ref": "#/components/schemas/ChatCompletionToolType",
+            "description": "The type of the tool. Currently, only `function` is supported."
+          }
+        }
+      },
+      "ChatCompletionRequestAssistantMessage": {
+        "type": "object",
+        "properties": {
+          "audio": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageAudio",
+                "description": "Data about a previous audio response from the model.\n[Learn more](https://platform.openai.com/docs/guides/audio)."
+              }
+            ]
+          },
+          "content": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageContent",
+                "description": "The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified."
+              }
+            ]
+          },
+          "function_call": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/FunctionCall",
+                "description": "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model."
+              }
+            ]
+          },
+          "name": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
+          },
+          "refusal": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "The refusal message by the assistant."
+          },
+          "tool_calls": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionMessageToolCall"
+            }
+          }
+        }
+      },
+      "ChatCompletionRequestAssistantMessageAudio": {
+        "type": "object",
+        "required": [
+          "id"
+        ],
+        "properties": {
+          "id": {
+            "type": "string",
+            "description": "Unique identifier for a previous audio response from the model."
+          }
+        }
+      },
+      "ChatCompletionRequestAssistantMessageContent": {
+        "oneOf": [
+          {
+            "type": "string",
+            "description": "The text contents of the message."
+          },
+          {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageContentPart"
+            },
+            "description": "An array of content parts with a defined type. Can be one or more of type `text`, or exactly one of type `refusal`."
+          }
+        ]
+      },
+      "ChatCompletionRequestAssistantMessageContentPart": {
+        "oneOf": [
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "text"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartRefusal"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "refusal"
+                    ]
+                  }
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "ChatCompletionRequestDeveloperMessage": {
+        "type": "object",
+        "required": [
+          "content"
+        ],
+        "properties": {
+          "content": {
+            "$ref": "#/components/schemas/ChatCompletionRequestDeveloperMessageContent",
+            "description": "The contents of the developer message."
+          },
+          "name": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
+          }
+        }
+      },
+      "ChatCompletionRequestDeveloperMessageContent": {
+        "oneOf": [
+          {
+            "type": "string"
+          },
+          {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
+            }
+          }
+        ]
+      },
+      "ChatCompletionRequestFunctionMessage": {
+        "type": "object",
+        "required": [
+          "name"
+        ],
+        "properties": {
+          "content": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "The return value from the function call, to return to the model."
+          },
+          "name": {
+            "type": "string",
+            "description": "The name of the function to call."
+          }
+        }
+      },
+      "ChatCompletionRequestMessage": {
+        "oneOf": [
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestDeveloperMessage"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "role"
+                ],
+                "properties": {
+                  "role": {
+                    "type": "string",
+                    "enum": [
+                      "developer"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestSystemMessage"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "role"
+                ],
+                "properties": {
+                  "role": {
+                    "type": "string",
+                    "enum": [
+                      "system"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestUserMessage"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "role"
+                ],
+                "properties": {
+                  "role": {
+                    "type": "string",
+                    "enum": [
+                      "user"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestAssistantMessage"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "role"
+                ],
+                "properties": {
+                  "role": {
+                    "type": "string",
+                    "enum": [
+                      "assistant"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestToolMessage"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "role"
+                ],
+                "properties": {
+                  "role": {
+                    "type": "string",
+                    "enum": [
+                      "tool"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestFunctionMessage"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "role"
+                ],
+                "properties": {
+                  "role": {
+                    "type": "string",
+                    "enum": [
+                      "function"
+                    ]
+                  }
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "ChatCompletionRequestMessageContentPartAudio": {
+        "type": "object",
+        "description": "Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).",
+        "required": [
+          "input_audio"
+        ],
+        "properties": {
+          "input_audio": {
+            "$ref": "#/components/schemas/InputAudio"
+          }
+        }
+      },
+      "ChatCompletionRequestMessageContentPartAudioUrl": {
+        "type": "object",
+        "required": [
+          "audio_url"
+        ],
+        "properties": {
+          "audio_url": {
+            "$ref": "#/components/schemas/AudioUrl"
+          }
+        }
+      },
+      "ChatCompletionRequestMessageContentPartImage": {
+        "type": "object",
+        "required": [
+          "image_url"
+        ],
+        "properties": {
+          "image_url": {
+            "$ref": "#/components/schemas/ImageUrl"
+          }
+        }
+      },
+      "ChatCompletionRequestMessageContentPartRefusal": {
+        "type": "object",
+        "required": [
+          "refusal"
+        ],
+        "properties": {
+          "refusal": {
+            "type": "string",
+            "description": "The refusal message generated by the model."
+          }
+        }
+      },
+      "ChatCompletionRequestMessageContentPartText": {
+        "type": "object",
+        "required": [
+          "text"
+        ],
+        "properties": {
+          "text": {
+            "type": "string"
+          }
+        }
+      },
+      "ChatCompletionRequestMessageContentPartVideo": {
+        "type": "object",
+        "required": [
+          "video_url"
+        ],
+        "properties": {
+          "video_url": {
+            "$ref": "#/components/schemas/VideoUrl"
+          }
+        }
+      },
+      "ChatCompletionRequestSystemMessage": {
+        "type": "object",
+        "required": [
+          "content"
+        ],
+        "properties": {
+          "content": {
+            "$ref": "#/components/schemas/ChatCompletionRequestSystemMessageContent",
+            "description": "The contents of the system message."
+          },
+          "name": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
+          }
+        }
+      },
+      "ChatCompletionRequestSystemMessageContent": {
+        "oneOf": [
+          {
+            "type": "string",
+            "description": "The text contents of the system message."
+          },
+          {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionRequestSystemMessageContentPart"
+            },
+            "description": "An array of content parts with a defined type. For system messages, only type `text` is supported."
+          }
+        ]
+      },
+      "ChatCompletionRequestSystemMessageContentPart": {
+        "oneOf": [
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "text"
+                    ]
+                  }
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "ChatCompletionRequestToolMessage": {
+        "type": "object",
+        "description": "Tool message",
+        "required": [
+          "content",
+          "tool_call_id"
+        ],
+        "properties": {
+          "content": {
+            "$ref": "#/components/schemas/ChatCompletionRequestToolMessageContent",
+            "description": "The contents of the tool message."
+          },
+          "tool_call_id": {
+            "type": "string"
+          }
+        }
+      },
+      "ChatCompletionRequestToolMessageContent": {
+        "oneOf": [
+          {
+            "type": "string",
+            "description": "The text contents of the tool message."
+          },
+          {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionRequestToolMessageContentPart"
+            },
+            "description": "An array of content parts with a defined type. For tool messages, only type `text` is supported."
+          }
+        ]
+      },
+      "ChatCompletionRequestToolMessageContentPart": {
+        "oneOf": [
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "text"
+                    ]
+                  }
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "ChatCompletionRequestUserMessage": {
+        "type": "object",
+        "required": [
+          "content"
+        ],
+        "properties": {
+          "content": {
+            "$ref": "#/components/schemas/ChatCompletionRequestUserMessageContent",
+            "description": "The contents of the user message."
+          },
+          "name": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
+          }
+        }
+      },
+      "ChatCompletionRequestUserMessageContent": {
+        "oneOf": [
+          {
+            "type": "string",
+            "description": "The text contents of the message."
+          },
+          {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionRequestUserMessageContentPart"
+            },
+            "description": "An array of content parts with a defined type. Supported options differ based on the [model](https://platform.openai.com/docs/models) being used to generate the response. Can contain text, image, or audio inputs."
+          }
+        ]
+      },
+      "ChatCompletionRequestUserMessageContentPart": {
+        "oneOf": [
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "text"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartImage"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "image_url"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartVideo"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "video_url"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartAudioUrl"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "audio_url"
+                    ]
+                  }
+                }
+              }
+            ]
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartAudio"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "input_audio"
+                    ]
+                  }
+                }
+              }
+            ]
+          }
+        ]
+      },
+      "ChatCompletionStreamOptions": {
+        "type": "object",
+        "description": "Options for streaming response. Only set this when you set `stream: true`.",
+        "required": [
+          "include_usage"
+        ],
+        "properties": {
+          "include_usage": {
+            "type": "boolean",
+            "description": "If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value."
+          }
+        }
+      },
+      "ChatCompletionTool": {
+        "type": "object",
+        "required": [
+          "type",
+          "function"
+        ],
+        "properties": {
+          "function": {
+            "$ref": "#/components/schemas/FunctionObject"
+          },
+          "type": {
+            "$ref": "#/components/schemas/ChatCompletionToolType"
+          }
+        }
+      },
+      "ChatCompletionToolChoiceOption": {
+        "oneOf": [
+          {
+            "type": "string",
+            "enum": [
+              "none"
+            ]
+          },
+          {
+            "type": "string",
+            "enum": [
+              "auto"
+            ]
+          },
+          {
+            "type": "string",
+            "enum": [
+              "required"
+            ]
+          },
+          {
+            "type": "object",
+            "required": [
+              "named"
+            ],
+            "properties": {
+              "named": {
+                "$ref": "#/components/schemas/ChatCompletionNamedToolChoice"
+              }
+            }
+          }
+        ],
+        "description": "Controls which (if any) tool is called by the model.\n`none` means the model will not call any tool and instead generates a message.\n`auto` means the model can pick between generating a message or calling one or more tools.\n`required` means the model must call one or more tools.\nSpecifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.\n\n`none` is the default when no tools are present. `auto` is the default if tools are present."
+      },
+      "ChatCompletionToolType": {
+        "type": "string",
+        "enum": [
+          "function"
+        ]
+      },
+      "CommonExt": {
+        "type": "object",
+        "description": "Common extensions for OpenAI API requests that are not part of the standard OpenAI spec\nbut are commonly needed across different request types.",
+        "properties": {
+          "guided_choice": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "type": "string"
+            },
+            "description": "If specified, the output will be exactly one of the choices."
+          },
+          "guided_decoding_backend": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "If specified, the backend to use for guided decoding, can be backends like xgrammar or custom guided decoding backend"
+          },
+          "guided_grammar": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "If specified, the output will follow the context-free grammar. Can be a string or null."
+          },
+          "guided_json": {
+            "description": "Guided Decoding Options\nIf specified, the output will be a JSON object. Can be a string, an object, or null."
+          },
+          "guided_regex": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "If specified, the output will follow the regex pattern. Can be a string or null."
+          },
+          "guided_whitespace_pattern": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "If specified, the output will follow the whitespace pattern. Can be a string or null."
+          },
+          "ignore_eos": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "If true, the model will ignore the end-of-sequence (EOS) token and generate to max_tokens.\nThis field can also be specified in nvext, but the root-level value takes precedence."
+          },
+          "include_stop_str_in_output": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to include the stop string in the output text."
+          },
+          "min_p": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "Relative probability floor"
+          },
+          "min_tokens": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "The minimum number of tokens to generate.\nThis is a common parameter needed across different request types.",
+            "minimum": 0
+          },
+          "repetition_penalty": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "How much to penalize tokens based on how frequently they occur in the text.\nA value of 1 means no penalty, while values larger than 1 discourage and values smaller encourage."
+          },
+          "skip_special_tokens": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to skip special tokens in the decoded output.\nWhen true, special tokens (like EOS, BOS, PAD) are removed from the output text.\nWhen false, special tokens are included in the output text.\nDefaults to false if not specified."
+          },
+          "top_k": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "Integer that controls the number of top tokens to consider. Set to -1 to consider all tokens."
+          }
+        }
+      },
+      "CreateChatCompletionRequest": {
+        "type": "object",
+        "required": [
+          "messages",
+          "model"
+        ],
+        "properties": {
+          "audio": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ChatCompletionAudio",
+                "description": "Parameters for audio output. Required when audio output is requested with `modalities: [\"audio\"]`. [Learn more](https://platform.openai.com/docs/guides/audio)."
+              }
+            ]
+          },
+          "frequency_penalty": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
+          },
+          "function_call": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ChatCompletionFunctionCall",
+                "description": "Deprecated in favor of `tool_choice`.\n\nControls which (if any) function is called by the model.\n`none` means the model will not call a function and instead generates a message.\n`auto` means the model can pick between generating a message or calling a function.\nSpecifying a particular function via `{\"name\": \"my_function\"}` forces the model to call that function.\n\n`none` is the default when no functions are present. `auto` is the default if functions are present."
+              }
+            ]
+          },
+          "functions": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionFunctions"
+            },
+            "description": "Deprecated in favor of `tools`.\n\nA list of functions the model may generate JSON inputs for.",
+            "deprecated": true
+          },
+          "logit_bias": {
+            "type": [
+              "object",
+              "null"
+            ],
+            "description": "Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100.\nMathematically, the bias is added to the logits generated by the model prior to sampling.\nThe exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection;\nvalues like -100 or 100 should result in a ban or exclusive selection of the relevant token.",
+            "additionalProperties": {},
+            "propertyNames": {
+              "type": "string"
+            }
+          },
+          "logprobs": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of `message`."
+          },
+          "max_completion_tokens": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).",
+            "minimum": 0
+          },
+          "max_tokens": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the chat completion.\n\nThis value can be used to control [costs](https://openai.com/api/pricing/) for text generated via API.\nThis value is now deprecated in favor of `max_completion_tokens`, and is\nnot compatible with [o1 series models](https://platform.openai.com/docs/guides/reasoning).",
+            "deprecated": true,
+            "minimum": 0
+          },
+          "messages": {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionRequestMessage"
+            },
+            "description": "A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message types (modalities) are supported, like [text](https://platform.openai.com/docs/guides/text-generation), [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio)."
+          },
+          "metadata": {
+            "description": "Developer-defined tags and values used for filtering completions in the [dashboard](https://platform.openai.com/chat-completions)."
+          },
+          "mm_processor_kwargs": {
+            "description": "Multimodal processor configuration parameters"
+          },
+          "modalities": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionModalities"
+            }
+          },
+          "model": {
+            "type": "string",
+            "description": "ID of the model to use.\nSee the [model endpoint compatibility](https://platform.openai.com/docs/models#model-endpoint-compatibility) table for details on which models work with the Chat API."
+          },
+          "n": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.",
+            "minimum": 0
+          },
+          "parallel_tool_calls": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use."
+          },
+          "prediction": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/PredictionContent",
+                "description": "Configuration for a [Predicted Output](https://platform.openai.com/docs/guides/predicted-outputs), which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content."
+              }
+            ]
+          },
+          "presence_penalty": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
+          },
+          "reasoning_effort": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ReasoningEffort",
+                "description": "**o1 models only**\n\nConstrains effort on reasoning for\n[reasoning models](https://platform.openai.com/docs/guides/reasoning).\n\nCurrently supported values are `low`, `medium`, and `high`. Reducing\nreasoning effort can result in faster responses and fewer tokens\nused on reasoning in a response."
+              }
+            ]
+          },
+          "response_format": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ResponseFormat",
+                "description": "An object specifying the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini), [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which guarantees the model will match your supplied JSON schema. Learn more in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length."
+              }
+            ]
+          },
+          "seed": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int64",
+            "description": "This feature is in Beta.\nIf specified, our system will make a best effort to sample deterministically, such that repeated requests\nwith the same `seed` and parameters should return the same result.\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend."
+          },
+          "service_tier": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ServiceTier",
+                "description": "Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:\n- If set to 'auto', the system will utilize scale tier credits until they are exhausted.\n- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarantee.\n- When not set, the default behavior is 'auto'.\n\nWhen this parameter is set, the response body will include the `service_tier` utilized."
+              }
+            ]
+          },
+          "stop": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/Stop",
+                "description": "Up to 4 sequences where the API will stop generating further tokens."
+              }
+            ]
+          },
+          "store": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether or not to store the output of this chat completion request\nfor use in our [model distillation](https://platform.openai.com/docs/guides/distillation) or [evals](https://platform.openai.com/docs/guides/evals) products."
+          },
+          "stream": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "If set, partial message deltas will be sent, like in ChatGPT.\nTokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)\nas they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions)."
+          },
+          "stream_options": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ChatCompletionStreamOptions"
+              }
+            ]
+          },
+          "temperature": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,\nwhile lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both."
+          },
+          "tool_choice": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ChatCompletionToolChoiceOption"
+              }
+            ]
+          },
+          "tools": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionTool"
+            },
+            "description": "A list of tools the model may call. Currently, only functions are supported as a tool.\nUse this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported."
+          },
+          "top_logprobs": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.",
+            "minimum": 0
+          },
+          "top_p": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "An alternative to sampling with temperature, called nucleus sampling,\nwhere the model considers the results of the tokens with top_p probability mass.\nSo 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both."
+          },
+          "user": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids)."
+          },
+          "web_search_options": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/WebSearchOptions",
+                "description": "This tool searches the web for relevant results to use in a response.\nLearn more about the [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat)."
+              }
+            ]
+          }
+        }
+      },
+      "CreateCompletionRequest": {
+        "type": "object",
+        "required": [
+          "model",
+          "prompt"
+        ],
+        "properties": {
+          "best_of": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "Generates `best_of` completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed.\n\nWhen used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`.\n\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.",
+            "minimum": 0
+          },
+          "echo": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Echo back the prompt in addition to the completion"
+          },
+          "frequency_penalty": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)"
+          },
+          "logit_bias": {
+            "type": [
+              "object",
+              "null"
+            ],
+            "description": "Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.\n\nAs an example, you can pass `{\"50256\": -100}` to prevent the <|endoftext|> token from being generated.",
+            "additionalProperties": {},
+            "propertyNames": {
+              "type": "string"
+            }
+          },
+          "logprobs": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "Include the log probabilities on the `logprobs` most likely output tokens, as well as the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response.\n\nThe maximum value for `logprobs` is 5.",
+            "minimum": 0
+          },
+          "max_tokens": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the completion.\n\nThe token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.",
+            "minimum": 0
+          },
+          "model": {
+            "type": "string",
+            "description": "ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them."
+          },
+          "n": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "How many completions to generate for each prompt.\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.\n",
+            "minimum": 0
+          },
+          "presence_penalty": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)"
+          },
+          "prompt": {
+            "$ref": "#/components/schemas/Prompt",
+            "description": "The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.\n\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document."
+          },
+          "seed": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int64",
+            "description": "If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.\n\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend."
+          },
+          "stop": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/Stop",
+                "description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
+              }
+            ]
+          },
+          "stream": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)\nas they become available, with the stream terminated by a `data: [DONE]` message."
+          },
+          "stream_options": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ChatCompletionStreamOptions"
+              }
+            ]
+          },
+          "suffix": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "The suffix that comes after a completion of inserted text.\n\nThis parameter is only supported for `gpt-3.5-turbo-instruct`."
+          },
+          "temperature": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both."
+          },
+          "top_p": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\nWe generally recommend altering this or `temperature` but not both."
+          },
+          "user": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids)."
+          }
+        }
+      },
+      "CreateEmbeddingRequest": {
+        "type": "object",
+        "required": [
+          "model",
+          "input"
+        ],
+        "properties": {
+          "dimensions": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.",
+            "minimum": 0
+          },
+          "encoding_format": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/EncodingFormat",
+                "description": "The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). Defaults to float"
+              }
+            ]
+          },
+          "input": {
+            "$ref": "#/components/schemas/EmbeddingInput",
+            "description": "Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens."
+          },
+          "model": {
+            "type": "string",
+            "description": "ID of the model to use. You can use the\n[List models](https://platform.openai.com/docs/api-reference/models/list)\nAPI to see all of your available models, or see our\n[Model overview](https://platform.openai.com/docs/models/overview)\nfor descriptions of them."
+          },
+          "user": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "A unique identifier representing your end-user, which will help OpenAI\nto monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids)."
+          }
+        }
+      },
+      "CreateResponse": {
+        "type": "object",
+        "description": "Builder for a Responses API request.",
+        "required": [
+          "input",
+          "model"
+        ],
+        "properties": {
+          "background": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to run the model response in the background.\nboolean or null."
+          },
+          "include": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "type": "string"
+            },
+            "description": "Specify additional output data to include in the model response.\n\nSupported values:\n- `file_search_call.results`\n  Include the search results of the file search tool call.\n- `message.input_image.image_url`\n  Include image URLs from the input message.\n- `computer_call_output.output.image_url`\n  Include image URLs from the computer call output.\n- `reasoning.encrypted_content`\n  Include an encrypted version of reasoning tokens in reasoning item outputs.\n  This enables reasoning items to be used in multi-turn conversations when\n  using the Responses API statelessly (for example, when the `store` parameter\n  is set to `false`, or when an organization is enrolled in the zero-data-\n  retention program).\n\nIf `None`, no additional data is returned."
+          },
+          "input": {
+            "type": "object",
+            "description": "Text, image, or file inputs to the model, used to generate a response.\nUsing value_type to prevent deep schema recursion from Input's nested content types."
+          },
+          "instructions": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "Inserts a system (or developer) message as the first item in the model's context.\n\nWhen using along with previous_response_id, the instructions from a previous response will\nnot be carried over to the next response. This makes it simple to swap out system\n(or developer) messages in new responses."
+          },
+          "max_output_tokens": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "An upper bound for the number of tokens that can be generated for a\nresponse, including visible output tokens and reasoning tokens.",
+            "minimum": 0
+          },
+          "max_tool_calls": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "The maximum number of total calls to built-in tools that can be processed in a response.\nThis maximum number applies across all built-in tool calls, not per individual tool.\nAny further attempts to call a tool by the model will be ignored.",
+            "minimum": 0
+          },
+          "metadata": {
+            "description": "Arbitrary JSON metadata used as a passthrough parameter"
+          },
+          "model": {
+            "type": "string",
+            "description": "Model ID used to generate the response, like `gpt-4o`.\nOpenAI offers a wide range of models with different capabilities,\nperformance characteristics, and price points."
+          },
+          "parallel_tool_calls": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to allow the model to run tool calls in parallel."
+          },
+          "previous_response_id": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "The unique ID of the previous response to the model. Use this to create\nmulti-turn conversations."
+          },
+          "prompt": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/PromptConfig",
+                "description": "Reference to a prompt template and its variables."
+              }
+            ]
+          },
+          "reasoning": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ReasoningConfig",
+                "description": "**o-series models only**: Configuration options for reasoning models."
+              }
+            ]
+          },
+          "service_tier": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ServiceTier",
+                "description": "Specifies the latency tier to use for processing the request.\n\nThis parameter is relevant for customers subscribed to the Scale tier service.\n\nSupported values:\n- `auto`\n  - If the Project is Scale tier enabled, the system will utilize Scale tier credits until\n    they are exhausted.\n  - If the Project is not Scale tier enabled, the request will be processed using the\n    default service tier with a lower uptime SLA and no latency guarantee.\n- `default`\n  The request will be processed using the default service tier with a lower uptime SLA and\n  no latency guarantee.\n- `flex`\n  The request will be processed with the Flex Processing service tier. Learn more.\n\nWhen not set, the default behavior is `auto`.\n\nWhen this parameter is set, the response body will include the `service_tier` utilized."
+              }
+            ]
+          },
+          "store": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to store the generated model response for later retrieval via API."
+          },
+          "stream": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "If set to true, the model response data will be streamed to the client as it is\ngenerated using server-sent events."
+          },
+          "temperature": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8\nwill make the output more random, while lower values like 0.2 will make it\nmore focused and deterministic. We generally recommend altering this or\n`top_p` but not both."
+          },
+          "text": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/TextConfig",
+                "description": "Configuration options for a text response from the model. Can be plain text\nor structured JSON data."
+              }
+            ]
+          },
+          "tool_choice": {
+            "type": "object",
+            "description": "How the model should select which tool (or tools) to use when generating\na response."
+          },
+          "tools": {
+            "type": "array",
+            "items": {
+              "type": "object"
+            },
+            "description": "An array of tools the model may call while generating a response.\nCan include built-in tools (file_search, web_search_preview,\ncomputer_use_preview) or custom function definitions."
+          },
+          "top_logprobs": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "An integer between 0 and 20 specifying the number of most likely tokens to return\nat each token position, each with an associated log probability.",
+            "minimum": 0
+          },
+          "top_p": {
+            "type": [
+              "number",
+              "null"
+            ],
+            "format": "float",
+            "description": "An alternative to sampling with temperature, called nucleus sampling,\nwhere the model considers the results of the tokens with top_p probability\nmass. So 0.1 means only the tokens comprising the top 10% probability mass\nare considered. We generally recommend altering this or `temperature` but\nnot both."
+          },
+          "truncation": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/Truncation",
+                "description": "The truncation strategy to use for the model response:\n- `auto`: drop items in the middle to fit context window.\n- `disabled`: error if exceeding context window."
+              }
+            ]
+          },
+          "user": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "A unique identifier representing your end-user, which can help OpenAI to\nmonitor and detect abuse."
+          }
+        }
+      },
+      "EmbeddingInput": {
+        "oneOf": [
+          {
+            "type": "string"
+          },
+          {
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          },
+          {
+            "type": "array",
+            "items": {
+              "type": "integer",
+              "format": "int32",
+              "minimum": 0
+            }
+          },
+          {
+            "type": "array",
+            "items": {
+              "type": "array",
+              "items": {
+                "type": "integer",
+                "format": "int32",
+                "minimum": 0
+              }
+            }
+          }
+        ]
+      },
+      "EncodingFormat": {
+        "type": "string",
+        "enum": [
+          "float",
+          "base64"
+        ]
+      },
+      "FunctionCall": {
+        "type": "object",
+        "description": "The name and arguments of a function that should be called, as generated by the model.",
+        "required": [
+          "name",
+          "arguments"
+        ],
+        "properties": {
+          "arguments": {
+            "type": "string",
+            "description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function."
+          },
+          "name": {
+            "type": "string",
+            "description": "The name of the function to call."
+          }
+        }
+      },
+      "FunctionName": {
+        "type": "object",
+        "required": [
+          "name"
+        ],
+        "properties": {
+          "name": {
+            "type": "string",
+            "description": "The name of the function to call."
+          }
+        }
+      },
+      "FunctionObject": {
+        "type": "object",
+        "required": [
+          "name"
+        ],
+        "properties": {
+          "description": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "A description of what the function does, used by the model to choose when and how to call the function."
+          },
+          "name": {
+            "type": "string",
+            "description": "The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
+          },
+          "parameters": {
+            "description": "The parameters the function accepts, described as a JSON Schema object. See the [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.\n\nOmitting `parameters` defines a function with an empty parameter list."
+          },
+          "strict": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to enable strict schema adherence when generating the function call. If set to true, the model will follow the exact schema defined in the `parameters` field. Only a subset of JSON Schema is supported when `strict` is `true`. Learn more about Structured Outputs in the [function calling guide](https://platform.openai.com/docs/guides/function-calling)."
+          }
+        }
+      },
+      "ImageDetail": {
+        "type": "string",
+        "enum": [
+          "auto",
+          "low",
+          "high"
+        ]
+      },
+      "ImageUrl": {
+        "type": "object",
+        "required": [
+          "url"
+        ],
+        "properties": {
+          "detail": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ImageDetail",
+                "description": "Specifies the detail level of the image. Learn more in the [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding)."
+              }
+            ]
+          },
+          "url": {
+            "type": "string",
+            "format": "uri",
+            "description": "Either a URL of the image or the base64 encoded image data."
+          },
+          "uuid": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "format": "uuid",
+            "description": "Optional unique identifier for the image."
+          }
+        }
+      },
+      "InputAudio": {
+        "type": "object",
+        "required": [
+          "data",
+          "format"
+        ],
+        "properties": {
+          "data": {
+            "type": "string",
+            "description": "Base64 encoded audio data."
+          },
+          "format": {
+            "$ref": "#/components/schemas/InputAudioFormat",
+            "description": "The format of the encoded audio data. Currently supports \"wav\" and \"mp3\"."
+          }
+        }
+      },
+      "InputAudioFormat": {
+        "type": "string",
+        "enum": [
+          "wav",
+          "mp3"
+        ]
+      },
+      "NvCreateChatCompletionRequest": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/CreateChatCompletionRequest"
+          },
+          {
+            "$ref": "#/components/schemas/CommonExt"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "chat_template_args": {
+                "type": [
+                  "object",
+                  "null"
+                ],
+                "description": "Extra args to pass to the chat template rendering context",
+                "additionalProperties": {},
+                "propertyNames": {
+                  "type": "string"
+                }
+              },
+              "nvext": {
+                "oneOf": [
+                  {
+                    "type": "null"
+                  },
+                  {
+                    "$ref": "#/components/schemas/NvExt"
+                  }
+                ]
+              }
+            },
+            "additionalProperties": {
+              "description": "Catch-all for unsupported fields - checked during validation"
+            }
+          }
+        ],
+        "description": "A request structure for creating a chat completion, extending OpenAI's\n`CreateChatCompletionRequest` with [`NvExt`] extensions and common fields.\n\n# Fields\n- `inner`: The base OpenAI chat completion request, embedded using `serde(flatten)`.\n- `common`: Common extension fields (ignore_eos, min_tokens) at root level, embedded using `serde(flatten)`.\n- `nvext`: The optional NVIDIA extension field. See [`NvExt`] for more details.\n  Note: If ignore_eos is specified in both common and nvext, the common (root-level) value takes precedence."
+      },
+      "NvCreateCompletionRequest": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/CreateCompletionRequest"
+          },
+          {
+            "$ref": "#/components/schemas/CommonExt"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "metadata": {},
+              "nvext": {
+                "oneOf": [
+                  {
+                    "type": "null"
+                  },
+                  {
+                    "$ref": "#/components/schemas/NvExt"
+                  }
+                ]
+              }
+            },
+            "additionalProperties": {
+              "description": "Catch-all for unsupported fields - checked during validation"
+            }
+          }
+        ]
+      },
+      "NvCreateEmbeddingRequest": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/CreateEmbeddingRequest"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "nvext": {
+                "oneOf": [
+                  {
+                    "type": "null"
+                  },
+                  {
+                    "$ref": "#/components/schemas/NvExt"
+                  }
+                ]
+              }
+            }
+          }
+        ]
+      },
+      "NvCreateResponse": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/CreateResponse",
+            "description": "Flattened CreateResponse fields (model, input, temperature, etc.)"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "nvext": {
+                "oneOf": [
+                  {
+                    "type": "null"
+                  },
+                  {
+                    "$ref": "#/components/schemas/NvExt"
+                  }
+                ]
+              }
+            }
+          }
+        ]
+      },
+      "NvExt": {
+        "type": "object",
+        "description": "NVIDIA LLM extensions to the OpenAI API",
+        "properties": {
+          "annotations": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "type": "string"
+            },
+            "description": "Annotations\nUser-requested triggers that cause the request to send out-of-band information back in the SSE\nstream using the `event:` field."
+          },
+          "backend_instance_id": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int64",
+            "description": "Targeted backend instance ID for the request\nIf set, the request will be routed to the backend instance with the given ID.\nIf not set, the request will be routed to the best matching instance.",
+            "minimum": 0
+          },
+          "extra_fields": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "type": "string"
+            },
+            "description": "Extra fields to be included in the response's nvext\nThis is a list of field names that should be populated in the response\nSupported fields: \"worker_id\""
+          },
+          "greed_sampling": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "If true, sampling will be forced to be greedy.\nThe backend is responsible for selecting the correct backend-specific options to\nimplement this."
+          },
+          "max_thinking_tokens": {
+            "type": [
+              "integer",
+              "null"
+            ],
+            "format": "int32",
+            "description": "Maximum number of thinking tokens allowed\nNOTE: Currently passed through to backends as a no-op for future implementation",
+            "minimum": 0
+          },
+          "token_data": {
+            "type": [
+              "array",
+              "null"
+            ],
+            "items": {
+              "type": "integer",
+              "format": "int32",
+              "minimum": 0
+            },
+            "description": "Pre-tokenized data to use instead of tokenizing the prompt\nIf provided along with backend_instance_id, these tokens will be used directly\nand tokenization will be skipped."
+          },
+          "use_raw_prompt": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "If true, the preprocessor will try to bypass the prompt template and pass the prompt directly to\nthe tokenizer."
+          }
+        }
+      },
+      "PredictionContent": {
+        "oneOf": [
+          {
+            "type": "object",
+            "description": "The type of the predicted content you want to provide. This type is\ncurrently always `content`.",
+            "required": [
+              "content",
+              "type"
+            ],
+            "properties": {
+              "content": {
+                "$ref": "#/components/schemas/PredictionContentContent",
+                "description": "The content that should be matched when generating a model response."
+              },
+              "type": {
+                "type": "string",
+                "enum": [
+                  "content"
+                ]
+              }
+            }
+          }
+        ],
+        "description": "Static predicted output content, such as the content of a text file that is being regenerated."
+      },
+      "PredictionContentContent": {
+        "oneOf": [
+          {
+            "type": "string",
+            "description": "The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes."
+          },
+          {
+            "type": "array",
+            "items": {
+              "$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
+            },
+            "description": "An array of content parts with a defined type. Supported options differ based on the [model](https://platform.openai.com/docs/models) being used to generate the response. Can contain text inputs."
+          }
+        ],
+        "description": "The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly."
+      },
+      "Prompt": {
+        "oneOf": [
+          {
+            "type": "string"
+          },
+          {
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          },
+          {
+            "type": "array",
+            "items": {
+              "type": "integer",
+              "format": "int32",
+              "minimum": 0
+            }
+          },
+          {
+            "type": "array",
+            "items": {
+              "type": "array",
+              "items": {
+                "type": "integer",
+                "format": "int32",
+                "minimum": 0
+              }
+            }
+          }
+        ]
+      },
+      "PromptConfig": {
+        "type": "object",
+        "description": "Reference to a prompt template and its variables.",
+        "required": [
+          "id"
+        ],
+        "properties": {
+          "id": {
+            "type": "string",
+            "description": "The unique identifier of the prompt template to use."
+          },
+          "variables": {
+            "type": [
+              "object",
+              "null"
+            ],
+            "description": "Optional map of values to substitute in for variables in your prompt. The substitution\nvalues can either be strings, or other Response input types like images or files.\nFor now only supporting Strings.",
+            "additionalProperties": {
+              "type": "string"
+            },
+            "propertyNames": {
+              "type": "string"
+            }
+          },
+          "version": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "Optional version of the prompt template."
+          }
+        }
+      },
+      "ReasoningConfig": {
+        "type": "object",
+        "description": "o-series reasoning settings.",
+        "properties": {
+          "effort": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ReasoningEffort",
+                "description": "Constrain effort on reasoning."
+              }
+            ]
+          },
+          "summary": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ReasoningSummary",
+                "description": "Summary mode for reasoning."
+              }
+            ]
+          }
+        }
+      },
+      "ReasoningEffort": {
+        "type": "string",
+        "enum": [
+          "minimal",
+          "low",
+          "medium",
+          "high"
+        ]
+      },
+      "ReasoningSummary": {
+        "type": "string",
+        "enum": [
+          "auto",
+          "concise",
+          "detailed"
+        ]
+      },
+      "ResponseFormat": {
+        "oneOf": [
+          {
+            "type": "object",
+            "description": "The type of response format being defined: `text`",
+            "required": [
+              "type"
+            ],
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "text"
+                ]
+              }
+            }
+          },
+          {
+            "type": "object",
+            "description": "The type of response format being defined: `json_object`",
+            "required": [
+              "type"
+            ],
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "json_object"
+                ]
+              }
+            }
+          },
+          {
+            "type": "object",
+            "description": "The type of response format being defined: `json_schema`",
+            "required": [
+              "json_schema",
+              "type"
+            ],
+            "properties": {
+              "json_schema": {
+                "$ref": "#/components/schemas/ResponseFormatJsonSchema"
+              },
+              "type": {
+                "type": "string",
+                "enum": [
+                  "json_schema"
+                ]
+              }
+            }
+          }
+        ]
+      },
+      "ResponseFormatJsonSchema": {
+        "type": "object",
+        "required": [
+          "name"
+        ],
+        "properties": {
+          "description": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "A description of what the response format is for, used by the model to determine how to respond in the format."
+          },
+          "name": {
+            "type": "string",
+            "description": "The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
+          },
+          "schema": {
+            "description": "The schema for the response format, described as a JSON Schema object."
+          },
+          "strict": {
+            "type": [
+              "boolean",
+              "null"
+            ],
+            "description": "Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. To learn more, read the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs)."
+          }
+        }
+      },
+      "ServiceTier": {
+        "type": "string",
+        "description": "Service tier request options.",
+        "enum": [
+          "auto",
+          "default",
+          "flex"
+        ]
+      },
+      "Stop": {
+        "oneOf": [
+          {
+            "type": "string"
+          },
+          {
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          }
+        ]
+      },
+      "TextConfig": {
+        "type": "object",
+        "description": "Configuration for text response format.",
+        "required": [
+          "format"
+        ],
+        "properties": {
+          "format": {
+            "$ref": "#/components/schemas/TextResponseFormat",
+            "description": "Defines the format: plain text, JSON object, or JSON schema."
+          }
+        }
+      },
+      "TextResponseFormat": {
+        "oneOf": [
+          {
+            "type": "object",
+            "description": "The type of response format being defined: `text`",
+            "required": [
+              "type"
+            ],
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "text"
+                ]
+              }
+            }
+          },
+          {
+            "type": "object",
+            "description": "The type of response format being defined: `json_object`",
+            "required": [
+              "type"
+            ],
+            "properties": {
+              "type": {
+                "type": "string",
+                "enum": [
+                  "json_object"
+                ]
+              }
+            }
+          },
+          {
+            "allOf": [
+              {
+                "$ref": "#/components/schemas/ResponseFormatJsonSchema",
+                "description": "The type of response format being defined: `json_schema`"
+              },
+              {
+                "type": "object",
+                "required": [
+                  "type"
+                ],
+                "properties": {
+                  "type": {
+                    "type": "string",
+                    "enum": [
+                      "json_schema"
+                    ]
+                  }
+                }
+              }
+            ],
+            "description": "The type of response format being defined: `json_schema`"
+          }
+        ]
+      },
+      "Truncation": {
+        "type": "string",
+        "description": "Truncation strategies.",
+        "enum": [
+          "auto",
+          "disabled"
+        ]
+      },
+      "VideoUrl": {
+        "type": "object",
+        "required": [
+          "url"
+        ],
+        "properties": {
+          "detail": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/ImageDetail",
+                "description": "Specifies the detail level of the video processing."
+              }
+            ]
+          },
+          "url": {
+            "type": "string",
+            "format": "uri",
+            "description": "Either a URL of the video or the base64 encoded video data."
+          },
+          "uuid": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "format": "uuid",
+            "description": "Optional unique identifier for the video."
+          }
+        }
+      },
+      "WebSearchContextSize": {
+        "type": "string",
+        "description": "The amount of context window space to use for the search.",
+        "enum": [
+          "low",
+          "medium",
+          "high"
+        ]
+      },
+      "WebSearchLocation": {
+        "type": "object",
+        "description": "Approximate location parameters for the search.",
+        "properties": {
+          "city": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "Free text input for the city of the user, e.g. `San Francisco`."
+          },
+          "country": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, e.g. `US`."
+          },
+          "region": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "Free text input for the region of the user, e.g. `California`."
+          },
+          "timezone": {
+            "type": [
+              "string",
+              "null"
+            ],
+            "description": "The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. `America/Los_Angeles`."
+          }
+        }
+      },
+      "WebSearchOptions": {
+        "type": "object",
+        "description": "Options for the web search tool.",
+        "properties": {
+          "search_context_size": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/WebSearchContextSize",
+                "description": "High level guidance for the amount of context window space to use for the search. One of `low`, `medium`, or `high`. `medium` is the default."
+              }
+            ]
+          },
+          "user_location": {
+            "oneOf": [
+              {
+                "type": "null"
+              },
+              {
+                "$ref": "#/components/schemas/WebSearchUserLocation",
+                "description": "Approximate location parameters for the search."
+              }
+            ]
+          }
+        }
+      },
+      "WebSearchUserLocation": {
+        "type": "object",
+        "required": [
+          "type",
+          "approximate"
+        ],
+        "properties": {
+          "approximate": {
+            "$ref": "#/components/schemas/WebSearchLocation"
+          },
+          "type": {
+            "$ref": "#/components/schemas/WebSearchUserLocationType"
+          }
+        }
+      },
+      "WebSearchUserLocationType": {
+        "type": "string",
+        "enum": [
+          "approximate"
+        ]
+      }
+    }
+  }
+}
\ No newline at end of file
--- a/lib/async-openai/Cargo.toml
+++ b/lib/async-openai/Cargo.toml
@@ -59,6 +59,7 @@ secrecy = { version = "0.10.3", features = ["serde"] }
 bytes = "1.9.0"
 eventsource-stream = "0.2.3"
 tokio-tungstenite = { version = "0.26.1", optional = true, default-features = false }
+utoipa = { version = "5.3", features = ["url", "uuid"] }

 [dev-dependencies]
 tokio-test = "0.4.4"

--- a/lib/async-openai/src/types/chat.rs
+++ b/lib/async-openai/src/types/chat.rs
@@ -13,13 +13,14 @@ use std::{collections::HashMap, pin::Pin};
 use derive_builder::Builder;
 use futures::Stream;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;

 use url::Url;
 use uuid::{Uuid, uuid};

 use crate::error::OpenAIError;

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum Prompt {
    String(String),
@@ -29,14 +30,14 @@ pub enum Prompt {
    ArrayOfIntegerArray(Vec<Vec<u32>>),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum Stop {
    String(String),           // nullable: true
    StringArray(Vec<String>), // minItems: 1; maxItems: 4
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct Logprobs {
    pub tokens: Vec<String>,
    pub token_logprobs: Vec<Option<f32>>, // Option is to account for null value in the list
@@ -44,7 +45,7 @@ pub struct Logprobs {
    pub text_offset: Vec<u32>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum CompletionFinishReason {
    Stop,
@@ -52,7 +53,7 @@ pub enum CompletionFinishReason {
    ContentFilter,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct Choice {
    pub text: String,
    pub index: u32,
@@ -62,7 +63,7 @@ pub struct Choice {
    pub finish_reason: Option<CompletionFinishReason>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub enum ChatCompletionFunctionCall {
    /// The model does not call a function, and responds to the end-user.
    #[serde(rename = "none")]
@@ -79,7 +80,7 @@ pub enum ChatCompletionFunctionCall {
    Function { name: String },
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum Role {
    System,
@@ -91,7 +92,7 @@ pub enum Role {
 }

 /// The name and arguments of a function that should be called, as generated by the model.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct FunctionCall {
    /// The name of the function to call.
    pub name: String,
@@ -100,7 +101,7 @@ pub struct FunctionCall {
 }

 /// Usage statistics for the completion request.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
 pub struct CompletionUsage {
    /// Number of tokens in the prompt.
    pub prompt_tokens: u32,
@@ -117,7 +118,7 @@ pub struct CompletionUsage {
 }

 /// Breakdown of tokens used in a completion.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
 pub struct PromptTokensDetails {
    /// Audio input tokens present in the prompt.
    pub audio_tokens: Option<u32>,
@@ -126,7 +127,7 @@ pub struct PromptTokensDetails {
 }

 /// Breakdown of tokens used in a completion.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
 pub struct CompletionTokensDetails {
    pub accepted_prediction_tokens: Option<u32>,
    /// Audio input tokens generated by the model.
@@ -141,7 +142,7 @@ pub struct CompletionTokensDetails {
    pub rejected_prediction_tokens: Option<u32>,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestDeveloperMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -156,14 +157,14 @@ pub struct ChatCompletionRequestDeveloperMessage {
    pub name: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum ChatCompletionRequestDeveloperMessageContent {
    Text(String),
    Array(Vec<ChatCompletionRequestMessageContentPartText>),
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestSystemMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -177,7 +178,7 @@ pub struct ChatCompletionRequestSystemMessage {
    pub name: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestMessageContentPartTextArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -187,13 +188,13 @@ pub struct ChatCompletionRequestMessageContentPartText {
    pub text: String,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 pub struct ChatCompletionRequestMessageContentPartRefusal {
    /// The refusal message generated by the model.
    pub refusal: String,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ImageDetail {
    #[default]
@@ -202,7 +203,7 @@ pub enum ImageDetail {
    High,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
 #[builder(name = "ImageUrlArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
@@ -218,7 +219,7 @@ pub struct ImageUrl {
    pub uuid: Option<uuid::Uuid>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
 #[builder(name = "VideoUrlArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
@@ -234,7 +235,7 @@ pub struct VideoUrl {
    pub uuid: Option<uuid::Uuid>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestMessageContentPartImageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
@@ -244,7 +245,7 @@ pub struct ChatCompletionRequestMessageContentPartImage {
    pub image_url: ImageUrl,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestMessageContentPartVideoArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
@@ -254,7 +255,7 @@ pub struct ChatCompletionRequestMessageContentPartVideo {
    pub video_url: VideoUrl,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
 #[builder(name = "AudioUrlArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
@@ -268,7 +269,7 @@ pub struct AudioUrl {
    pub uuid: Option<uuid::Uuid>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestMessageContentPartAudioUrlArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option))]
@@ -278,7 +279,7 @@ pub struct ChatCompletionRequestMessageContentPartAudioUrl {
    pub audio_url: AudioUrl,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum InputAudioFormat {
    Wav,
@@ -286,7 +287,7 @@ pub enum InputAudioFormat {
    Mp3,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
 pub struct InputAudio {
    /// Base64 encoded audio data.
    pub data: String,
@@ -295,7 +296,7 @@ pub struct InputAudio {
 }

 /// Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).
-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestMessageContentPartAudioArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -305,7 +306,7 @@ pub struct ChatCompletionRequestMessageContentPartAudio {
    pub input_audio: InputAudio,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type")]
 #[serde(rename_all = "snake_case")]
 pub enum ChatCompletionRequestUserMessageContentPart {
@@ -316,14 +317,14 @@ pub enum ChatCompletionRequestUserMessageContentPart {
    InputAudio(ChatCompletionRequestMessageContentPartAudio),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type")]
 #[serde(rename_all = "snake_case")]
 pub enum ChatCompletionRequestSystemMessageContentPart {
    Text(ChatCompletionRequestMessageContentPartText),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type")]
 #[serde(rename_all = "snake_case")]
 pub enum ChatCompletionRequestAssistantMessageContentPart {
@@ -331,14 +332,14 @@ pub enum ChatCompletionRequestAssistantMessageContentPart {
    Refusal(ChatCompletionRequestMessageContentPartRefusal),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type")]
 #[serde(rename_all = "snake_case")]
 pub enum ChatCompletionRequestToolMessageContentPart {
    Text(ChatCompletionRequestMessageContentPartText),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum ChatCompletionRequestSystemMessageContent {
    /// The text contents of the system message.
@@ -347,7 +348,7 @@ pub enum ChatCompletionRequestSystemMessageContent {
    Array(Vec<ChatCompletionRequestSystemMessageContentPart>),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum ChatCompletionRequestUserMessageContent {
    /// The text contents of the message.
@@ -356,7 +357,7 @@ pub enum ChatCompletionRequestUserMessageContent {
    Array(Vec<ChatCompletionRequestUserMessageContentPart>),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum ChatCompletionRequestAssistantMessageContent {
    /// The text contents of the message.
@@ -365,7 +366,7 @@ pub enum ChatCompletionRequestAssistantMessageContent {
    Array(Vec<ChatCompletionRequestAssistantMessageContentPart>),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum ChatCompletionRequestToolMessageContent {
    /// The text contents of the tool message.
@@ -374,7 +375,7 @@ pub enum ChatCompletionRequestToolMessageContent {
    Array(Vec<ChatCompletionRequestToolMessageContentPart>),
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestUserMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -388,13 +389,13 @@ pub struct ChatCompletionRequestUserMessage {
    pub name: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
 pub struct ChatCompletionRequestAssistantMessageAudio {
    /// Unique identifier for a previous audio response from the model.
    pub id: String,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestAssistantMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -423,7 +424,7 @@ pub struct ChatCompletionRequestAssistantMessage {
 }

 /// Tool message
-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestToolMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -435,7 +436,7 @@ pub struct ChatCompletionRequestToolMessage {
    pub tool_call_id: String,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
 #[builder(name = "ChatCompletionRequestFunctionMessageArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -448,7 +449,7 @@ pub struct ChatCompletionRequestFunctionMessage {
    pub name: String,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "role")]
 #[serde(rename_all = "lowercase")]
 pub enum ChatCompletionRequestMessage {
@@ -460,7 +461,7 @@ pub enum ChatCompletionRequestMessage {
    Function(ChatCompletionRequestFunctionMessage),
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatCompletionMessageToolCall {
    /// The ID of the tool call.
    pub id: String,
@@ -470,7 +471,7 @@ pub struct ChatCompletionMessageToolCall {
    pub function: FunctionCall,
 }

-#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
 pub struct ChatCompletionResponseMessageAudio {
    /// Unique identifier for this audio response.
    pub id: String,
@@ -483,7 +484,7 @@ pub struct ChatCompletionResponseMessageAudio {
 }

 /// A chat completion message generated by the model.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatCompletionResponseMessage {
    /// The contents of the message.
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -512,7 +513,7 @@ pub struct ChatCompletionResponseMessage {
    pub reasoning_content: Option<String>,
 }

-#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
 #[builder(name = "ChatCompletionFunctionsArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -531,7 +532,7 @@ pub struct ChatCompletionFunctions {
    pub parameters: serde_json::Value,
 }

-#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
 #[builder(name = "FunctionObjectArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -554,7 +555,7 @@ pub struct FunctionObject {
    pub strict: Option<bool>,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ResponseFormat {
    /// The type of response format being defined: `text`
@@ -567,7 +568,7 @@ pub enum ResponseFormat {
    },
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ResponseFormatJsonSchema {
    /// A description of what the response format is for, used by the model to determine how to respond in the format.
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -582,14 +583,14 @@ pub struct ResponseFormatJsonSchema {
    pub strict: Option<bool>,
 }

-#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ChatCompletionToolType {
    #[default]
    Function,
 }

-#[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
 #[builder(name = "ChatCompletionToolArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -601,14 +602,14 @@ pub struct ChatCompletionTool {
    pub function: FunctionObject,
 }

-#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
 pub struct FunctionName {
    /// The name of the function to call.
    pub name: String,
 }

 /// Specifies a tool the model should use. Use to force the model to call a specific function.
-#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
 pub struct ChatCompletionNamedToolChoice {
    /// The type of the tool. Currently, only `function` is supported.
    pub r#type: ChatCompletionToolType,
@@ -623,7 +624,7 @@ pub struct ChatCompletionNamedToolChoice {
 /// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
 ///
 /// `none` is the default when no tools are present. `auto` is the default if tools are present.
-#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ChatCompletionToolChoiceOption {
    #[default]
@@ -634,7 +635,7 @@ pub enum ChatCompletionToolChoiceOption {
    Named(ChatCompletionNamedToolChoice),
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
 #[serde(rename_all = "lowercase")]
 /// The amount of context window space to use for the search.
 pub enum WebSearchContextSize {
@@ -644,14 +645,14 @@ pub enum WebSearchContextSize {
    High,
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum WebSearchUserLocationType {
    Approximate,
 }

 /// Approximate location parameters for the search.
-#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
 pub struct WebSearchLocation {
    ///  The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, e.g. `US`.
    pub country: Option<String>,
@@ -663,7 +664,7 @@ pub struct WebSearchLocation {
    pub timezone: Option<String>,
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct WebSearchUserLocation {
    //  The type of location approximation. Always `approximate`.
    pub r#type: WebSearchUserLocationType,
@@ -672,7 +673,7 @@ pub struct WebSearchUserLocation {
 }

 /// Options for the web search tool.
-#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
 pub struct WebSearchOptions {
    /// High level guidance for the amount of context window space to use for the search. One of `low`, `medium`, or `high`. `medium` is the default.
    pub search_context_size: Option<WebSearchContextSize>,
@@ -681,7 +682,7 @@ pub struct WebSearchOptions {
    pub user_location: Option<WebSearchUserLocation>,
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ServiceTier {
    Auto,
@@ -691,7 +692,7 @@ pub enum ServiceTier {
    Priority,
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ServiceTierResponse {
    Scale,
@@ -700,7 +701,7 @@ pub enum ServiceTierResponse {
    Priority,
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ReasoningEffort {
    Minimal,
@@ -715,7 +716,7 @@ pub enum ReasoningEffort {
 ///
 /// The `gpt-4o-audio-preview` model can also be used to [generate
 /// audio](https://platform.openai.com/docs/guides/audio). To request that this model generate both text and audio responses, you can use: `["text", "audio"]`
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ChatCompletionModalities {
    Text,
@@ -723,7 +724,7 @@ pub enum ChatCompletionModalities {
 }

 /// The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(untagged)]
 pub enum PredictionContentContent {
    /// The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
@@ -733,7 +734,7 @@ pub enum PredictionContentContent {
 }

 /// Static predicted output content, such as the content of a text file that is being regenerated.
-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(tag = "type", rename_all = "lowercase", content = "content")]
 pub enum PredictionContent {
    /// The type of the predicted content you want to provide. This type is
@@ -741,7 +742,7 @@ pub enum PredictionContent {
    Content(PredictionContentContent),
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ChatCompletionAudioVoice {
    Alloy,
@@ -754,7 +755,7 @@ pub enum ChatCompletionAudioVoice {
    Verse,
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ChatCompletionAudioFormat {
    Wav,
@@ -764,7 +765,7 @@ pub enum ChatCompletionAudioFormat {
    Pcm16,
 }

-#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
 pub struct ChatCompletionAudio {
    /// The voice the model uses to respond. Supported voices are `ash`, `ballad`, `coral`, `sage`, and `verse` (also supported but not recommended are `alloy`, `echo`, and `shimmer`; these voices are less expressive).
    pub voice: ChatCompletionAudioVoice,
@@ -772,7 +773,7 @@ pub struct ChatCompletionAudio {
    pub format: ChatCompletionAudioFormat,
 }

-#[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
 #[builder(name = "CreateChatCompletionRequestArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -961,13 +962,13 @@ pub struct CreateChatCompletionRequest {
 }

 /// Options for streaming response. Only set this when you set `stream: true`.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 pub struct ChatCompletionStreamOptions {
    /// If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
    pub include_usage: bool,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum FinishReason {
    Stop,
@@ -977,7 +978,7 @@ pub enum FinishReason {
    FunctionCall,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct TopLogprobs {
    /// The token.
    pub token: String,
@@ -987,7 +988,7 @@ pub struct TopLogprobs {
    pub bytes: Option<Vec<u8>>,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatCompletionTokenLogprob {
    /// The token.
    pub token: String,
@@ -999,21 +1000,21 @@ pub struct ChatCompletionTokenLogprob {
    pub top_logprobs: Vec<TopLogprobs>,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatChoiceLogprobs {
    /// A list of message content tokens with log probability information.
    pub content: Option<Vec<ChatCompletionTokenLogprob>>,
    pub refusal: Option<Vec<ChatCompletionTokenLogprob>>,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum StopReason {
    String(String), // matched user-provided stop sequence
    Int(i64),       // matched stop token id (requires stop_token_id support)
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatChoice {
    /// The index of the choice in the list of choices.
    pub index: u32,
@@ -1034,7 +1035,7 @@ pub struct ChatChoice {
 }

 /// Represents a chat completion response returned by model, based on the provided input.
-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
 pub struct CreateChatCompletionResponse {
    /// A unique identifier for the chat completion.
    pub id: String,
@@ -1066,7 +1067,7 @@ pub struct CreateChatCompletionResponse {
 pub type ChatCompletionResponseStream =
    Pin<Box<dyn Stream<Item = Result<CreateChatCompletionStreamResponse, OpenAIError>> + Send>>;

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct FunctionCallStream {
    /// The name of the function to call.
    pub name: Option<String>,
@@ -1077,7 +1078,7 @@ pub struct FunctionCallStream {
    pub arguments: Option<String>,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatCompletionMessageToolCallChunk {
    pub index: u32,
    /// The ID of the tool call.
@@ -1088,7 +1089,7 @@ pub struct ChatCompletionMessageToolCallChunk {
 }

 /// A chat completion delta generated by streamed model responses.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatCompletionStreamResponseDelta {
    /// The contents of the chunk message.
    pub content: Option<String>,
@@ -1106,7 +1107,7 @@ pub struct ChatCompletionStreamResponseDelta {
    pub reasoning_content: Option<String>,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct ChatChoiceStream {
    /// The index of the choice in the list of choices.
    pub index: u32,
@@ -1132,7 +1133,7 @@ pub struct ChatChoiceStream {
    pub logprobs: Option<ChatChoiceLogprobs>,
 }

-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
 /// Represents a streamed chunk of a chat completion response returned by model, based on the provided input.
 pub struct CreateChatCompletionStreamResponse {
    /// A unique identifier for the chat completion. Each chunk has the same ID.

--- a/lib/async-openai/src/types/completion.rs
+++ b/lib/async-openai/src/types/completion.rs
@@ -13,6 +13,7 @@ use std::{collections::HashMap, pin::Pin};
 use derive_builder::Builder;
 use futures::Stream;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;

 use crate::error::OpenAIError;

@@ -89,7 +90,7 @@ where
    deserializer.deserialize_option(StrictBoolVisitor)
 }

-#[derive(Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)]
 #[builder(name = "CreateCompletionRequestArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -197,7 +198,7 @@ pub struct CreateCompletionRequest {
    pub seed: Option<i64>,
 }

-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
 pub struct CreateCompletionResponse {
    /// A unique identifier for the completion.
    pub id: String,

--- a/lib/async-openai/src/types/embedding.rs
+++ b/lib/async-openai/src/types/embedding.rs
@@ -11,10 +11,11 @@
 use base64::engine::{Engine, general_purpose};
 use derive_builder::Builder;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;

 use crate::error::OpenAIError;

-#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)]
+#[derive(ToSchema, Debug, Serialize, Clone, PartialEq, Deserialize)]
 #[serde(untagged)]
 pub enum EmbeddingInput {
    String(String),
@@ -24,7 +25,7 @@ pub enum EmbeddingInput {
    ArrayOfIntegerArray(Vec<Vec<u32>>),
 }

-#[derive(Debug, Serialize, Default, Clone, PartialEq, Deserialize)]
+#[derive(ToSchema, Debug, Serialize, Default, Clone, PartialEq, Deserialize)]
 #[serde(rename_all = "lowercase")]
 pub enum EncodingFormat {
    #[default]
@@ -32,7 +33,7 @@ pub enum EncodingFormat {
    Base64,
 }

-#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
+#[derive(ToSchema, Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
 #[builder(name = "CreateEmbeddingRequestArgs")]
 #[builder(pattern = "mutable")]
 #[builder(setter(into, strip_option), default)]
@@ -64,7 +65,7 @@ pub struct CreateEmbeddingRequest {
 }

 /// Represents an embedding vector returned by embedding endpoint.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct Embedding {
    /// The index of the embedding in the list of embeddings.
    pub index: u32,
@@ -75,7 +76,7 @@ pub struct Embedding {
    pub embedding: Vec<f32>,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct Base64EmbeddingVector(pub String);

 impl From<Base64EmbeddingVector> for Vec<f32> {
@@ -91,7 +92,7 @@ impl From<Base64EmbeddingVector> for Vec<f32> {
 }

 /// Represents a base64-encoded embedding vector returned by the embedding endpoint.
-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct Base64Embedding {
    /// The index of the embedding in the list of embeddings.
    pub index: u32,
@@ -101,7 +102,7 @@ pub struct Base64Embedding {
    pub embedding: Base64EmbeddingVector,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 pub struct EmbeddingUsage {
    /// The number of tokens used by the prompt.
    pub prompt_tokens: u32,
@@ -109,7 +110,7 @@ pub struct EmbeddingUsage {
    pub total_tokens: u32,
 }

-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
 pub struct CreateEmbeddingResponse {
    pub object: String,
    /// The name of the model used to generate the embedding.
@@ -120,7 +121,7 @@ pub struct CreateEmbeddingResponse {
    pub usage: EmbeddingUsage,
 }

-#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)]
+#[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
 pub struct CreateBase64EmbeddingResponse {
    pub object: String,
    /// The name of the model used to generate the embedding.

--- a/lib/async-openai/src/types/responses.rs
+++ b/lib/async-openai/src/types/responses.rs
@@ -19,9 +19,10 @@ use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use std::collections::HashMap;
 use std::pin::Pin;
+use utoipa::ToSchema;

 /// Role of messages in the API.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum Role {
    User,
@@ -31,7 +32,7 @@ pub enum Role {
 }

 /// Status of input/output items.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum OutputStatus {
    InProgress,
@@ -40,7 +41,7 @@ pub enum OutputStatus {
 }

 /// Input payload: raw text or structured context items.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum Input {
    /// A text input to the model, equivalent to a text input with the user role.
@@ -50,7 +51,7 @@ pub enum Input {
 }

 /// A context item: currently only messages.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged, rename_all = "snake_case")]
 pub enum InputItem {
    Message(InputMessage),
@@ -58,7 +59,7 @@ pub enum InputItem {
 }

 /// A message to prime the model.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "InputMessageArgs",
    pattern = "mutable",
@@ -76,14 +77,14 @@ pub struct InputMessage {
    pub content: InputContent,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
 #[serde(rename_all = "snake_case")]
 pub enum InputMessageType {
    #[default]
    Message,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum InputContent {
    /// A text input to the model.
@@ -93,7 +94,7 @@ pub enum InputContent {
 }

 /// Parts of a message: text, image, video, file, or audio.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ContentType {
    /// A text input to the model.
@@ -108,12 +109,12 @@ pub enum ContentType {
    InputFile(InputFile),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct InputText {
    text: String,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "InputImageArgs",
    pattern = "mutable",
@@ -133,7 +134,7 @@ pub struct InputImage {
    image_url: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "InputVideoArgs",
    pattern = "mutable",
@@ -153,7 +154,7 @@ pub struct InputVideo {
    video_url: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "InputAudioArgs",
    pattern = "mutable",
@@ -171,7 +172,7 @@ pub struct InputAudio {
    audio_url: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "InputFileArgs",
    pattern = "mutable",
@@ -192,7 +193,7 @@ pub struct InputFile {
 }

 /// Builder for a Responses API request.
-#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)]
+#[derive(ToSchema, Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)]
 #[builder(
    name = "CreateResponseArgs",
    pattern = "mutable",
@@ -202,6 +203,8 @@ pub struct InputFile {
 #[builder(build_fn(error = "OpenAIError"))]
 pub struct CreateResponse {
    /// Text, image, or file inputs to the model, used to generate a response.
+    /// The schema uses `value_type = Object` to prevent deep schema recursion from `Input`'s nested content types.
+    #[schema(value_type = Object)]
    pub input: Input,

    /// Model ID used to generate the response, like `gpt-4o`.
@@ -319,12 +322,14 @@ pub struct CreateResponse {
    /// How the model should select which tool (or tools) to use when generating
    /// a response.
    #[serde(skip_serializing_if = "Option::is_none")]
+    #[schema(value_type = Object)]
    pub tool_choice: Option<ToolChoice>,

    /// An array of tools the model may call while generating a response.
    /// Can include built-in tools (file_search, web_search_preview,
    /// computer_use_preview) or custom function definitions.
    #[serde(skip_serializing_if = "Option::is_none")]
+    #[schema(value_type = Vec<Object>)]
    pub tools: Option<Vec<ToolDefinition>>,

    /// An integer between 0 and 20 specifying the number of most likely tokens to return
@@ -353,7 +358,7 @@ pub struct CreateResponse {
 }

 /// Configuration identifying the prompt template to use.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct PromptConfig {
    /// The unique identifier of the prompt template to use.
    pub id: String,
@@ -370,7 +375,7 @@ pub struct PromptConfig {
 }

 /// Service tier request options.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ServiceTier {
    Auto,
@@ -379,7 +384,7 @@ pub enum ServiceTier {
 }

 /// Truncation strategies.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum Truncation {
    Auto,
@@ -387,7 +392,7 @@ pub enum Truncation {
 }

 /// o-series reasoning settings.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "ReasoningConfigArgs",
    pattern = "mutable",
@@ -404,7 +409,7 @@ pub struct ReasoningConfig {
    pub summary: Option<ReasoningSummary>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ReasoningSummary {
    Auto,
@@ -413,13 +418,13 @@ pub enum ReasoningSummary {
 }

 /// Configuration for text response format.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct TextConfig {
    /// Defines the format: plain text, JSON object, or JSON schema.
    pub format: TextResponseFormat,
 }

-#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum TextResponseFormat {
    /// The type of response format being defined: `text`
@@ -431,7 +436,7 @@ pub enum TextResponseFormat {
 }

 /// Definitions for model-callable tools.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ToolDefinition {
    /// File search tool.
@@ -452,7 +457,7 @@ pub enum ToolDefinition {
    LocalShell,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "FileSearchArgs",
    pattern = "mutable",
@@ -474,7 +479,7 @@ pub struct FileSearch {
    pub ranking_options: Option<RankingOptions>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "FunctionArgs",
    pattern = "mutable",
@@ -494,7 +499,7 @@ pub struct Function {
    pub description: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "WebSearchPreviewArgs",
    pattern = "mutable",
@@ -510,7 +515,7 @@ pub struct WebSearchPreview {
    pub search_context_size: Option<WebSearchContextSize>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
 #[serde(rename_all = "lowercase")]
 pub enum WebSearchContextSize {
    Low,
@@ -518,7 +523,7 @@ pub enum WebSearchContextSize {
    High,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "ComputerUsePreviewArgs",
    pattern = "mutable",
@@ -535,7 +540,7 @@ pub struct ComputerUsePreview {
 }

 /// Options for search result ranking.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct RankingOptions {
    /// The ranker to use for the file search.
    pub ranker: String,
@@ -546,7 +551,7 @@ pub struct RankingOptions {
 }

 /// Filters for file search.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum Filter {
    /// A filter used to compare a specified attribute key to a given value using a defined
@@ -557,7 +562,7 @@ pub enum Filter {
 }

 /// Single comparison filter.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct ComparisonFilter {
    /// Specifies the comparison operator
    #[serde(rename = "type")]
@@ -568,7 +573,7 @@ pub struct ComparisonFilter {
    pub value: serde_json::Value,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 pub enum ComparisonType {
    #[serde(rename = "eq")]
    Equals,
@@ -585,7 +590,7 @@ pub enum ComparisonType {
 }

 /// Combine multiple filters.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct CompoundFilter {
    /// Type of operation
    #[serde(rename = "type")]
@@ -594,7 +599,7 @@ pub struct CompoundFilter {
    pub filters: Vec<Filter>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum CompoundType {
    And,
@@ -602,7 +607,7 @@ pub enum CompoundType {
 }

 /// Approximate user location for web search.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "LocationArgs",
    pattern = "mutable",
@@ -629,7 +634,7 @@ pub struct Location {
 }

 /// MCP (Model Context Protocol) tool configuration.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "McpArgs",
    pattern = "mutable",
@@ -654,7 +659,7 @@ pub struct Mcp {
 }

 /// Allowed tools configuration for MCP.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum AllowedTools {
    /// A flat list of allowed tool names.
@@ -664,7 +669,7 @@ pub enum AllowedTools {
 }

 /// Filter object for MCP allowed tools.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct McpAllowedToolsFilter {
    /// Names of tools in the filter
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -672,7 +677,7 @@ pub struct McpAllowedToolsFilter {
 }

 /// Approval policy or filter for MCP tools.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum RequireApproval {
    /// A blanket policy: "always" or "never".
@@ -681,7 +686,7 @@ pub enum RequireApproval {
    Filter(McpApprovalFilter),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum RequireApprovalPolicy {
    Always,
@@ -689,7 +694,7 @@ pub enum RequireApprovalPolicy {
 }

 /// Filter object for MCP tool approval.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct McpApprovalFilter {
    /// A list of tools that always require approval.
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -700,7 +705,7 @@ pub struct McpApprovalFilter {
 }

 /// Container configuration for a code interpreter.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum CodeInterpreterContainer {
    /// A simple container ID.
@@ -710,7 +715,7 @@ pub enum CodeInterpreterContainer {
 }

 /// Auto configuration for code interpreter container.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum CodeInterpreterContainerKind {
    Auto {
@@ -721,7 +726,7 @@ pub enum CodeInterpreterContainerKind {
 }

 /// Code interpreter tool definition.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "CodeInterpreterArgs",
    pattern = "mutable",
@@ -735,7 +740,7 @@ pub struct CodeInterpreter {
 }

 /// Mask image input for image generation.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct InputImageMask {
    /// Base64-encoded mask image.
    #[serde(skip_serializing_if = "Option::is_none")]
@@ -746,7 +751,7 @@ pub struct InputImageMask {
 }

 /// Image generation tool definition.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
 #[builder(
    name = "ImageGenerationArgs",
    pattern = "mutable",
@@ -784,7 +789,7 @@ pub struct ImageGeneration {
    pub size: Option<ImageGenerationSize>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ImageGenerationBackground {
    Transparent,
@@ -792,7 +797,7 @@ pub enum ImageGenerationBackground {
    Auto,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ImageGenerationOutputFormat {
    Png,
@@ -800,7 +805,7 @@ pub enum ImageGenerationOutputFormat {
    Jpeg,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ImageGenerationQuality {
    Low,
@@ -809,7 +814,7 @@ pub enum ImageGenerationQuality {
    Auto,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ImageGenerationSize {
    Auto,
@@ -822,7 +827,7 @@ pub enum ImageGenerationSize {
 }

 /// Control how the model picks or is forced to pick a tool.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(untagged)]
 pub enum ToolChoice {
    /// Controls which (if any) tool is called by the model.
@@ -841,7 +846,7 @@ pub enum ToolChoice {
 }

 /// Simple tool-choice modes.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "lowercase")]
 pub enum ToolChoiceMode {
    /// The model will not call any tool and instead generates a message.
@@ -853,7 +858,7 @@ pub enum ToolChoiceMode {
 }

 /// Hosted tool type identifiers.
-#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum HostedToolType {
    FileSearch,
@@ -862,7 +867,7 @@ pub enum HostedToolType {
 }

 /// Error returned by the API when a request fails.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct ErrorObject {
    /// The error code for the response.
    pub code: String,
@@ -871,14 +876,14 @@ pub struct ErrorObject {
 }

 /// Details about an incomplete response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct IncompleteDetails {
    /// The reason why the response is incomplete.
    pub reason: String,
 }

 /// A simple text output from the model.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct OutputText {
    /// The annotations of the text output.
    pub annotations: Vec<Annotation>,
@@ -886,7 +891,7 @@ pub struct OutputText {
    pub text: String,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum Annotation {
    /// A citation to a file.
@@ -897,7 +902,7 @@ pub enum Annotation {
    FilePath(FilePath),
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct FileCitation {
    /// The ID of the file.
    file_id: String,
@@ -905,7 +910,7 @@ pub struct FileCitation {
    index: u32,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct UrlCitation {
    /// The index of the last character of the URL citation in the message.
    end_index: u32,
@@ -917,7 +922,7 @@ pub struct UrlCitation {
    url: String,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct FilePath {
    /// The ID of the file.
    file_id: String,
@@ -926,14 +931,14 @@ pub struct FilePath {
 }

 /// A refusal explanation from the model.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct Refusal {
     /// The refusal explanation from the model.
    pub refusal: String,
 }

 /// A message generated by the model.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct OutputMessage {
    /// The content of the output message.
    pub content: Vec<Content>,
@@ -945,7 +950,7 @@ pub struct OutputMessage {
    pub status: OutputStatus,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum Content {
    /// A text output from the model.
@@ -955,7 +960,7 @@ pub enum Content {
 }

 /// Nested content within an output message.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum OutputContent {
    /// An output message from the model.
@@ -987,7 +992,7 @@ pub enum OutputContent {
 }

 /// A reasoning item representing the model's chain of thought, including summary paragraphs.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct ReasoningItem {
    /// Unique identifier of the reasoning content.
    pub id: String,
@@ -1003,14 +1008,14 @@ pub struct ReasoningItem {
 }

 /// A single summary text fragment from reasoning.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct SummaryText {
    /// A short summary of the reasoning used by the model.
    pub text: String,
 }

 /// File search tool call output.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct FileSearchCallOutput {
    /// The unique ID of the file search tool call.
    pub id: String,
@@ -1023,7 +1028,7 @@ pub struct FileSearchCallOutput {
    pub results: Option<Vec<FileSearchResult>>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum FileSearchCallOutputStatus {
    InProgress,
@@ -1034,7 +1039,7 @@ pub enum FileSearchCallOutputStatus {
 }

 /// A single result from a file search.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct FileSearchResult {
    /// The unique ID of the file.
    pub file_id: String,
@@ -1051,7 +1056,7 @@ pub struct FileSearchResult {
    pub attributes: HashMap<String, serde_json::Value>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct SafetyCheck {
    /// The ID of the safety check.
    pub id: String,
@@ -1062,7 +1067,7 @@ pub struct SafetyCheck {
 }

 /// Web search tool call output.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct WebSearchCallOutput {
    /// The unique ID of the web search tool call.
    pub id: String,
@@ -1071,7 +1076,7 @@ pub struct WebSearchCallOutput {
 }

 /// Output from a computer tool call.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct ComputerCallOutput {
    pub action: ComputerCallAction,
    /// An identifier used when responding to the tool call with output.
@@ -1085,14 +1090,14 @@ pub struct ComputerCallOutput {
 }

 /// A point in 2D space.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Point {
    pub x: i32,
    pub y: i32,
 }

 /// Represents all user‐triggered actions.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ComputerCallAction {
    /// A click action.
@@ -1123,7 +1128,7 @@ pub enum ComputerCallAction {
    Wait,
 }

-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 #[serde(rename_all = "snake_case")]
 pub enum ButtonPress {
    Left,
@@ -1134,7 +1139,7 @@ pub enum ButtonPress {
 }

 /// A click action.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Click {
    /// Which mouse button was pressed.
    pub button: ButtonPress,
@@ -1145,7 +1150,7 @@ pub struct Click {
 }

 /// A double click action.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct DoubleClick {
    /// X‐coordinate of the double click.
    pub x: i32,
@@ -1154,7 +1159,7 @@ pub struct DoubleClick {
 }

 /// A drag action.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Drag {
    /// The path of points the cursor drags through.
    pub path: Vec<Point>,
@@ -1165,14 +1170,14 @@ pub struct Drag {
 }

 /// A keypress action.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct KeyPress {
    /// The list of keys to press (e.g. `["Control", "C"]`).
    pub keys: Vec<String>,
 }

 /// A mouse move action.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct MoveAction {
    /// X‐coordinate to move to.
    pub x: i32,
@@ -1181,7 +1186,7 @@ pub struct MoveAction {
 }

 /// A scroll action.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct Scroll {
    /// Horizontal scroll distance.
    pub scroll_x: i32,
@@ -1194,14 +1199,14 @@ pub struct Scroll {
 }

 /// A typing (text entry) action.
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct TypeAction {
    /// The text to type.
    pub text: String,
 }

 /// Metadata for a function call request.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct FunctionCall {
    /// The unique ID of the function tool call.
    pub id: String,
@@ -1216,7 +1221,7 @@ pub struct FunctionCall {
 }

 /// Output of an image generation request.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct ImageGenerationCallOutput {
    /// Unique ID of the image generation call.
    pub id: String,
@@ -1227,7 +1232,7 @@ pub struct ImageGenerationCallOutput {
 }

 /// Output of a code interpreter request.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct CodeInterpreterCallOutput {
    /// The code that was executed.
    pub code: String,
@@ -1242,7 +1247,7 @@ pub struct CodeInterpreterCallOutput {
 }

 /// Individual result from a code interpreter: either logs or files.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum CodeInterpreterResult {
    /// Text logs from the execution.
@@ -1252,20 +1257,20 @@ pub enum CodeInterpreterResult {
 }

 /// The output containing execution logs.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct CodeInterpreterTextOutput {
    /// The logs of the code interpreter tool call.
    pub logs: String,
 }

 /// The output containing file references.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct CodeInterpreterFileOutput {
    /// List of file IDs produced.
    pub files: Vec<CodeInterpreterFile>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct CodeInterpreterFile {
    /// The ID of the file.
    file_id: String,
@@ -1274,7 +1279,7 @@ pub struct CodeInterpreterFile {
 }

 /// Output of a local shell command request.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct LocalShellCallOutput {
    /// Details of the exec action.
    pub action: LocalShellAction,
@@ -1287,7 +1292,7 @@ pub struct LocalShellCallOutput {
 }

 /// Define the shape of a local shell action (exec).
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct LocalShellAction {
    /// The command to run.
    pub command: Vec<String>,
@@ -1302,7 +1307,7 @@ pub struct LocalShellAction {
 }

 /// Output of an MCP server tool invocation.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct McpCallOutput {
    /// JSON string of the arguments passed.
    pub arguments: String,
@@ -1319,7 +1324,7 @@ pub struct McpCallOutput {
 }

 /// Output listing tools available on an MCP server.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct McpListToolsOutput {
    /// Unique ID of the list request.
    pub id: String,
@@ -1333,7 +1338,7 @@ pub struct McpListToolsOutput {
 }

 /// Information about a single tool on an MCP server.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct McpToolInfo {
    /// The name of the tool.
    pub name: String,
@@ -1348,7 +1353,7 @@ pub struct McpToolInfo {
 }

 /// Output representing a human approval request for an MCP tool.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct McpApprovalRequestOutput {
    /// JSON string of arguments for the tool.
    pub arguments: String,
@@ -1361,7 +1366,7 @@ pub struct McpApprovalRequestOutput {
 }

 /// Usage statistics for a response.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct Usage {
    /// The number of input tokens.
    pub input_tokens: u32,
@@ -1376,7 +1381,7 @@ pub struct Usage {
 }

 /// The complete response returned by the Responses API.
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 pub struct Response {
    /// Unix timestamp (in seconds) when this Response was created.
    pub created_at: u64,
@@ -1475,7 +1480,7 @@ pub struct Response {
    pub user: Option<String>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum Status {
    Completed,
@@ -1485,7 +1490,7 @@ pub enum Status {
 }

 /// Event types for streaming responses from the Responses API
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[serde(tag = "type")]
 #[non_exhaustive] // Future-proof against breaking changes
 pub enum ResponseEvent {
@@ -1639,21 +1644,21 @@ pub enum ResponseEvent {
 /// Stream of response events
 pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<ResponseEvent, OpenAIError>> + Send>>;

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseCreated {
    pub sequence_number: u64,
    pub response: ResponseMetadata,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseInProgress {
    pub sequence_number: u64,
    pub response: ResponseMetadata,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseOutputItemAdded {
    pub sequence_number: u64,
@@ -1661,7 +1666,7 @@ pub struct ResponseOutputItemAdded {
    pub item: OutputItem,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseContentPartAdded {
    pub sequence_number: u64,
@@ -1671,7 +1676,7 @@ pub struct ResponseContentPartAdded {
    pub part: ContentPart,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseOutputTextDelta {
    pub sequence_number: u64,
@@ -1683,7 +1688,7 @@ pub struct ResponseOutputTextDelta {
    pub logprobs: Option<serde_json::Value>,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseContentPartDone {
    pub sequence_number: u64,
@@ -1693,7 +1698,7 @@ pub struct ResponseContentPartDone {
    pub part: ContentPart,
 }

-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseOutputItemDone {
    pub sequence_number: u64,
@@ -1702,7 +1707,7 @@ pub struct ResponseOutputItemDone {
 }

 /// Response completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseCompleted {
    pub sequence_number: u64,
@@ -1710,7 +1715,7 @@ pub struct ResponseCompleted {
 }

 /// Response failed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseFailed {
    pub sequence_number: u64,
@@ -1718,7 +1723,7 @@ pub struct ResponseFailed {
 }

 /// Response incomplete event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseIncomplete {
    pub sequence_number: u64,
@@ -1726,7 +1731,7 @@ pub struct ResponseIncomplete {
 }

 /// Response queued event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseQueued {
    pub sequence_number: u64,
@@ -1734,7 +1739,7 @@ pub struct ResponseQueued {
 }

 /// Text output completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseOutputTextDone {
    pub sequence_number: u64,
@@ -1746,7 +1751,7 @@ pub struct ResponseOutputTextDone {
 }

 /// Refusal delta event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseRefusalDelta {
    pub sequence_number: u64,
@@ -1757,7 +1762,7 @@ pub struct ResponseRefusalDelta {
 }

 /// Refusal done event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseRefusalDone {
    pub sequence_number: u64,
@@ -1768,7 +1773,7 @@ pub struct ResponseRefusalDone {
 }

 /// Function call arguments delta event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseFunctionCallArgumentsDelta {
    pub sequence_number: u64,
@@ -1778,7 +1783,7 @@ pub struct ResponseFunctionCallArgumentsDelta {
 }

 /// Function call arguments done event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseFunctionCallArgumentsDone {
    pub sequence_number: u64,
@@ -1788,7 +1793,7 @@ pub struct ResponseFunctionCallArgumentsDone {
 }

 /// Error event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseError {
    pub sequence_number: u64,
@@ -1798,7 +1803,7 @@ pub struct ResponseError {
 }

 /// File search call in progress event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseFileSearchCallInProgress {
    pub sequence_number: u64,
@@ -1807,7 +1812,7 @@ pub struct ResponseFileSearchCallInProgress {
 }

 /// File search call searching event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseFileSearchCallSearching {
    pub sequence_number: u64,
@@ -1816,7 +1821,7 @@ pub struct ResponseFileSearchCallSearching {
 }

 /// File search call completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseFileSearchCallCompleted {
    pub sequence_number: u64,
@@ -1825,7 +1830,7 @@ pub struct ResponseFileSearchCallCompleted {
 }

 /// Web search call in progress event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseWebSearchCallInProgress {
    pub sequence_number: u64,
@@ -1834,7 +1839,7 @@ pub struct ResponseWebSearchCallInProgress {
 }

 /// Web search call searching event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseWebSearchCallSearching {
    pub sequence_number: u64,
@@ -1843,7 +1848,7 @@ pub struct ResponseWebSearchCallSearching {
 }

 /// Web search call completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseWebSearchCallCompleted {
    pub sequence_number: u64,
@@ -1852,7 +1857,7 @@ pub struct ResponseWebSearchCallCompleted {
 }

 /// Reasoning summary part added event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseReasoningSummaryPartAdded {
    pub sequence_number: u64,
@@ -1863,7 +1868,7 @@ pub struct ResponseReasoningSummaryPartAdded {
 }

 /// Reasoning summary part done event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseReasoningSummaryPartDone {
    pub sequence_number: u64,
@@ -1874,7 +1879,7 @@ pub struct ResponseReasoningSummaryPartDone {
 }

 /// Reasoning summary text delta event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseReasoningSummaryTextDelta {
    pub sequence_number: u64,
@@ -1885,7 +1890,7 @@ pub struct ResponseReasoningSummaryTextDelta {
 }

 /// Reasoning summary text done event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseReasoningSummaryTextDone {
    pub sequence_number: u64,
@@ -1896,7 +1901,7 @@ pub struct ResponseReasoningSummaryTextDone {
 }

 /// Reasoning summary delta event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseReasoningSummaryDelta {
    pub sequence_number: u64,
@@ -1907,7 +1912,7 @@ pub struct ResponseReasoningSummaryDelta {
 }

 /// Reasoning summary done event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseReasoningSummaryDone {
    pub sequence_number: u64,
@@ -1918,7 +1923,7 @@ pub struct ResponseReasoningSummaryDone {
 }

 /// Image generation call in progress event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseImageGenerationCallInProgress {
    pub sequence_number: u64,
@@ -1927,7 +1932,7 @@ pub struct ResponseImageGenerationCallInProgress {
 }

 /// Image generation call generating event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseImageGenerationCallGenerating {
    pub sequence_number: u64,
@@ -1936,7 +1941,7 @@ pub struct ResponseImageGenerationCallGenerating {
 }

 /// Image generation call partial image event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseImageGenerationCallPartialImage {
    pub sequence_number: u64,
@@ -1947,7 +1952,7 @@ pub struct ResponseImageGenerationCallPartialImage {
 }

 /// Image generation call completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseImageGenerationCallCompleted {
    pub sequence_number: u64,
@@ -1956,7 +1961,7 @@ pub struct ResponseImageGenerationCallCompleted {
 }

 /// MCP call arguments delta event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpCallArgumentsDelta {
    pub sequence_number: u64,
@@ -1966,7 +1971,7 @@ pub struct ResponseMcpCallArgumentsDelta {
 }

 /// MCP call arguments done event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpCallArgumentsDone {
    pub sequence_number: u64,
@@ -1976,7 +1981,7 @@ pub struct ResponseMcpCallArgumentsDone {
 }

 /// MCP call completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpCallCompleted {
    pub sequence_number: u64,
@@ -1985,7 +1990,7 @@ pub struct ResponseMcpCallCompleted {
 }

 /// MCP call failed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpCallFailed {
    pub sequence_number: u64,
@@ -1994,7 +1999,7 @@ pub struct ResponseMcpCallFailed {
 }

 /// MCP call in progress event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpCallInProgress {
    pub sequence_number: u64,
@@ -2003,7 +2008,7 @@ pub struct ResponseMcpCallInProgress {
 }

 /// MCP list tools completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpListToolsCompleted {
    pub sequence_number: u64,
@@ -2012,7 +2017,7 @@ pub struct ResponseMcpListToolsCompleted {
 }

 /// MCP list tools failed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpListToolsFailed {
    pub sequence_number: u64,
@@ -2021,7 +2026,7 @@ pub struct ResponseMcpListToolsFailed {
 }

 /// MCP list tools in progress event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMcpListToolsInProgress {
    pub sequence_number: u64,
@@ -2030,7 +2035,7 @@ pub struct ResponseMcpListToolsInProgress {
 }

 /// Code interpreter call in progress event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseCodeInterpreterCallInProgress {
    pub sequence_number: u64,
@@ -2039,7 +2044,7 @@ pub struct ResponseCodeInterpreterCallInProgress {
 }

 /// Code interpreter call interpreting event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseCodeInterpreterCallInterpreting {
    pub sequence_number: u64,
@@ -2048,7 +2053,7 @@ pub struct ResponseCodeInterpreterCallInterpreting {
 }

 /// Code interpreter call completed event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseCodeInterpreterCallCompleted {
    pub sequence_number: u64,
@@ -2057,7 +2062,7 @@ pub struct ResponseCodeInterpreterCallCompleted {
 }

 /// Code interpreter call code delta event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseCodeInterpreterCallCodeDelta {
    pub sequence_number: u64,
@@ -2067,7 +2072,7 @@ pub struct ResponseCodeInterpreterCallCodeDelta {
 }

 /// Code interpreter call code done event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseCodeInterpreterCallCodeDone {
    pub sequence_number: u64,
@@ -2077,7 +2082,7 @@ pub struct ResponseCodeInterpreterCallCodeDone {
 }

 /// Response metadata
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseMetadata {
    pub id: String,
@@ -2146,7 +2151,7 @@ pub struct ResponseMetadata {
 }

 /// Output item
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct OutputItem {
    pub id: String,
@@ -2164,7 +2169,7 @@ pub struct OutputItem {
 }

 /// Content part
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ContentPart {
    #[serde(rename = "type")]
@@ -2180,7 +2185,7 @@ pub struct ContentPart {

 /// Collects streaming response events into a complete response
 /// Output text annotation added event
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct ResponseOutputTextAnnotationAdded {
    pub sequence_number: u64,
@@ -2192,7 +2197,7 @@ pub struct ResponseOutputTextAnnotationAdded {
 }

 /// Text annotation object for output text
-#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
+#[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
 #[non_exhaustive]
 pub struct TextAnnotation {
    #[serde(rename = "type")]

--- a/lib/llm/Cargo.toml
+++ b/lib/llm/Cargo.toml
@@ -195,6 +195,10 @@ insta = { version = "1.41", features = [
 lazy_static = "1.4"
 mockito = "1.7.0"

+[[bin]]
+name = "generate-frontend-openapi"
+path = "src/bin/generate_frontend_openapi.rs"
+
 [build-dependencies]
 tonic-build = { version = "0.13.1" }


--- a/lib/llm/src/bin/generate_frontend_openapi.rs
+++ b/lib/llm/src/bin/generate_frontend_openapi.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+//! Helper binary to generate the Dynamo HTTP frontend OpenAPI specification.
+//!
+//! This allows CI, documentation tooling, and NIM to obtain the exact same
+//! OpenAPI document that is served at `/openapi.json` by the frontend
+//! without having to start the HTTP service and scrape the endpoint.
+//!
+//! Usage (from the repository root):
+//! ```bash
+//! cargo run -p dynamo-llm --bin generate-frontend-openapi
+//! ```
+//! The generated spec will be written to:
+//!   `docs/frontends/openapi.json`
+
+use std::fs;
+use std::path::PathBuf;
+use std::thread;
+
+use anyhow::Context as _;
+
+use dynamo_llm::http::service::{openapi_docs, service_v2::HttpService};
+
+/// Stack size for the generator thread (8 MB).
+/// The utoipa schema derivation for deeply nested OpenAI types requires
+/// additional stack space due to recursive type expansion.
+const GENERATOR_STACK_SIZE: usize = 8 * 1024 * 1024;
+
+/// Runs the OpenAPI generator on a dedicated thread and propagates its result.
+///
+/// # Errors
+///
+/// Returns an error if the thread cannot be spawned, if the generator thread
+/// panics, or if `generate_openapi` itself fails.
+fn main() -> anyhow::Result<()> {
+    // Spawn a thread with a larger stack to handle deeply nested schema generation
+    let handle = thread::Builder::new()
+        .stack_size(GENERATOR_STACK_SIZE)
+        .spawn(generate_openapi)
+        .context("failed to spawn generator thread")?;
+
+    match handle.join() {
+        // The thread finished normally; forward whatever `generate_openapi` returned.
+        Ok(result) => result,
+        Err(payload) => {
+            // A panic payload is a `Box<dyn Any + Send>` whose `Debug` output is only
+            // `Any { .. }`, so recover the actual message when the payload is a string.
+            let message = payload
+                .downcast_ref::<&str>()
+                .copied()
+                .or_else(|| payload.downcast_ref::<String>().map(String::as_str))
+                .unwrap_or("non-string panic payload");
+            Err(anyhow::anyhow!("generator thread panicked: {}", message))
+        }
+    }
+}
+
+/// Builds the frontend router, derives its OpenAPI document, and writes it to
+/// `docs/frontends/openapi.json` under the repository root.
+///
+/// # Errors
+///
+/// Returns an error if the `HttpService` cannot be built, the repository root
+/// cannot be derived from the crate directory, the spec cannot be serialized,
+/// or the output directory or file cannot be created or written.
+fn generate_openapi() -> anyhow::Result<()> {
+    // Build an HttpService instance with all standard OpenAI-compatible
+    // frontend endpoints enabled so that the generated OpenAPI document
+    // reflects the full surface area exposed to users.
+    //
+    // This does NOT start any network listeners; it only builds the router
+    // graph and associated route documentation.
+    let http_service = HttpService::builder()
+        .enable_chat_endpoints(true)
+        .enable_cmpl_endpoints(true)
+        .enable_embeddings_endpoints(true)
+        .enable_responses_endpoints(true)
+        .build()
+        .context("failed to build HttpService for OpenAPI generation")?;
+
+    let route_docs = http_service.route_docs().to_vec();
+    let openapi = openapi_docs::generate_openapi_spec(&route_docs);
+
+    // Anchor the output at the repository root rather than the current working
+    // directory, so the spec lands in the same place no matter where the binary
+    // is invoked from. `CARGO_MANIFEST_DIR` is this crate's directory
+    // (`lib/llm`), which makes the repository root its second ancestor.
+    let manifest_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    let repo_root = manifest_dir
+        .ancestors()
+        .nth(2)
+        .context("failed to locate repository root from CARGO_MANIFEST_DIR")?;
+    let out_dir = repo_root.join("docs/frontends");
+    let out_path = out_dir.join("openapi.json");
+
+    fs::create_dir_all(&out_dir)
+        .with_context(|| format!("failed to create OpenAPI output directory: {out_dir:?}"))?;
+
+    let json =
+        serde_json::to_string_pretty(&openapi).context("failed to serialize OpenAPI spec")?;
+
+    fs::write(&out_path, json)
+        .with_context(|| format!("failed to write OpenAPI spec to: {out_path:?}"))?;
+
+    println!(
+        "Generated Dynamo frontend OpenAPI specification at {}",
+        out_path.display()
+    );
+
+    Ok(())
+}
--- a/lib/llm/src/http/service/openai.rs
+++ b/lib/llm/src/http/service/openai.rs
@@ -330,6 +330,9 @@ async fn completions(
    // return a 503 if the service is not ready
    check_ready(&state)?;

+    // Validate stream_options is only used when streaming (NVBug 5662680)
+    validate_completion_stream_options(&request)?;
+
    validate_completion_fields_generic(&request)?;

    // Detect batch prompts
@@ -873,6 +876,9 @@ async fn chat_completions(
    // Handle required fields like messages shouldn't be empty.
    validate_chat_completion_required_fields(&request)?;

+    // Validate stream_options is only used when streaming (NVBug 5662680)
+    validate_chat_completion_stream_options(&request)?;
+
    // Handle Rest of Validation Errors
    validate_chat_completion_fields_generic(&request)?;

@@ -1063,6 +1069,22 @@ pub fn validate_chat_completion_required_fields(
    Ok(())
 }

+/// Rejects chat completion requests that set `stream_options` without
+/// enabling streaming (NVBug 5662680).
+///
+/// A missing `stream` field is treated as `false`.
+///
+/// # Errors
+///
+/// Returns the error response built from an `HttpError` with code 400 when
+/// `stream_options` is present and `stream` is absent or `false`.
+pub fn validate_chat_completion_stream_options(
+    request: &NvCreateChatCompletionRequest,
+) -> Result<(), ErrorResponse> {
+    let body = &request.inner;
+
+    // Nothing to reject when the options are absent or streaming is enabled.
+    if body.stream_options.is_none() || body.stream.unwrap_or(false) {
+        return Ok(());
+    }
+
+    let mut message = VALIDATION_PREFIX.to_string();
+    message.push_str("The 'stream_options' field is only allowed when 'stream' is set to true.");
+    Err(ErrorMessage::from_http_error(HttpError { code: 400, message }))
+}
+
+
 /// Validates a chat completion request and returns an error response if validation fails.
 ///
 /// This function calls the `validate` method implemented for `NvCreateChatCompletionRequest`.
@@ -1078,6 +1100,22 @@ pub fn validate_chat_completion_fields_generic(
    })
 }

+/// Rejects completion requests that set `stream_options` without enabling
+/// streaming (NVBug 5662680).
+///
+/// A missing `stream` field is treated as `false`.
+///
+/// # Errors
+///
+/// Returns the error response built from an `HttpError` with code 400 when
+/// `stream_options` is present and `stream` is absent or `false`.
+pub fn validate_completion_stream_options(
+    request: &NvCreateCompletionRequest,
+) -> Result<(), ErrorResponse> {
+    let body = &request.inner;
+    let options_without_stream = body.stream_options.is_some() && !body.stream.unwrap_or(false);
+
+    match options_without_stream {
+        false => Ok(()),
+        true => {
+            let mut message = VALIDATION_PREFIX.to_string();
+            message.push_str(
+                "The 'stream_options' field is only allowed when 'stream' is set to true.",
+            );
+            Err(ErrorMessage::from_http_error(HttpError { code: 400, message }))
+        }
+    }
+}
+
+
 /// Validates a completion request and returns an error response if validation fails.
 ///
 /// This function calls the `validate` method implemented for `NvCreateCompletionRequest`.
@@ -1395,9 +1433,9 @@ async fn list_models_openai(
    for model_name in models {
        data.push(ModelListing {
            id: model_name.clone(),
-            object: "object",
-            created,                        // Where would this come from?
-            owned_by: "nvidia".to_string(), // Get organization from config
+            object: "model", // Per OpenAI spec, this should be "model"
+            created,
+            owned_by: "nvidia".to_string(),
        });
    }

@@ -1417,8 +1455,8 @@ struct ListModelOpenAI {
 #[derive(Serialize)]
 struct ModelListing {
    id: String,
-    object: &'static str, // always "object"
-    created: u64,         //  Seconds since epoch
+    object: &'static str, // always "model" per OpenAI spec
+    created: u64,         // Seconds since epoch
    owned_by: String,
 }


--- a/lib/llm/src/http/service/openapi_docs.rs
+++ b/lib/llm/src/http/service/openapi_docs.rs
@@ -54,12 +54,24 @@ use crate::http::service::RouteDoc;
    ),
    servers(
        (url = "/", description = "Current server")
+    ),
+    components(
+        schemas(
+            crate::protocols::openai::chat_completions::NvCreateChatCompletionRequest,
+            crate::protocols::openai::completions::NvCreateCompletionRequest,
+            crate::protocols::openai::embeddings::NvCreateEmbeddingRequest,
+            crate::protocols::openai::responses::NvCreateResponse
+        )
    )
 )]
 struct ApiDoc;

 /// Generate OpenAPI specification from route documentation
-fn generate_openapi_spec(route_docs: &[RouteDoc]) -> utoipa::openapi::OpenApi {
+///
+/// This is the core helper used both by the embedded Swagger UI and by
+/// external tools (for example CI or NIM) which need to materialize the
+/// same frontend OpenAPI specification without running the HTTP service.
+pub fn generate_openapi_spec(route_docs: &[RouteDoc]) -> utoipa::openapi::OpenApi {
    let mut openapi = ApiDoc::openapi();

    // Build paths from route documentation
@@ -216,60 +228,8 @@ fn add_request_body_for_path(

 /// Create schema for chat completion request
 fn create_chat_completion_schema() -> RefOr<utoipa::openapi::schema::Schema> {
-    use utoipa::openapi::schema::{ArrayBuilder, ObjectBuilder};
-
-    RefOr::T(utoipa::openapi::schema::Schema::Object(
-        ObjectBuilder::new()
-            .property(
-                "model",
-                ObjectBuilder::new()
-                    .description(Some("ID of the model to use"))
-                    .build(),
-            )
-            .property(
-                "messages",
-                ArrayBuilder::new()
-                    .description(Some("A list of messages comprising the conversation so far"))
-                    .items(
-                        ObjectBuilder::new()
-                            .property(
-                                "role",
-                                ObjectBuilder::new()
-                                    .description(Some("The role of the message author (system, user, assistant)"))
-                                    .build(),
-                            )
-                            .property(
-                                "content",
-                                ObjectBuilder::new()
-                                    .description(Some("The contents of the message"))
-                                    .build(),
-                            )
-                            .build(),
-                    )
-                    .build(),
-            )
-            .property(
-                "temperature",
-                ObjectBuilder::new()
-                    .description(Some("Sampling temperature between 0 and 2. Higher values make output more random"))
-                    .build(),
-            )
-            .property(
-                "max_tokens",
-                ObjectBuilder::new()
-                    .description(Some("Maximum number of tokens to generate"))
-                    .build(),
-            )
-            .property(
-                "stream",
-                ObjectBuilder::new()
-                    .description(Some("Whether to stream back partial progress"))
-                    .build(),
-            )
-            .required("model")
-            .required("messages")
-            .build(),
-    ))
+    // Schema derived from actual NvCreateChatCompletionRequest type via ToSchema
+    <crate::protocols::openai::chat_completions::NvCreateChatCompletionRequest as utoipa::PartialSchema>::schema()
 }

 /// Create example for chat completion request
@@ -294,44 +254,7 @@ fn create_chat_completion_example() -> serde_json::Value {

 /// Create schema for completion request
 fn create_completion_schema() -> RefOr<utoipa::openapi::schema::Schema> {
-    use utoipa::openapi::schema::ObjectBuilder;
-
-    RefOr::T(utoipa::openapi::schema::Schema::Object(
-        ObjectBuilder::new()
-            .property(
-                "model",
-                ObjectBuilder::new()
-                    .description(Some("ID of the model to use"))
-                    .build(),
-            )
-            .property(
-                "prompt",
-                ObjectBuilder::new()
-                    .description(Some("The prompt to generate completions for"))
-                    .build(),
-            )
-            .property(
-                "temperature",
-                ObjectBuilder::new()
-                    .description(Some("Sampling temperature between 0 and 2"))
-                    .build(),
-            )
-            .property(
-                "max_tokens",
-                ObjectBuilder::new()
-                    .description(Some("Maximum number of tokens to generate"))
-                    .build(),
-            )
-            .property(
-                "stream",
-                ObjectBuilder::new()
-                    .description(Some("Whether to stream back partial progress"))
-                    .build(),
-            )
-            .required("model")
-            .required("prompt")
-            .build(),
-    ))
+    <crate::protocols::openai::completions::NvCreateCompletionRequest as utoipa::PartialSchema>::schema()
 }

 /// Create example for completion request
@@ -347,28 +270,7 @@ fn create_completion_example() -> serde_json::Value {

 /// Create schema for embedding request
 fn create_embedding_schema() -> RefOr<utoipa::openapi::schema::Schema> {
-    use utoipa::openapi::schema::ObjectBuilder;
-
-    RefOr::T(utoipa::openapi::schema::Schema::Object(
-        ObjectBuilder::new()
-            .property(
-                "model",
-                ObjectBuilder::new()
-                    .description(Some("ID of the model to use"))
-                    .build(),
-            )
-            .property(
-                "input",
-                ObjectBuilder::new()
-                    .description(Some(
-                        "Input text to embed, encoded as a string or array of strings",
-                    ))
-                    .build(),
-            )
-            .required("model")
-            .required("input")
-            .build(),
-    ))
+    <crate::protocols::openai::embeddings::NvCreateEmbeddingRequest as utoipa::PartialSchema>::schema()
 }

 /// Create example for embedding request
@@ -381,26 +283,8 @@ fn create_embedding_example() -> serde_json::Value {

 /// Create schema for response request
 fn create_response_schema() -> RefOr<utoipa::openapi::schema::Schema> {
-    use utoipa::openapi::schema::ObjectBuilder;
-
-    RefOr::T(utoipa::openapi::schema::Schema::Object(
-        ObjectBuilder::new()
-            .property(
-                "model",
-                ObjectBuilder::new()
-                    .description(Some("ID of the model to use"))
-                    .build(),
-            )
-            .property(
-                "input",
-                ObjectBuilder::new()
-                    .description(Some("The input text"))
-                    .build(),
-            )
-            .required("model")
-            .required("input")
-            .build(),
-    ))
+    // Schema derived from NvCreateResponse type via ToSchema
+    <crate::protocols::openai::responses::NvCreateResponse as utoipa::PartialSchema>::schema()
 }

 /// Create example for response request

--- a/lib/llm/src/preprocessor/media/decoders.rs
+++ b/lib/llm/src/preprocessor/media/decoders.rs
@@ -3,6 +3,7 @@

 use anyhow::Result;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;

 use super::common::EncodedMediaData;
 use super::rdma::DecodedMediaData;
@@ -34,7 +35,7 @@ pub trait Decoder: Clone + Send + 'static {
 /// Media decoder configuration.
 /// Used both for MDC server config and runtime `media_io_kwargs`.
 /// When used at runtime, limits are enforced from MDC and cannot be overridden.
-#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)]
+#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize, ToSchema)]
 pub struct MediaDecoder {
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub image: Option<ImageDecoder>,

--- a/lib/llm/src/preprocessor/media/decoders/image.rs
+++ b/lib/llm/src/preprocessor/media/decoders/image.rs
@@ -7,6 +7,7 @@ use anyhow::Result;
 use image::{ColorType, GenericImageView, ImageFormat, ImageReader};
 use ndarray::Array3;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;

 use super::super::common::EncodedMediaData;
 use super::super::rdma::DecodedMediaData;
@@ -15,7 +16,7 @@ use super::{DecodedMediaMetadata, Decoder};
 const DEFAULT_MAX_ALLOC: u64 = 128 * 1024 * 1024; // 128 MB

 /// Image decoder limits - can only be set via server config, not runtime kwargs.
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize, ToSchema)]
 #[serde(deny_unknown_fields)]
 pub struct ImageDecoderLimits {
    #[serde(default)]
@@ -37,7 +38,7 @@ impl Default for ImageDecoderLimits {
    }
 }

-#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, ToSchema)]
 #[serde(deny_unknown_fields)]
 pub struct ImageDecoder {
    #[serde(default)]

--- a/lib/llm/src/preprocessor/media/decoders/video.rs
+++ b/lib/llm/src/preprocessor/media/decoders/video.rs
@@ -10,6 +10,7 @@ use ffmpeg_next::ffi::{AVPixelFormat, av_image_copy_to_buffer};
 use memfile::{CreateOptions, MemFile, Seal};
 use ndarray::Array4;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
 use video_rs::frame::RawFrame;
 use video_rs::{Location, Time};

@@ -22,7 +23,7 @@ use crate::preprocessor::media::{
 const FRAME_TIME_BUFFER_SECS: f64 = 0.001;
 const DEFAULT_MAX_ALLOC: u64 = 512 * 1024 * 1024; // 512 MB

-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize, ToSchema)]
 #[serde(deny_unknown_fields)]
 pub struct VideoDecoderLimits {
    /// Maximum allowed total allocation of decoded frames in bytes
@@ -38,7 +39,7 @@ impl Default for VideoDecoderLimits {
    }
 }

-#[derive(Clone, Debug, Default, Serialize, Deserialize)]
+#[derive(Clone, Debug, Default, Serialize, Deserialize, ToSchema)]
 #[serde(deny_unknown_fields)]
 pub struct VideoDecoder {
    #[serde(default)]

--- a/lib/llm/src/protocols/common/timing.rs
+++ b/lib/llm/src/protocols/common/timing.rs
@@ -9,6 +9,7 @@
 use serde::{Deserialize, Serialize};
 use std::sync::{Mutex, OnceLock};
 use std::time::{Instant, SystemTime, UNIX_EPOCH};
+use utoipa::ToSchema;

 use crate::protocols::openai::nvext::WorkerIdInfo;

@@ -241,7 +242,7 @@ impl Default for RequestTracker {
 ///
 /// This struct is serialized and included in the response's `nvext` field
 /// when the client requests timing information via `extra_fields: ["timing"]`.
-#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
+#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, PartialEq)]
 pub struct TimingInfo {
    /// When the request was received (epoch milliseconds)
    pub request_received_ms: u64,

--- a/lib/llm/src/protocols/openai/chat_completions.rs
+++ b/lib/llm/src/protocols/openai/chat_completions.rs
@@ -3,6 +3,7 @@

 use dynamo_runtime::protocols::annotated::AnnotationsProvider;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
 use validator::Validate;

 use crate::engines::ValidateRequest;
@@ -31,7 +32,7 @@ pub use delta::DeltaGenerator;
 /// - `common`: Common extension fields (ignore_eos, min_tokens) at root level, embedded using `serde(flatten)`.
 /// - `nvext`: The optional NVIDIA extension field. See [`NvExt`] for more details.
 ///   Note: If ignore_eos is specified in both common and nvext, the common (root-level) value takes precedence.
-#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
+#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateChatCompletionRequest {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::CreateChatCompletionRequest,

--- a/lib/llm/src/protocols/openai/common_ext.rs
+++ b/lib/llm/src/protocols/openai/common_ext.rs
@@ -2,11 +2,12 @@
 // SPDX-License-Identifier: Apache-2.0
 use derive_builder::Builder;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
 use validator::Validate;

 /// Common extensions for OpenAI API requests that are not part of the standard OpenAI spec
 /// but are commonly needed across different request types.
-#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone, Default)]
+#[derive(ToSchema, Serialize, Deserialize, Builder, Validate, Debug, Clone, Default)]
 pub struct CommonExt {
    /// If true, the model will ignore the end of string token and generate to max_tokens.
    /// This field can also be specified in nvext, but the root-level value takes precedence.

--- a/lib/llm/src/protocols/openai/completions.rs
+++ b/lib/llm/src/protocols/openai/completions.rs
@@ -4,6 +4,7 @@
 use derive_builder::Builder;
 use dynamo_runtime::protocols::annotated::AnnotationsProvider;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
 use validator::Validate;

 use crate::engines::ValidateRequest;
@@ -23,7 +24,7 @@ mod delta;
 pub use aggregator::DeltaAggregator;
 pub use delta::DeltaGenerator;

-#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
+#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateCompletionRequest {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::CreateCompletionRequest,
@@ -43,7 +44,7 @@ pub struct NvCreateCompletionRequest {
    pub unsupported_fields: std::collections::HashMap<String, serde_json::Value>,
 }

-#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
+#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateCompletionResponse {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::CreateCompletionResponse,

--- a/lib/llm/src/protocols/openai/embeddings.rs
+++ b/lib/llm/src/protocols/openai/embeddings.rs
@@ -3,6 +3,7 @@

 use dynamo_runtime::protocols::annotated::AnnotationsProvider;
 use serde::{Deserialize, Serialize};
+use utoipa::ToSchema;
 use validator::Validate;

 mod aggregator;
@@ -11,7 +12,7 @@ mod nvext;
 pub use aggregator::DeltaAggregator;
 pub use nvext::{NvExt, NvExtProvider};

-#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
+#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateEmbeddingRequest {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::CreateEmbeddingRequest,
@@ -26,7 +27,7 @@ pub struct NvCreateEmbeddingRequest {
 /// # Fields
 /// - `inner`: The base OpenAI unary chat completion response, embedded
 ///   using `serde(flatten)`.
-#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
+#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
 pub struct NvCreateEmbeddingResponse {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::CreateEmbeddingResponse,