Unverified Commit f63e273c authored by smatta-star's avatar smatta-star Committed by GitHub
Browse files

feat: add auto-generated frontend OpenAPI spec and helper binary (#4802)


Signed-off-by: Satvik Matta <smatta@nvidia.com>
parent ac8c9023
...@@ -2575,6 +2575,7 @@ dependencies = [ ...@@ -2575,6 +2575,7 @@ dependencies = [
"tokio-util", "tokio-util",
"tracing", "tracing",
"url", "url",
"utoipa",
"uuid 1.18.1", "uuid 1.18.1",
] ]
...@@ -11781,6 +11782,8 @@ dependencies = [ ...@@ -11781,6 +11782,8 @@ dependencies = [
"quote", "quote",
"regex", "regex",
"syn 2.0.111", "syn 2.0.111",
"url",
"uuid 1.18.1",
] ]
[[package]] [[package]]
......
...@@ -181,6 +181,17 @@ Dynamo provides comprehensive benchmarking tools to evaluate and optimize your d ...@@ -181,6 +181,17 @@ Dynamo provides comprehensive benchmarking tools to evaluate and optimize your d
- **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using AIPerf - **[Benchmarking Guide](docs/benchmarks/benchmarking.md)** – Compare deployment topologies (aggregated vs. disaggregated vs. vanilla vLLM) using AIPerf
- **[SLA-Driven Dynamo Deployments](docs/planner/sla_planner_quickstart.md)** – Optimize your deployment to meet SLA requirements - **[SLA-Driven Dynamo Deployments](docs/planner/sla_planner_quickstart.md)** – Optimize your deployment to meet SLA requirements
## Frontend OpenAPI specification
The OpenAI-compatible HTTP frontend exposes an OpenAPI 3 specification at `/openapi.json`.
To generate and persist the same specification without running the server (for example for CI, documentation, or NIM integration), run:
```bash
cargo run -p dynamo-llm --bin generate-frontend-openapi
```
This writes the current frontend spec to `docs/frontends/openapi.json`, relative to the repository root.
# Engines # Engines
Dynamo is designed to be inference engine agnostic. To use any engine with Dynamo, NATS and etcd need to be installed, along with a Dynamo frontend (`python -m dynamo.frontend [--interactive]`). Dynamo is designed to be inference engine agnostic. To use any engine with Dynamo, NATS and etcd need to be installed, along with a Dynamo frontend (`python -m dynamo.frontend [--interactive]`).
......
{
"openapi": "3.1.0",
"info": {
"title": "NVIDIA Dynamo OpenAI Frontend",
"description": "OpenAI-compatible HTTP API for NVIDIA Dynamo.",
"contact": {
"name": "NVIDIA Dynamo",
"url": "https://github.com/ai-dynamo/dynamo"
},
"license": {
"name": "Apache-2.0"
},
"version": "0.7.0"
},
"servers": [
{
"url": "/",
"description": "Current server"
}
],
"paths": {
"/busy_threshold": {
"get": {
"summary": "Endpoint: /busy_threshold",
"description": "Endpoint for path: /busy_threshold",
"operationId": "get_busy_threshold",
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/docs": {
"get": {
"summary": "API documentation",
"description": "Interactive API documentation powered by Swagger UI.",
"operationId": "get_docs",
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/health": {
"get": {
"summary": "Health check",
"description": "Returns the health status of the service. Used for readiness probes.",
"operationId": "get_health",
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/live": {
"get": {
"summary": "Liveness check",
"description": "Returns the liveness status of the service. Used for liveness probes.",
"operationId": "get_live",
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/metrics": {
"get": {
"summary": "Prometheus metrics",
"description": "Returns Prometheus metrics for monitoring the service.",
"operationId": "get_metrics",
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/openapi.json": {
"get": {
"summary": "OpenAPI specification",
"description": "Returns the OpenAPI 3.1 specification for this API in JSON format.",
"operationId": "get_openapi.json",
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/v1/chat/completions": {
"post": {
"summary": "Create chat completion",
"description": "Creates a completion for a chat conversation. Supports both streaming and non-streaming modes. Compatible with OpenAI's chat completions API.",
"operationId": "post_v1_chat_completions",
"requestBody": {
"description": "Chat completion request with model, messages, and optional parameters",
"content": {
"application/json": {
"schema": {
"allOf": [
{
"$ref": "#/components/schemas/CreateChatCompletionRequest"
},
{
"$ref": "#/components/schemas/CommonExt"
},
{
"type": "object",
"properties": {
"chat_template_args": {
"type": [
"object",
"null"
],
"description": "Extra args to pass to the chat template rendering context",
"additionalProperties": {},
"propertyNames": {
"type": "string"
}
},
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
},
"additionalProperties": {
"description": "Catch-all for unsupported fields - checked during validation"
}
}
],
"description": "A request structure for creating a chat completion, extending OpenAI's\n`CreateChatCompletionRequest` with [`NvExt`] extensions and common fields.\n\n# Fields\n- `inner`: The base OpenAI chat completion request, embedded using `serde(flatten)`.\n- `common`: Common extension fields (ignore_eos, min_tokens) at root level, embedded using `serde(flatten)`.\n- `nvext`: The optional NVIDIA extension field. See [`NvExt`] for more details.\n Note: If ignore_eos is specified in both common and nvext, the common (root-level) value takes precedence."
},
"example": {
"model": "Qwen/Qwen3-0.6B",
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello! Can you help me understand what this API does?"
}
],
"temperature": 0.7,
"max_tokens": 50,
"stream": false
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/v1/completions": {
"post": {
"summary": "Create text completion",
"description": "Creates a completion for a given prompt. Supports both streaming and non-streaming modes. Compatible with OpenAI's completions API.",
"operationId": "post_v1_completions",
"requestBody": {
"description": "Text completion request with model, prompt, and optional parameters",
"content": {
"application/json": {
"schema": {
"allOf": [
{
"$ref": "#/components/schemas/CreateCompletionRequest"
},
{
"$ref": "#/components/schemas/CommonExt"
},
{
"type": "object",
"properties": {
"metadata": {},
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
},
"additionalProperties": {
"description": "Catch-all for unsupported fields - checked during validation"
}
}
]
},
"example": {
"model": "Qwen/Qwen3-0.6B",
"prompt": "Once upon a time",
"temperature": 0.7,
"max_tokens": 50,
"stream": false
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/v1/embeddings": {
"post": {
"summary": "Create embeddings",
"description": "Creates an embedding vector representing the input text. Compatible with OpenAI's embeddings API.",
"operationId": "post_v1_embeddings",
"requestBody": {
"description": "Embedding request with model and input text",
"content": {
"application/json": {
"schema": {
"allOf": [
{
"$ref": "#/components/schemas/CreateEmbeddingRequest"
},
{
"type": "object",
"properties": {
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
}
}
]
},
"example": {
"model": "Qwen/Qwen3-Embedding-4B",
"input": "The quick brown fox jumps over the lazy dog"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/v1/models": {
"get": {
"summary": "List available models",
"description": "Lists the currently available models and provides basic information about each.",
"operationId": "get_v1_models",
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
},
"/v1/responses": {
"post": {
"summary": "Create response",
"description": "Creates a response for a given input. Compatible with OpenAI's responses API.",
"operationId": "post_v1_responses",
"requestBody": {
"description": "Response request with model and input",
"content": {
"application/json": {
"schema": {
"allOf": [
{
"$ref": "#/components/schemas/CreateResponse",
"description": "Flattened CreateResponse fields (model, input, temperature, etc.)"
},
{
"type": "object",
"properties": {
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
}
}
]
},
"example": {
"model": "Qwen/Qwen3-0.6B",
"input": "What is the capital of France?"
}
}
},
"required": true
},
"responses": {
"200": {
"description": "Successful response"
},
"400": {
"description": "Bad request - invalid input"
},
"404": {
"description": "Model not found"
},
"503": {
"description": "Service unavailable"
}
}
}
}
},
"components": {
"schemas": {
"AudioUrl": {
"type": "object",
"required": [
"url"
],
"properties": {
"url": {
"type": "string",
"format": "uri",
"description": "URL of the audio file"
},
"uuid": {
"type": [
"string",
"null"
],
"format": "uuid",
"description": "Optional unique identifier for the audio."
}
}
},
"ChatCompletionAudio": {
"type": "object",
"required": [
"voice",
"format"
],
"properties": {
"format": {
"$ref": "#/components/schemas/ChatCompletionAudioFormat",
"description": "Specifies the output audio format. Must be one of `wav`, `mp3`, `flac`, `opus`, or `pcm16`."
},
"voice": {
"$ref": "#/components/schemas/ChatCompletionAudioVoice",
"description": "The voice the model uses to respond. Supported voices are `ash`, `ballad`, `coral`, `sage`, and `verse` (also supported but not recommended are `alloy`, `echo`, and `shimmer`; these voices are less expressive)."
}
}
},
"ChatCompletionAudioFormat": {
"type": "string",
"enum": [
"wav",
"mp3",
"flac",
"opus",
"pcm16"
]
},
"ChatCompletionAudioVoice": {
"type": "string",
"enum": [
"alloy",
"ash",
"ballad",
"coral",
"echo",
"sage",
"shimmer",
"verse"
]
},
"ChatCompletionFunctionCall": {
"oneOf": [
{
"type": "string",
"description": "The model does not call a function, and responds to the end-user.",
"enum": [
"none"
]
},
{
"type": "string",
"description": "The model can pick between responding to the end-user or calling a function.",
"enum": [
"auto"
]
},
{
"type": "object",
"description": "Forces the model to call the specified function.",
"required": [
"Function"
],
"properties": {
"Function": {
"type": "object",
"description": "Forces the model to call the specified function.",
"required": [
"name"
],
"properties": {
"name": {
"type": "string"
}
}
}
}
}
]
},
"ChatCompletionFunctions": {
"type": "object",
"required": [
"name",
"parameters"
],
"properties": {
"description": {
"type": [
"string",
"null"
],
"description": "A description of what the function does, used by the model to choose when and how to call the function."
},
"name": {
"type": "string",
"description": "The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
},
"parameters": {
"description": "The parameters the function accepts, described as a JSON Schema object. See the [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.\n\nOmitting `parameters` defines a function with an empty parameter list."
}
},
"deprecated": true
},
"ChatCompletionMessageToolCall": {
"type": "object",
"required": [
"id",
"type",
"function"
],
"properties": {
"function": {
"$ref": "#/components/schemas/FunctionCall",
"description": "The function that the model called."
},
"id": {
"type": "string",
"description": "The ID of the tool call."
},
"type": {
"$ref": "#/components/schemas/ChatCompletionToolType",
"description": "The type of the tool. Currently, only `function` is supported."
}
}
},
"ChatCompletionModalities": {
"type": "string",
"description": "Output types that you would like the model to generate for this request.\n\nMost models are capable of generating text, which is the default: `[\"text\"]`\n\nThe `gpt-4o-audio-preview` model can also be used to [generate\naudio](https://platform.openai.com/docs/guides/audio). To request that this model generate both text and audio responses, you can use: `[\"text\", \"audio\"]`",
"enum": [
"text",
"audio"
]
},
"ChatCompletionNamedToolChoice": {
"type": "object",
"description": "Specifies a tool the model should use. Use to force the model to call a specific function.",
"required": [
"type",
"function"
],
"properties": {
"function": {
"$ref": "#/components/schemas/FunctionName"
},
"type": {
"$ref": "#/components/schemas/ChatCompletionToolType",
"description": "The type of the tool. Currently, only `function` is supported."
}
}
},
"ChatCompletionRequestAssistantMessage": {
"type": "object",
"properties": {
"audio": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageAudio",
"description": "Data about a previous audio response from the model.\n[Learn more](https://platform.openai.com/docs/guides/audio)."
}
]
},
"content": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageContent",
"description": "The contents of the assistant message. Required unless `tool_calls` or `function_call` is specified."
}
]
},
"function_call": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/FunctionCall",
"description": "Deprecated and replaced by `tool_calls`. The name and arguments of a function that should be called, as generated by the model."
}
]
},
"name": {
"type": [
"string",
"null"
],
"description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
},
"refusal": {
"type": [
"string",
"null"
],
"description": "The refusal message by the assistant."
},
"tool_calls": {
"type": [
"array",
"null"
],
"items": {
"$ref": "#/components/schemas/ChatCompletionMessageToolCall"
}
}
}
},
"ChatCompletionRequestAssistantMessageAudio": {
"type": "object",
"required": [
"id"
],
"properties": {
"id": {
"type": "string",
"description": "Unique identifier for a previous audio response from the model."
}
}
},
"ChatCompletionRequestAssistantMessageContent": {
"oneOf": [
{
"type": "string",
"description": "The text contents of the message."
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessageContentPart"
},
"description": "An array of content parts with a defined type. Can be one or more of type `text`, or exactly one of type `refusal`."
}
]
},
"ChatCompletionRequestAssistantMessageContentPart": {
"oneOf": [
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartRefusal"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"refusal"
]
}
}
}
]
}
]
},
"ChatCompletionRequestDeveloperMessage": {
"type": "object",
"required": [
"content"
],
"properties": {
"content": {
"$ref": "#/components/schemas/ChatCompletionRequestDeveloperMessageContent",
"description": "The contents of the developer message."
},
"name": {
"type": [
"string",
"null"
],
"description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
}
}
},
"ChatCompletionRequestDeveloperMessageContent": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
}
}
]
},
"ChatCompletionRequestFunctionMessage": {
"type": "object",
"required": [
"name"
],
"properties": {
"content": {
"type": [
"string",
"null"
],
"description": "The return value from the function call, to return to the model."
},
"name": {
"type": "string",
"description": "The name of the function to call."
}
}
},
"ChatCompletionRequestMessage": {
"oneOf": [
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestDeveloperMessage"
},
{
"type": "object",
"required": [
"role"
],
"properties": {
"role": {
"type": "string",
"enum": [
"developer"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestSystemMessage"
},
{
"type": "object",
"required": [
"role"
],
"properties": {
"role": {
"type": "string",
"enum": [
"system"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestUserMessage"
},
{
"type": "object",
"required": [
"role"
],
"properties": {
"role": {
"type": "string",
"enum": [
"user"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestAssistantMessage"
},
{
"type": "object",
"required": [
"role"
],
"properties": {
"role": {
"type": "string",
"enum": [
"assistant"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestToolMessage"
},
{
"type": "object",
"required": [
"role"
],
"properties": {
"role": {
"type": "string",
"enum": [
"tool"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestFunctionMessage"
},
{
"type": "object",
"required": [
"role"
],
"properties": {
"role": {
"type": "string",
"enum": [
"function"
]
}
}
}
]
}
]
},
"ChatCompletionRequestMessageContentPartAudio": {
"type": "object",
"description": "Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).",
"required": [
"input_audio"
],
"properties": {
"input_audio": {
"$ref": "#/components/schemas/InputAudio"
}
}
},
"ChatCompletionRequestMessageContentPartAudioUrl": {
"type": "object",
"required": [
"audio_url"
],
"properties": {
"audio_url": {
"$ref": "#/components/schemas/AudioUrl"
}
}
},
"ChatCompletionRequestMessageContentPartImage": {
"type": "object",
"required": [
"image_url"
],
"properties": {
"image_url": {
"$ref": "#/components/schemas/ImageUrl"
}
}
},
"ChatCompletionRequestMessageContentPartRefusal": {
"type": "object",
"required": [
"refusal"
],
"properties": {
"refusal": {
"type": "string",
"description": "The refusal message generated by the model."
}
}
},
"ChatCompletionRequestMessageContentPartText": {
"type": "object",
"required": [
"text"
],
"properties": {
"text": {
"type": "string"
}
}
},
"ChatCompletionRequestMessageContentPartVideo": {
"type": "object",
"required": [
"video_url"
],
"properties": {
"video_url": {
"$ref": "#/components/schemas/VideoUrl"
}
}
},
"ChatCompletionRequestSystemMessage": {
"type": "object",
"required": [
"content"
],
"properties": {
"content": {
"$ref": "#/components/schemas/ChatCompletionRequestSystemMessageContent",
"description": "The contents of the system message."
},
"name": {
"type": [
"string",
"null"
],
"description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
}
}
},
"ChatCompletionRequestSystemMessageContent": {
"oneOf": [
{
"type": "string",
"description": "The text contents of the system message."
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionRequestSystemMessageContentPart"
},
"description": "An array of content parts with a defined type. For system messages, only type `text` is supported."
}
]
},
"ChatCompletionRequestSystemMessageContentPart": {
"oneOf": [
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
}
}
}
]
}
]
},
"ChatCompletionRequestToolMessage": {
"type": "object",
"description": "Tool message",
"required": [
"content",
"tool_call_id"
],
"properties": {
"content": {
"$ref": "#/components/schemas/ChatCompletionRequestToolMessageContent",
"description": "The contents of the tool message."
},
"tool_call_id": {
"type": "string"
}
}
},
"ChatCompletionRequestToolMessageContent": {
"oneOf": [
{
"type": "string",
"description": "The text contents of the tool message."
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionRequestToolMessageContentPart"
},
"description": "An array of content parts with a defined type. For tool messages, only type `text` is supported."
}
]
},
"ChatCompletionRequestToolMessageContentPart": {
"oneOf": [
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
}
}
}
]
}
]
},
"ChatCompletionRequestUserMessage": {
"type": "object",
"required": [
"content"
],
"properties": {
"content": {
"$ref": "#/components/schemas/ChatCompletionRequestUserMessageContent",
"description": "The contents of the user message."
},
"name": {
"type": [
"string",
"null"
],
"description": "An optional name for the participant. Provides the model information to differentiate between participants of the same role."
}
}
},
"ChatCompletionRequestUserMessageContent": {
"oneOf": [
{
"type": "string",
"description": "The text contents of the message."
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionRequestUserMessageContentPart"
},
"description": "An array of content parts with a defined type. Supported options differ based on the [model](https://platform.openai.com/docs/models) being used to generate the response. Can contain text, image, or audio inputs."
}
]
},
"ChatCompletionRequestUserMessageContentPart": {
"oneOf": [
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartImage"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"image_url"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartVideo"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"video_url"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartAudioUrl"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"audio_url"
]
}
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartAudio"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"input_audio"
]
}
}
}
]
}
]
},
"ChatCompletionStreamOptions": {
"type": "object",
"description": "Options for streaming response. Only set this when you set `stream: true`.",
"required": [
"include_usage"
],
"properties": {
"include_usage": {
"type": "boolean",
"description": "If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value."
}
}
},
"ChatCompletionTool": {
"type": "object",
"required": [
"type",
"function"
],
"properties": {
"function": {
"$ref": "#/components/schemas/FunctionObject"
},
"type": {
"$ref": "#/components/schemas/ChatCompletionToolType"
}
}
},
"ChatCompletionToolChoiceOption": {
"oneOf": [
{
"type": "string",
"enum": [
"none"
]
},
{
"type": "string",
"enum": [
"auto"
]
},
{
"type": "string",
"enum": [
"required"
]
},
{
"type": "object",
"required": [
"named"
],
"properties": {
"named": {
"$ref": "#/components/schemas/ChatCompletionNamedToolChoice"
}
}
}
],
"description": "Controls which (if any) tool is called by the model.\n`none` means the model will not call any tool and instead generates a message.\n`auto` means the model can pick between generating a message or calling one or more tools.\n`required` means the model must call one or more tools.\nSpecifying a particular tool via `{\"type\": \"function\", \"function\": {\"name\": \"my_function\"}}` forces the model to call that tool.\n\n`none` is the default when no tools are present. `auto` is the default if tools are present."
},
"ChatCompletionToolType": {
"type": "string",
"enum": [
"function"
]
},
"CommonExt": {
"type": "object",
"description": "Common extensions for OpenAI API requests that are not part of the standard OpenAI spec\nbut are commonly needed across different request types.",
"properties": {
"guided_choice": {
"type": [
"array",
"null"
],
"items": {
"type": "string"
},
"description": "If specified, the output will be exactly one of the choices."
},
"guided_decoding_backend": {
"type": [
"string",
"null"
],
"description": "If specified, the backend to use for guided decoding. This can be a backend such as xgrammar or a custom guided decoding backend."
},
"guided_grammar": {
"type": [
"string",
"null"
],
"description": "If specified, the output will follow the context-free grammar. Can be a string or null."
},
"guided_json": {
"description": "Guided Decoding Options\nIf specified, the output will be a JSON object. Can be a string, an object, or null."
},
"guided_regex": {
"type": [
"string",
"null"
],
"description": "If specified, the output will follow the regex pattern. Can be a string or null."
},
"guided_whitespace_pattern": {
"type": [
"string",
"null"
],
"description": "If specified, the output will follow the whitespace pattern. Can be a string or null."
},
"ignore_eos": {
"type": [
"boolean",
"null"
],
"description": "If true, the model will ignore the end-of-sequence (EOS) token and keep generating until max_tokens is reached.\nThis field can also be specified in nvext, but the root-level value takes precedence."
},
"include_stop_str_in_output": {
"type": [
"boolean",
"null"
],
"description": "Whether to include the stop string in the output text."
},
"min_p": {
"type": [
"number",
"null"
],
"format": "float",
"description": "Relative probability floor"
},
"min_tokens": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "The minimum number of tokens to generate.\nThis is a common parameter needed across different request types.",
"minimum": 0
},
"repetition_penalty": {
"type": [
"number",
"null"
],
"format": "float",
"description": "How much to penalize tokens based on how frequently they occur in the text.\nA value of 1 means no penalty, while values larger than 1 discourage and values smaller encourage."
},
"skip_special_tokens": {
"type": [
"boolean",
"null"
],
"description": "Whether to skip special tokens in the decoded output.\nWhen true, special tokens (like EOS, BOS, PAD) are removed from the output text.\nWhen false, special tokens are included in the output text.\nDefaults to false if not specified."
},
"top_k": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "Integer that controls the number of top tokens to consider. Set to -1 to consider all tokens."
}
}
},
"CreateChatCompletionRequest": {
"type": "object",
"required": [
"messages",
"model"
],
"properties": {
"audio": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ChatCompletionAudio",
"description": "Parameters for audio output. Required when audio output is requested with `modalities: [\"audio\"]`. [Learn more](https://platform.openai.com/docs/guides/audio)."
}
]
},
"frequency_penalty": {
"type": [
"number",
"null"
],
"format": "float",
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim."
},
"function_call": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ChatCompletionFunctionCall",
"description": "Deprecated in favor of `tool_choice`.\n\nControls which (if any) function is called by the model.\n`none` means the model will not call a function and instead generates a message.\n`auto` means the model can pick between generating a message or calling a function.\nSpecifying a particular function via `{\"name\": \"my_function\"}` forces the model to call that function.\n\n`none` is the default when no functions are present. `auto` is the default if functions are present."
}
]
},
"functions": {
"type": [
"array",
"null"
],
"items": {
"$ref": "#/components/schemas/ChatCompletionFunctions"
},
"description": "Deprecated in favor of `tools`.\n\nA list of functions the model may generate JSON inputs for.",
"deprecated": true
},
"logit_bias": {
"type": [
"object",
"null"
],
"description": "Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a json object that maps tokens (specified by their token ID in the tokenizer) to an associated bias value from -100 to 100.\nMathematically, the bias is added to the logits generated by the model prior to sampling.\nThe exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection;\nvalues like -100 or 100 should result in a ban or exclusive selection of the relevant token.",
"additionalProperties": {},
"propertyNames": {
"type": "string"
}
},
"logprobs": {
"type": [
"boolean",
"null"
],
"description": "Whether to return log probabilities of the output tokens or not. If true, returns the log probabilities of each output token returned in the `content` of `message`."
},
"max_completion_tokens": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and [reasoning tokens](https://platform.openai.com/docs/guides/reasoning).",
"minimum": 0
},
"max_tokens": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the chat completion.\n\nThis value can be used to control [costs](https://openai.com/api/pricing/) for text generated via API.\nThis value is now deprecated in favor of `max_completion_tokens`, and is\nnot compatible with [o1 series models](https://platform.openai.com/docs/guides/reasoning).",
"deprecated": true,
"minimum": 0
},
"messages": {
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionRequestMessage"
},
"description": "A list of messages comprising the conversation so far. Depending on the [model](https://platform.openai.com/docs/models) you use, different message types (modalities) are supported, like [text](https://platform.openai.com/docs/guides/text-generation), [images](https://platform.openai.com/docs/guides/vision), and [audio](https://platform.openai.com/docs/guides/audio)."
},
"metadata": {
"description": "Developer-defined tags and values used for filtering completions in the [dashboard](https://platform.openai.com/chat-completions)."
},
"mm_processor_kwargs": {
"description": "Multimodal processor configuration parameters"
},
"modalities": {
"type": [
"array",
"null"
],
"items": {
"$ref": "#/components/schemas/ChatCompletionModalities"
}
},
"model": {
"type": "string",
"description": "ID of the model to use.\nSee the [model endpoint compatibility](https://platform.openai.com/docs/models#model-endpoint-compatibility) table for details on which models work with the Chat API."
},
"n": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "How many chat completion choices to generate for each input message. Note that you will be charged based on the number of generated tokens across all of the choices. Keep `n` as `1` to minimize costs.",
"minimum": 0
},
"parallel_tool_calls": {
"type": [
"boolean",
"null"
],
"description": "Whether to enable [parallel function calling](https://platform.openai.com/docs/guides/function-calling/parallel-function-calling) during tool use."
},
"prediction": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/PredictionContent",
"description": "Configuration for a [Predicted Output](https://platform.openai.com/docs/guides/predicted-outputs),which can greatly improve response times when large parts of the model response are known ahead of time. This is most common when you are regenerating a file with only minor changes to most of the content."
}
]
},
"presence_penalty": {
"type": [
"number",
"null"
],
"format": "float",
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics."
},
"reasoning_effort": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ReasoningEffort",
"description": "**o1 models only**\n\nConstrains effort on reasoning for\n[reasoning models](https://platform.openai.com/docs/guides/reasoning).\n\nCurrently supported values are `low`, `medium`, and `high`. Reducing\n\nreasoning effort can result in faster responses and fewer tokens\nused on reasoning in a response."
}
]
},
"response_format": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ResponseFormat",
"description": "An object specifying the format that the model must output. Compatible with [GPT-4o](https://platform.openai.com/docs/models/gpt-4o), [GPT-4o mini](https://platform.openai.com/docs/models/gpt-4o-mini), [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.\n\nSetting to `{ \"type\": \"json_schema\", \"json_schema\": {...} }` enables Structured Outputs which guarantees the model will match your supplied JSON schema. Learn more in the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs).\n\nSetting to `{ \"type\": \"json_object\" }` enables JSON mode, which guarantees the message the model generates is valid JSON.\n\n**Important:** when using JSON mode, you **must** also instruct the model to produce JSON yourself via a system or user message. Without this, the model may generate an unending stream of whitespace until the generation reaches the token limit, resulting in a long-running and seemingly \"stuck\" request. Also note that the message content may be partially cut off if `finish_reason=\"length\"`, which indicates the generation exceeded `max_tokens` or the conversation exceeded the max context length."
}
]
},
"seed": {
"type": [
"integer",
"null"
],
"format": "int64",
"description": " This feature is in Beta.\nIf specified, our system will make a best effort to sample deterministically, such that repeated requests\nwith the same `seed` and parameters should return the same result.\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend."
},
"service_tier": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ServiceTier",
"description": "Specifies the latency tier to use for processing the request. This parameter is relevant for customers subscribed to the scale tier service:\n- If set to 'auto', the system will utilize scale tier credits until they are exhausted.\n- If set to 'default', the request will be processed using the default service tier with a lower uptime SLA and no latency guarentee.\n- When not set, the default behavior is 'auto'.\n\nWhen this parameter is set, the response body will include the `service_tier` utilized."
}
]
},
"stop": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/Stop",
"description": "Up to 4 sequences where the API will stop generating further tokens."
}
]
},
"store": {
"type": [
"boolean",
"null"
],
"description": "Whether or not to store the output of this chat completion request\n\nfor use in our [model distillation](https://platform.openai.com/docs/guides/distillation) or [evals](https://platform.openai.com/docs/guides/evals) products."
},
"stream": {
"type": [
"boolean",
"null"
],
"description": "If set, partial message deltas will be sent, like in ChatGPT.\nTokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)\nas they become available, with the stream terminated by a `data: [DONE]` message. [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions)."
},
"stream_options": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ChatCompletionStreamOptions"
}
]
},
"temperature": {
"type": [
"number",
"null"
],
"format": "float",
"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random,\nwhile lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both."
},
"tool_choice": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ChatCompletionToolChoiceOption"
}
]
},
"tools": {
"type": [
"array",
"null"
],
"items": {
"$ref": "#/components/schemas/ChatCompletionTool"
},
"description": "A list of tools the model may call. Currently, only functions are supported as a tool.\nUse this to provide a list of functions the model may generate JSON inputs for. A max of 128 functions are supported."
},
"top_logprobs": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "An integer between 0 and 20 specifying the number of most likely tokens to return at each token position, each with an associated log probability. `logprobs` must be set to `true` if this parameter is used.",
"minimum": 0
},
"top_p": {
"type": [
"number",
"null"
],
"format": "float",
"description": "An alternative to sampling with temperature, called nucleus sampling,\nwhere the model considers the results of the tokens with top_p probability mass.\nSo 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\n We generally recommend altering this or `temperature` but not both."
},
"user": {
"type": [
"string",
"null"
],
"description": "A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/guides/safety-best-practices#end-user-ids)."
},
"web_search_options": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/WebSearchOptions",
"description": "This tool searches the web for relevant results to use in a response.\nLearn more about the [web search tool](https://platform.openai.com/docs/guides/tools-web-search?api-mode=chat)."
}
]
}
}
},
"CreateCompletionRequest": {
"type": "object",
"required": [
"model",
"prompt"
],
"properties": {
"best_of": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "Generates `best_of` completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed.\n\nWhen used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return – `best_of` must be greater than `n`.\n\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.",
"minimum": 0
},
"echo": {
"type": [
"boolean",
"null"
],
"description": "Echo back the prompt in addition to the completion"
},
"frequency_penalty": {
"type": [
"number",
"null"
],
"format": "float",
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.\n\n[See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)"
},
"logit_bias": {
"type": [
"object",
"null"
],
"description": "Modify the likelihood of specified tokens appearing in the completion.\n\nAccepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token.\n\nAs an example, you can pass `{\"50256\": -100}` to prevent the <|endoftext|> token from being generated.",
"additionalProperties": {},
"propertyNames": {
"type": "string"
}
},
"logprobs": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "Include the log probabilities on the `logprobs` most likely output tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response.\n\nThe maximum value for `logprobs` is 5.",
"minimum": 0
},
"max_tokens": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "The maximum number of [tokens](https://platform.openai.com/tokenizer) that can be generated in the completion.\n\nThe token count of your prompt plus `max_tokens` cannot exceed the model's context length. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens.",
"minimum": 0
},
"model": {
"type": "string",
"description": "ID of the model to use. You can use the [List models](https://platform.openai.com/docs/api-reference/models/list) API to see all of your available models, or see our [Model overview](https://platform.openai.com/docs/models/overview) for descriptions of them."
},
"n": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "How many completions to generate for each prompt.\n**Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.\n",
"minimum": 0
},
"presence_penalty": {
"type": [
"number",
"null"
],
"format": "float",
"description": "Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.\n\n[See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)"
},
"prompt": {
"$ref": "#/components/schemas/Prompt",
"description": "The prompt(s) to generate completions for, encoded as a string, array of strings, array of tokens, or array of token arrays.\n\nNote that <|endoftext|> is the document separator that the model sees during training, so if a prompt is not specified the model will generate as if from the beginning of a new document."
},
"seed": {
"type": [
"integer",
"null"
],
"format": "int64",
"description": "If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same `seed` and parameters should return the same result.\n\nDeterminism is not guaranteed, and you should refer to the `system_fingerprint` response parameter to monitor changes in the backend."
},
"stop": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/Stop",
"description": "Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence."
}
]
},
"stream": {
"type": [
"boolean",
"null"
],
"description": "Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)\nas they become available, with the stream terminated by a `data: [DONE]` message."
},
"stream_options": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ChatCompletionStreamOptions"
}
]
},
"suffix": {
"type": [
"string",
"null"
],
"description": "The suffix that comes after a completion of inserted text.\n\nThis parameter is only supported for `gpt-3.5-turbo-instruct`."
},
"temperature": {
"type": [
"number",
"null"
],
"format": "float",
"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.\n\nWe generally recommend altering this or `top_p` but not both."
},
"top_p": {
"type": [
"number",
"null"
],
"format": "float",
"description": "An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered.\n\n We generally recommend altering this or `temperature` but not both."
},
"user": {
"type": [
"string",
"null"
],
"description": "A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids)."
}
}
},
"CreateEmbeddingRequest": {
"type": "object",
"required": [
"model",
"input"
],
"properties": {
"dimensions": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "The number of dimensions the resulting output embeddings should have. Only supported in `text-embedding-3` and later models.",
"minimum": 0
},
"encoding_format": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/EncodingFormat",
"description": "The format to return the embeddings in. Can be either `float` or [`base64`](https://pypi.org/project/pybase64/). Defaults to float"
}
]
},
"input": {
"$ref": "#/components/schemas/EmbeddingInput",
"description": "Input text to embed, encoded as a string or array of tokens. To embed multiple inputs in a single request, pass an array of strings or array of token arrays. The input must not exceed the max input tokens for the model (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048 dimensions or less. [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken) for counting tokens."
},
"model": {
"type": "string",
"description": "ID of the model to use. You can use the\n[List models](https://platform.openai.com/docs/api-reference/models/list)\nAPI to see all of your available models, or see our\n[Model overview](https://platform.openai.com/docs/models/overview)\nfor descriptions of them."
},
"user": {
"type": [
"string",
"null"
],
"description": "A unique identifier representing your end-user, which will help OpenAI\n to monitor and detect abuse. [Learn more](https://platform.openai.com/docs/usage-policies/end-user-ids)."
}
}
},
"CreateResponse": {
"type": "object",
"description": "Builder for a Responses API request.",
"required": [
"input",
"model"
],
"properties": {
"background": {
"type": [
"boolean",
"null"
],
"description": "Whether to run the model response in the background.\nboolean or null."
},
"include": {
"type": [
"array",
"null"
],
"items": {
"type": "string"
},
"description": "Specify additional output data to include in the model response.\n\nSupported values:\n- `file_search_call.results`\n Include the search results of the file search tool call.\n- `message.input_image.image_url`\n Include image URLs from the input message.\n- `computer_call_output.output.image_url`\n Include image URLs from the computer call output.\n- `reasoning.encrypted_content`\n Include an encrypted version of reasoning tokens in reasoning item outputs.\n This enables reasoning items to be used in multi-turn conversations when\n using the Responses API statelessly (for example, when the `store` parameter\n is set to `false`, or when an organization is enrolled in the zero-data-\n retention program).\n\nIf `None`, no additional data is returned."
},
"input": {
"type": "object",
"description": "Text, image, or file inputs to the model, used to generate a response.\nUsing value_type to prevent deep schema recursion from Input's nested content types."
},
"instructions": {
"type": [
"string",
"null"
],
"description": "Inserts a system (or developer) message as the first item in the model's context.\n\nWhen using along with previous_response_id, the instructions from a previous response will\nnot be carried over to the next response. This makes it simple to swap out system\n(or developer) messages in new responses."
},
"max_output_tokens": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "An upper bound for the number of tokens that can be generated for a\nresponse, including visible output tokens and reasoning tokens.",
"minimum": 0
},
"max_tool_calls": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "The maximum number of total calls to built-in tools that can be processed in a response.\nThis maximum number applies across all built-in tool calls, not per individual tool.\nAny further attempts to call a tool by the model will be ignored.",
"minimum": 0
},
"metadata": {
"description": "Arbitrary JSON metadata used as a passthrough parameter"
},
"model": {
"type": "string",
"description": "Model ID used to generate the response, like `gpt-4o`.\nOpenAI offers a wide range of models with different capabilities,\nperformance characteristics, and price points."
},
"parallel_tool_calls": {
"type": [
"boolean",
"null"
],
"description": "Whether to allow the model to run tool calls in parallel."
},
"previous_response_id": {
"type": [
"string",
"null"
],
"description": "The unique ID of the previous response to the model. Use this to create\nmulti-turn conversations."
},
"prompt": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/PromptConfig",
"description": "Reference to a prompt template and its variables."
}
]
},
"reasoning": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ReasoningConfig",
"description": "**o-series models only**: Configuration options for reasoning models."
}
]
},
"service_tier": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ServiceTier",
"description": "Specifies the latency tier to use for processing the request.\n\nThis parameter is relevant for customers subscribed to the Scale tier service.\n\nSupported values:\n- `auto`\n - If the Project is Scale tier enabled, the system will utilize Scale tier credits until\n they are exhausted.\n - If the Project is not Scale tier enabled, the request will be processed using the\n default service tier with a lower uptime SLA and no latency guarantee.\n- `default`\n The request will be processed using the default service tier with a lower uptime SLA and\n no latency guarantee.\n- `flex`\n The request will be processed with the Flex Processing service tier. Learn more.\n\nWhen not set, the default behavior is `auto`.\n\nWhen this parameter is set, the response body will include the `service_tier` utilized."
}
]
},
"store": {
"type": [
"boolean",
"null"
],
"description": "Whether to store the generated model response for later retrieval via API."
},
"stream": {
"type": [
"boolean",
"null"
],
"description": "If set to true, the model response data will be streamed to the client as it is\ngenerated using server-sent events."
},
"temperature": {
"type": [
"number",
"null"
],
"format": "float",
"description": "What sampling temperature to use, between 0 and 2. Higher values like 0.8\nwill make the output more random, while lower values like 0.2 will make it\nmore focused and deterministic. We generally recommend altering this or\n`top_p` but not both."
},
"text": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/TextConfig",
"description": "Configuration options for a text response from the model. Can be plain text\nor structured JSON data."
}
]
},
"tool_choice": {
"type": "object",
"description": "How the model should select which tool (or tools) to use when generating\na response."
},
"tools": {
"type": "array",
"items": {
"type": "object"
},
"description": "An array of tools the model may call while generating a response.\nCan include built-in tools (file_search, web_search_preview,\ncomputer_use_preview) or custom function definitions."
},
"top_logprobs": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "An integer between 0 and 20 specifying the number of most likely tokens to return\nat each token position, each with an associated log probability.",
"minimum": 0
},
"top_p": {
"type": [
"number",
"null"
],
"format": "float",
"description": "An alternative to sampling with temperature, called nucleus sampling,\nwhere the model considers the results of the tokens with top_p probability\nmass. So 0.1 means only the tokens comprising the top 10% probability mass\nare considered. We generally recommend altering this or `temperature` but\nnot both."
},
"truncation": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/Truncation",
"description": "The truncation strategy to use for the model response:\n- `auto`: drop items in the middle to fit context window.\n- `disabled`: error if exceeding context window."
}
]
},
"user": {
"type": [
"string",
"null"
],
"description": "A unique identifier representing your end-user, which can help OpenAI to\nmonitor and detect abuse."
}
}
},
"EmbeddingInput": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
},
{
"type": "array",
"items": {
"type": "integer",
"format": "int32",
"minimum": 0
}
},
{
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer",
"format": "int32",
"minimum": 0
}
}
}
]
},
"EncodingFormat": {
"type": "string",
"enum": [
"float",
"base64"
]
},
"FunctionCall": {
"type": "object",
"description": "The name and arguments of a function that should be called, as generated by the model.",
"required": [
"name",
"arguments"
],
"properties": {
"arguments": {
"type": "string",
"description": "The arguments to call the function with, as generated by the model in JSON format. Note that the model does not always generate valid JSON, and may hallucinate parameters not defined by your function schema. Validate the arguments in your code before calling your function."
},
"name": {
"type": "string",
"description": "The name of the function to call."
}
}
},
"FunctionName": {
"type": "object",
"required": [
"name"
],
"properties": {
"name": {
"type": "string",
"description": "The name of the function to call."
}
}
},
"FunctionObject": {
"type": "object",
"required": [
"name"
],
"properties": {
"description": {
"type": [
"string",
"null"
],
"description": "A description of what the function does, used by the model to choose when and how to call the function."
},
"name": {
"type": "string",
"description": "The name of the function to be called. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
},
"parameters": {
"description": "The parameters the functions accepts, described as a JSON Schema object. See the [guide](https://platform.openai.com/docs/guides/text-generation/function-calling) for examples, and the [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for documentation about the format.\n\nOmitting `parameters` defines a function with an empty parameter list."
},
"strict": {
"type": [
"boolean",
"null"
],
"description": "Whether to enable strict schema adherence when generating the function call. If set to true, the model will follow the exact schema defined in the `parameters` field. Only a subset of JSON Schema is supported when `strict` is `true`. Learn more about Structured Outputs in the [function calling guide](https://platform.openai.com/docs/guides/function-calling)."
}
}
},
"ImageDetail": {
"type": "string",
"enum": [
"auto",
"low",
"high"
]
},
"ImageUrl": {
"type": "object",
"required": [
"url"
],
"properties": {
"detail": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ImageDetail",
"description": "Specifies the detail level of the image. Learn more in the [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding)."
}
]
},
"url": {
"type": "string",
"format": "uri",
"description": "Either a URL of the image or the base64 encoded image data."
},
"uuid": {
"type": [
"string",
"null"
],
"format": "uuid",
"description": "Optional unique identifier for the image."
}
}
},
"InputAudio": {
"type": "object",
"required": [
"data",
"format"
],
"properties": {
"data": {
"type": "string",
"description": "Base64 encoded audio data."
},
"format": {
"$ref": "#/components/schemas/InputAudioFormat",
"description": "The format of the encoded audio data. Currently supports \"wav\" and \"mp3\"."
}
}
},
"InputAudioFormat": {
"type": "string",
"enum": [
"wav",
"mp3"
]
},
"NvCreateChatCompletionRequest": {
"allOf": [
{
"$ref": "#/components/schemas/CreateChatCompletionRequest"
},
{
"$ref": "#/components/schemas/CommonExt"
},
{
"type": "object",
"properties": {
"chat_template_args": {
"type": [
"object",
"null"
],
"description": "Extra args to pass to the chat template rendering context",
"additionalProperties": {},
"propertyNames": {
"type": "string"
}
},
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
},
"additionalProperties": {
"description": "Catch-all for unsupported fields - checked during validation"
}
}
],
"description": "A request structure for creating a chat completion, extending OpenAI's\n`CreateChatCompletionRequest` with [`NvExt`] extensions and common fields.\n\n# Fields\n- `inner`: The base OpenAI chat completion request, embedded using `serde(flatten)`.\n- `common`: Common extension fields (ignore_eos, min_tokens) at root level, embedded using `serde(flatten)`.\n- `nvext`: The optional NVIDIA extension field. See [`NvExt`] for more details.\n Note: If ignore_eos is specified in both common and nvext, the common (root-level) value takes precedence."
},
"NvCreateCompletionRequest": {
"allOf": [
{
"$ref": "#/components/schemas/CreateCompletionRequest"
},
{
"$ref": "#/components/schemas/CommonExt"
},
{
"type": "object",
"properties": {
"metadata": {},
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
},
"additionalProperties": {
"description": "Catch-all for unsupported fields - checked during validation"
}
}
]
},
"NvCreateEmbeddingRequest": {
"allOf": [
{
"$ref": "#/components/schemas/CreateEmbeddingRequest"
},
{
"type": "object",
"properties": {
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
}
}
]
},
"NvCreateResponse": {
"allOf": [
{
"$ref": "#/components/schemas/CreateResponse",
"description": "Flattened CreateResponse fields (model, input, temperature, etc.)"
},
{
"type": "object",
"properties": {
"nvext": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/NvExt"
}
]
}
}
}
]
},
"NvExt": {
"type": "object",
"description": "NVIDIA LLM extensions to the OpenAI API",
"properties": {
"annotations": {
"type": [
"array",
"null"
],
"items": {
"type": "string"
},
"description": "Annotations\nUser requests triggers which result in the request issue back out-of-band information in the SSE\nstream using the `event:` field."
},
"backend_instance_id": {
"type": [
"integer",
"null"
],
"format": "int64",
"description": "Targeted backend instance ID for the request\nIf set, the request will be routed to backend instance with the given ID.\nIf not set, the request will be routed to the best matching instance.",
"minimum": 0
},
"extra_fields": {
"type": [
"array",
"null"
],
"items": {
"type": "string"
},
"description": "Extra fields to be included in the response's nvext\nThis is a list of field names that should be populated in the response\nSupported fields: \"worker_id\""
},
"greed_sampling": {
"type": [
"boolean",
"null"
],
"description": "If true, sampling will be forced to be greedy.\nThe backend is responsible for selecting the correct backend-specific options to\nimplement this."
},
"max_thinking_tokens": {
"type": [
"integer",
"null"
],
"format": "int32",
"description": "Maximum number of thinking tokens allowed\nNOTE: Currently passed through to backends as a no-op for future implementation",
"minimum": 0
},
"token_data": {
"type": [
"array",
"null"
],
"items": {
"type": "integer",
"format": "int32",
"minimum": 0
},
"description": "Pre-tokenized data to use instead of tokenizing the prompt\nIf provided along with backend_instance_id, these tokens will be used directly\nand tokenization will be skipped."
},
"use_raw_prompt": {
"type": [
"boolean",
"null"
],
"description": "If true, the preproessor will try to bypass the prompt template and pass the prompt directly to\nto the tokenizer."
}
}
},
"PredictionContent": {
"oneOf": [
{
"type": "object",
"description": "The type of the predicted content you want to provide. This type is\ncurrently always `content`.",
"required": [
"content",
"type"
],
"properties": {
"content": {
"$ref": "#/components/schemas/PredictionContentContent",
"description": "The type of the predicted content you want to provide. This type is\ncurrently always `content`."
},
"type": {
"type": "string",
"enum": [
"content"
]
}
}
}
],
"description": "Static predicted output content, such as the content of a text file that is being regenerated."
},
"PredictionContentContent": {
"oneOf": [
{
"type": "string",
"description": "The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes."
},
{
"type": "array",
"items": {
"$ref": "#/components/schemas/ChatCompletionRequestMessageContentPartText"
},
"description": "An array of content parts with a defined type. Supported options differ based on the [model](https://platform.openai.com/docs/models) being used to generate the response. Can contain text inputs."
}
],
"description": "The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly."
},
"Prompt": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
},
{
"type": "array",
"items": {
"type": "integer",
"format": "int32",
"minimum": 0
}
},
{
"type": "array",
"items": {
"type": "array",
"items": {
"type": "integer",
"format": "int32",
"minimum": 0
}
}
}
]
},
"PromptConfig": {
"type": "object",
"description": "Service tier request options.",
"required": [
"id"
],
"properties": {
"id": {
"type": "string",
"description": "The unique identifier of the prompt template to use."
},
"variables": {
"type": [
"object",
"null"
],
"description": "Optional map of values to substitute in for variables in your prompt. The substitution\nvalues can either be strings, or other Response input types like images or files.\nFor now only supporting Strings.",
"additionalProperties": {
"type": "string"
},
"propertyNames": {
"type": "string"
}
},
"version": {
"type": [
"string",
"null"
],
"description": "Optional version of the prompt template."
}
}
},
"ReasoningConfig": {
"type": "object",
"description": "o-series reasoning settings.",
"properties": {
"effort": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ReasoningEffort",
"description": "Constrain effort on reasoning."
}
]
},
"summary": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ReasoningSummary",
"description": "Summary mode for reasoning."
}
]
}
}
},
"ReasoningEffort": {
"type": "string",
"enum": [
"minimal",
"low",
"medium",
"high"
]
},
"ReasoningSummary": {
"type": "string",
"enum": [
"auto",
"concise",
"detailed"
]
},
"ResponseFormat": {
"oneOf": [
{
"type": "object",
"description": "The type of response format being defined: `text`",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
}
}
},
{
"type": "object",
"description": "The type of response format being defined: `json_object`",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"json_object"
]
}
}
},
{
"type": "object",
"description": "The type of response format being defined: `json_schema`",
"required": [
"json_schema",
"type"
],
"properties": {
"json_schema": {
"$ref": "#/components/schemas/ResponseFormatJsonSchema"
},
"type": {
"type": "string",
"enum": [
"json_schema"
]
}
}
}
]
},
"ResponseFormatJsonSchema": {
"type": "object",
"required": [
"name"
],
"properties": {
"description": {
"type": [
"string",
"null"
],
"description": "A description of what the response format is for, used by the model to determine how to respond in the format."
},
"name": {
"type": "string",
"description": "The name of the response format. Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length of 64."
},
"schema": {
"description": "The schema for the response format, described as a JSON Schema object."
},
"strict": {
"type": [
"boolean",
"null"
],
"description": "Whether to enable strict schema adherence when generating the output. If set to true, the model will always follow the exact schema defined in the `schema` field. Only a subset of JSON Schema is supported when `strict` is `true`. To learn more, read the [Structured Outputs guide](https://platform.openai.com/docs/guides/structured-outputs)."
}
}
},
"ServiceTier": {
"type": "string",
"description": "Service tier request options.",
"enum": [
"auto",
"default",
"flex"
]
},
"Stop": {
"oneOf": [
{
"type": "string"
},
{
"type": "array",
"items": {
"type": "string"
}
}
]
},
"TextConfig": {
"type": "object",
"description": "Configuration for text response format.",
"required": [
"format"
],
"properties": {
"format": {
"$ref": "#/components/schemas/TextResponseFormat",
"description": "Defines the format: plain text, JSON object, or JSON schema."
}
}
},
"TextResponseFormat": {
"oneOf": [
{
"type": "object",
"description": "The type of response format being defined: `text`",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"text"
]
}
}
},
{
"type": "object",
"description": "The type of response format being defined: `json_object`",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"json_object"
]
}
}
},
{
"allOf": [
{
"$ref": "#/components/schemas/ResponseFormatJsonSchema",
"description": "The type of response format being defined: `json_schema`"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"json_schema"
]
}
}
}
],
"description": "The type of response format being defined: `json_schema`"
}
]
},
"Truncation": {
"type": "string",
"description": "Truncation strategies.",
"enum": [
"auto",
"disabled"
]
},
"VideoUrl": {
"type": "object",
"required": [
"url"
],
"properties": {
"detail": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/ImageDetail",
"description": "Specifies the detail level of the video processing."
}
]
},
"url": {
"type": "string",
"format": "uri",
"description": "Either a URL of the video or the base64 encoded video data."
},
"uuid": {
"type": [
"string",
"null"
],
"format": "uuid",
"description": "Optional unique identifier for the video."
}
}
},
"WebSearchContextSize": {
"type": "string",
"description": "The amount of context window space to use for the search.",
"enum": [
"low",
"medium",
"high"
]
},
"WebSearchLocation": {
"type": "object",
"description": "Approximate location parameters for the search.",
"properties": {
"city": {
"type": [
"string",
"null"
],
"description": "Free text input for the city of the user, e.g. `San Francisco`."
},
"country": {
"type": [
"string",
"null"
],
"description": "The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, e.g. `US`."
},
"region": {
"type": [
"string",
"null"
],
"description": "Free text input for the region of the user, e.g. `California`."
},
"timezone": {
"type": [
"string",
"null"
],
"description": "The [IANA timezone](https://timeapi.io/documentation/iana-timezones) of the user, e.g. `America/Los_Angeles`."
}
}
},
"WebSearchOptions": {
"type": "object",
"description": "Options for the web search tool.",
"properties": {
"search_context_size": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/WebSearchContextSize",
"description": "High level guidance for the amount of context window space to use for the search. One of `low`, `medium`, or `high`. `medium` is the default."
}
]
},
"user_location": {
"oneOf": [
{
"type": "null"
},
{
"$ref": "#/components/schemas/WebSearchUserLocation",
"description": "Approximate location parameters for the search."
}
]
}
}
},
"WebSearchUserLocation": {
"type": "object",
"required": [
"type",
"approximate"
],
"properties": {
"approximate": {
"$ref": "#/components/schemas/WebSearchLocation"
},
"type": {
"$ref": "#/components/schemas/WebSearchUserLocationType"
}
}
},
"WebSearchUserLocationType": {
"type": "string",
"enum": [
"approximate"
]
}
}
}
}
\ No newline at end of file
...@@ -59,6 +59,7 @@ secrecy = { version = "0.10.3", features = ["serde"] } ...@@ -59,6 +59,7 @@ secrecy = { version = "0.10.3", features = ["serde"] }
bytes = "1.9.0" bytes = "1.9.0"
eventsource-stream = "0.2.3" eventsource-stream = "0.2.3"
tokio-tungstenite = { version = "0.26.1", optional = true, default-features = false } tokio-tungstenite = { version = "0.26.1", optional = true, default-features = false }
utoipa = { version = "5.3", features = ["url", "uuid"] }
[dev-dependencies] [dev-dependencies]
tokio-test = "0.4.4" tokio-test = "0.4.4"
......
...@@ -13,13 +13,14 @@ use std::{collections::HashMap, pin::Pin}; ...@@ -13,13 +13,14 @@ use std::{collections::HashMap, pin::Pin};
use derive_builder::Builder; use derive_builder::Builder;
use futures::Stream; use futures::Stream;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use url::Url; use url::Url;
use uuid::{Uuid, uuid}; use uuid::{Uuid, uuid};
use crate::error::OpenAIError; use crate::error::OpenAIError;
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum Prompt { pub enum Prompt {
String(String), String(String),
...@@ -29,14 +30,14 @@ pub enum Prompt { ...@@ -29,14 +30,14 @@ pub enum Prompt {
ArrayOfIntegerArray(Vec<Vec<u32>>), ArrayOfIntegerArray(Vec<Vec<u32>>),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum Stop { pub enum Stop {
String(String), // nullable: true String(String), // nullable: true
StringArray(Vec<String>), // minItems: 1; maxItems: 4 StringArray(Vec<String>), // minItems: 1; maxItems: 4
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct Logprobs { pub struct Logprobs {
pub tokens: Vec<String>, pub tokens: Vec<String>,
pub token_logprobs: Vec<Option<f32>>, // Option is to account for null value in the list pub token_logprobs: Vec<Option<f32>>, // Option is to account for null value in the list
...@@ -44,7 +45,7 @@ pub struct Logprobs { ...@@ -44,7 +45,7 @@ pub struct Logprobs {
pub text_offset: Vec<u32>, pub text_offset: Vec<u32>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum CompletionFinishReason { pub enum CompletionFinishReason {
Stop, Stop,
...@@ -52,7 +53,7 @@ pub enum CompletionFinishReason { ...@@ -52,7 +53,7 @@ pub enum CompletionFinishReason {
ContentFilter, ContentFilter,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct Choice { pub struct Choice {
pub text: String, pub text: String,
pub index: u32, pub index: u32,
...@@ -62,7 +63,7 @@ pub struct Choice { ...@@ -62,7 +63,7 @@ pub struct Choice {
pub finish_reason: Option<CompletionFinishReason>, pub finish_reason: Option<CompletionFinishReason>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub enum ChatCompletionFunctionCall { pub enum ChatCompletionFunctionCall {
/// The model does not call a function, and responds to the end-user. /// The model does not call a function, and responds to the end-user.
#[serde(rename = "none")] #[serde(rename = "none")]
...@@ -79,7 +80,7 @@ pub enum ChatCompletionFunctionCall { ...@@ -79,7 +80,7 @@ pub enum ChatCompletionFunctionCall {
Function { name: String }, Function { name: String },
} }
#[derive(Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, Default, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum Role { pub enum Role {
System, System,
...@@ -91,7 +92,7 @@ pub enum Role { ...@@ -91,7 +92,7 @@ pub enum Role {
} }
/// The name and arguments of a function that should be called, as generated by the model. /// The name and arguments of a function that should be called, as generated by the model.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct FunctionCall { pub struct FunctionCall {
/// The name of the function to call. /// The name of the function to call.
pub name: String, pub name: String,
...@@ -100,7 +101,7 @@ pub struct FunctionCall { ...@@ -100,7 +101,7 @@ pub struct FunctionCall {
} }
/// Usage statistics for the completion request. /// Usage statistics for the completion request.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
pub struct CompletionUsage { pub struct CompletionUsage {
/// Number of tokens in the prompt. /// Number of tokens in the prompt.
pub prompt_tokens: u32, pub prompt_tokens: u32,
...@@ -117,7 +118,7 @@ pub struct CompletionUsage { ...@@ -117,7 +118,7 @@ pub struct CompletionUsage {
} }
/// Breakdown of tokens used in a completion. /// Breakdown of tokens used in a completion.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
pub struct PromptTokensDetails { pub struct PromptTokensDetails {
/// Audio input tokens present in the prompt. /// Audio input tokens present in the prompt.
pub audio_tokens: Option<u32>, pub audio_tokens: Option<u32>,
...@@ -126,7 +127,7 @@ pub struct PromptTokensDetails { ...@@ -126,7 +127,7 @@ pub struct PromptTokensDetails {
} }
/// Breakdown of tokens used in a completion. /// Breakdown of tokens used in a completion.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq, Default)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq, Default)]
pub struct CompletionTokensDetails { pub struct CompletionTokensDetails {
pub accepted_prediction_tokens: Option<u32>, pub accepted_prediction_tokens: Option<u32>,
/// Audio input tokens generated by the model. /// Audio input tokens generated by the model.
...@@ -141,7 +142,7 @@ pub struct CompletionTokensDetails { ...@@ -141,7 +142,7 @@ pub struct CompletionTokensDetails {
pub rejected_prediction_tokens: Option<u32>, pub rejected_prediction_tokens: Option<u32>,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestDeveloperMessageArgs")] #[builder(name = "ChatCompletionRequestDeveloperMessageArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -156,14 +157,14 @@ pub struct ChatCompletionRequestDeveloperMessage { ...@@ -156,14 +157,14 @@ pub struct ChatCompletionRequestDeveloperMessage {
pub name: Option<String>, pub name: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum ChatCompletionRequestDeveloperMessageContent { pub enum ChatCompletionRequestDeveloperMessageContent {
Text(String), Text(String),
Array(Vec<ChatCompletionRequestMessageContentPartText>), Array(Vec<ChatCompletionRequestMessageContentPartText>),
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestSystemMessageArgs")] #[builder(name = "ChatCompletionRequestSystemMessageArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -177,7 +178,7 @@ pub struct ChatCompletionRequestSystemMessage { ...@@ -177,7 +178,7 @@ pub struct ChatCompletionRequestSystemMessage {
pub name: Option<String>, pub name: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestMessageContentPartTextArgs")] #[builder(name = "ChatCompletionRequestMessageContentPartTextArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -187,13 +188,13 @@ pub struct ChatCompletionRequestMessageContentPartText { ...@@ -187,13 +188,13 @@ pub struct ChatCompletionRequestMessageContentPartText {
pub text: String, pub text: String,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
pub struct ChatCompletionRequestMessageContentPartRefusal { pub struct ChatCompletionRequestMessageContentPartRefusal {
/// The refusal message generated by the model. /// The refusal message generated by the model.
pub refusal: String, pub refusal: String,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ImageDetail { pub enum ImageDetail {
#[default] #[default]
...@@ -202,7 +203,7 @@ pub enum ImageDetail { ...@@ -202,7 +203,7 @@ pub enum ImageDetail {
High, High,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
#[builder(name = "ImageUrlArgs")] #[builder(name = "ImageUrlArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option))] #[builder(setter(into, strip_option))]
...@@ -218,7 +219,7 @@ pub struct ImageUrl { ...@@ -218,7 +219,7 @@ pub struct ImageUrl {
pub uuid: Option<uuid::Uuid>, pub uuid: Option<uuid::Uuid>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
#[builder(name = "VideoUrlArgs")] #[builder(name = "VideoUrlArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option))] #[builder(setter(into, strip_option))]
...@@ -234,7 +235,7 @@ pub struct VideoUrl { ...@@ -234,7 +235,7 @@ pub struct VideoUrl {
pub uuid: Option<uuid::Uuid>, pub uuid: Option<uuid::Uuid>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestMessageContentPartImageArgs")] #[builder(name = "ChatCompletionRequestMessageContentPartImageArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option))] #[builder(setter(into, strip_option))]
...@@ -244,7 +245,7 @@ pub struct ChatCompletionRequestMessageContentPartImage { ...@@ -244,7 +245,7 @@ pub struct ChatCompletionRequestMessageContentPartImage {
pub image_url: ImageUrl, pub image_url: ImageUrl,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestMessageContentPartVideoArgs")] #[builder(name = "ChatCompletionRequestMessageContentPartVideoArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option))] #[builder(setter(into, strip_option))]
...@@ -254,7 +255,7 @@ pub struct ChatCompletionRequestMessageContentPartVideo { ...@@ -254,7 +255,7 @@ pub struct ChatCompletionRequestMessageContentPartVideo {
pub video_url: VideoUrl, pub video_url: VideoUrl,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
#[builder(name = "AudioUrlArgs")] #[builder(name = "AudioUrlArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option))] #[builder(setter(into, strip_option))]
...@@ -268,7 +269,7 @@ pub struct AudioUrl { ...@@ -268,7 +269,7 @@ pub struct AudioUrl {
pub uuid: Option<uuid::Uuid>, pub uuid: Option<uuid::Uuid>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestMessageContentPartAudioUrlArgs")] #[builder(name = "ChatCompletionRequestMessageContentPartAudioUrlArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option))] #[builder(setter(into, strip_option))]
...@@ -278,7 +279,7 @@ pub struct ChatCompletionRequestMessageContentPartAudioUrl { ...@@ -278,7 +279,7 @@ pub struct ChatCompletionRequestMessageContentPartAudioUrl {
pub audio_url: AudioUrl, pub audio_url: AudioUrl,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum InputAudioFormat { pub enum InputAudioFormat {
Wav, Wav,
...@@ -286,7 +287,7 @@ pub enum InputAudioFormat { ...@@ -286,7 +287,7 @@ pub enum InputAudioFormat {
Mp3, Mp3,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
pub struct InputAudio { pub struct InputAudio {
/// Base64 encoded audio data. /// Base64 encoded audio data.
pub data: String, pub data: String,
...@@ -295,7 +296,7 @@ pub struct InputAudio { ...@@ -295,7 +296,7 @@ pub struct InputAudio {
} }
/// Learn about [audio inputs](https://platform.openai.com/docs/guides/audio). /// Learn about [audio inputs](https://platform.openai.com/docs/guides/audio).
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestMessageContentPartAudioArgs")] #[builder(name = "ChatCompletionRequestMessageContentPartAudioArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -305,7 +306,7 @@ pub struct ChatCompletionRequestMessageContentPartAudio { ...@@ -305,7 +306,7 @@ pub struct ChatCompletionRequestMessageContentPartAudio {
pub input_audio: InputAudio, pub input_audio: InputAudio,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")] #[serde(tag = "type")]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum ChatCompletionRequestUserMessageContentPart { pub enum ChatCompletionRequestUserMessageContentPart {
...@@ -316,14 +317,14 @@ pub enum ChatCompletionRequestUserMessageContentPart { ...@@ -316,14 +317,14 @@ pub enum ChatCompletionRequestUserMessageContentPart {
InputAudio(ChatCompletionRequestMessageContentPartAudio), InputAudio(ChatCompletionRequestMessageContentPartAudio),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")] #[serde(tag = "type")]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum ChatCompletionRequestSystemMessageContentPart { pub enum ChatCompletionRequestSystemMessageContentPart {
Text(ChatCompletionRequestMessageContentPartText), Text(ChatCompletionRequestMessageContentPartText),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")] #[serde(tag = "type")]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum ChatCompletionRequestAssistantMessageContentPart { pub enum ChatCompletionRequestAssistantMessageContentPart {
...@@ -331,14 +332,14 @@ pub enum ChatCompletionRequestAssistantMessageContentPart { ...@@ -331,14 +332,14 @@ pub enum ChatCompletionRequestAssistantMessageContentPart {
Refusal(ChatCompletionRequestMessageContentPartRefusal), Refusal(ChatCompletionRequestMessageContentPartRefusal),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")] #[serde(tag = "type")]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum ChatCompletionRequestToolMessageContentPart { pub enum ChatCompletionRequestToolMessageContentPart {
Text(ChatCompletionRequestMessageContentPartText), Text(ChatCompletionRequestMessageContentPartText),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum ChatCompletionRequestSystemMessageContent { pub enum ChatCompletionRequestSystemMessageContent {
/// The text contents of the system message. /// The text contents of the system message.
...@@ -347,7 +348,7 @@ pub enum ChatCompletionRequestSystemMessageContent { ...@@ -347,7 +348,7 @@ pub enum ChatCompletionRequestSystemMessageContent {
Array(Vec<ChatCompletionRequestSystemMessageContentPart>), Array(Vec<ChatCompletionRequestSystemMessageContentPart>),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum ChatCompletionRequestUserMessageContent { pub enum ChatCompletionRequestUserMessageContent {
/// The text contents of the message. /// The text contents of the message.
...@@ -356,7 +357,7 @@ pub enum ChatCompletionRequestUserMessageContent { ...@@ -356,7 +357,7 @@ pub enum ChatCompletionRequestUserMessageContent {
Array(Vec<ChatCompletionRequestUserMessageContentPart>), Array(Vec<ChatCompletionRequestUserMessageContentPart>),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum ChatCompletionRequestAssistantMessageContent { pub enum ChatCompletionRequestAssistantMessageContent {
/// The text contents of the message. /// The text contents of the message.
...@@ -365,7 +366,7 @@ pub enum ChatCompletionRequestAssistantMessageContent { ...@@ -365,7 +366,7 @@ pub enum ChatCompletionRequestAssistantMessageContent {
Array(Vec<ChatCompletionRequestAssistantMessageContentPart>), Array(Vec<ChatCompletionRequestAssistantMessageContentPart>),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum ChatCompletionRequestToolMessageContent { pub enum ChatCompletionRequestToolMessageContent {
/// The text contents of the tool message. /// The text contents of the tool message.
...@@ -374,7 +375,7 @@ pub enum ChatCompletionRequestToolMessageContent { ...@@ -374,7 +375,7 @@ pub enum ChatCompletionRequestToolMessageContent {
Array(Vec<ChatCompletionRequestToolMessageContentPart>), Array(Vec<ChatCompletionRequestToolMessageContentPart>),
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestUserMessageArgs")] #[builder(name = "ChatCompletionRequestUserMessageArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -388,13 +389,13 @@ pub struct ChatCompletionRequestUserMessage { ...@@ -388,13 +389,13 @@ pub struct ChatCompletionRequestUserMessage {
pub name: Option<String>, pub name: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
pub struct ChatCompletionRequestAssistantMessageAudio { pub struct ChatCompletionRequestAssistantMessageAudio {
/// Unique identifier for a previous audio response from the model. /// Unique identifier for a previous audio response from the model.
pub id: String, pub id: String,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestAssistantMessageArgs")] #[builder(name = "ChatCompletionRequestAssistantMessageArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -423,7 +424,7 @@ pub struct ChatCompletionRequestAssistantMessage { ...@@ -423,7 +424,7 @@ pub struct ChatCompletionRequestAssistantMessage {
} }
/// Tool message /// Tool message
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestToolMessageArgs")] #[builder(name = "ChatCompletionRequestToolMessageArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -435,7 +436,7 @@ pub struct ChatCompletionRequestToolMessage { ...@@ -435,7 +436,7 @@ pub struct ChatCompletionRequestToolMessage {
pub tool_call_id: String, pub tool_call_id: String,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, Builder, PartialEq)]
#[builder(name = "ChatCompletionRequestFunctionMessageArgs")] #[builder(name = "ChatCompletionRequestFunctionMessageArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -448,7 +449,7 @@ pub struct ChatCompletionRequestFunctionMessage { ...@@ -448,7 +449,7 @@ pub struct ChatCompletionRequestFunctionMessage {
pub name: String, pub name: String,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "role")] #[serde(tag = "role")]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ChatCompletionRequestMessage { pub enum ChatCompletionRequestMessage {
...@@ -460,7 +461,7 @@ pub enum ChatCompletionRequestMessage { ...@@ -460,7 +461,7 @@ pub enum ChatCompletionRequestMessage {
Function(ChatCompletionRequestFunctionMessage), Function(ChatCompletionRequestFunctionMessage),
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionMessageToolCall { pub struct ChatCompletionMessageToolCall {
/// The ID of the tool call. /// The ID of the tool call.
pub id: String, pub id: String,
...@@ -470,7 +471,7 @@ pub struct ChatCompletionMessageToolCall { ...@@ -470,7 +471,7 @@ pub struct ChatCompletionMessageToolCall {
pub function: FunctionCall, pub function: FunctionCall,
} }
#[derive(Debug, Serialize, Deserialize, Default, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Default, Clone, PartialEq)]
pub struct ChatCompletionResponseMessageAudio { pub struct ChatCompletionResponseMessageAudio {
/// Unique identifier for this audio response. /// Unique identifier for this audio response.
pub id: String, pub id: String,
...@@ -483,7 +484,7 @@ pub struct ChatCompletionResponseMessageAudio { ...@@ -483,7 +484,7 @@ pub struct ChatCompletionResponseMessageAudio {
} }
/// A chat completion message generated by the model. /// A chat completion message generated by the model.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionResponseMessage { pub struct ChatCompletionResponseMessage {
/// The contents of the message. /// The contents of the message.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
...@@ -512,7 +513,7 @@ pub struct ChatCompletionResponseMessage { ...@@ -512,7 +513,7 @@ pub struct ChatCompletionResponseMessage {
pub reasoning_content: Option<String>, pub reasoning_content: Option<String>,
} }
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "ChatCompletionFunctionsArgs")] #[builder(name = "ChatCompletionFunctionsArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -531,7 +532,7 @@ pub struct ChatCompletionFunctions { ...@@ -531,7 +532,7 @@ pub struct ChatCompletionFunctions {
pub parameters: serde_json::Value, pub parameters: serde_json::Value,
} }
#[derive(Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, Builder, PartialEq)]
#[builder(name = "FunctionObjectArgs")] #[builder(name = "FunctionObjectArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -554,7 +555,7 @@ pub struct FunctionObject { ...@@ -554,7 +555,7 @@ pub struct FunctionObject {
pub strict: Option<bool>, pub strict: Option<bool>,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum ResponseFormat { pub enum ResponseFormat {
/// The type of response format being defined: `text` /// The type of response format being defined: `text`
...@@ -567,7 +568,7 @@ pub enum ResponseFormat { ...@@ -567,7 +568,7 @@ pub enum ResponseFormat {
}, },
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ResponseFormatJsonSchema { pub struct ResponseFormatJsonSchema {
/// A description of what the response format is for, used by the model to determine how to respond in the format. /// A description of what the response format is for, used by the model to determine how to respond in the format.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
...@@ -582,14 +583,14 @@ pub struct ResponseFormatJsonSchema { ...@@ -582,14 +583,14 @@ pub struct ResponseFormatJsonSchema {
pub strict: Option<bool>, pub strict: Option<bool>,
} }
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ChatCompletionToolType { pub enum ChatCompletionToolType {
#[default] #[default]
Function, Function,
} }
#[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
#[builder(name = "ChatCompletionToolArgs")] #[builder(name = "ChatCompletionToolArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -601,14 +602,14 @@ pub struct ChatCompletionTool { ...@@ -601,14 +602,14 @@ pub struct ChatCompletionTool {
pub function: FunctionObject, pub function: FunctionObject,
} }
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct FunctionName { pub struct FunctionName {
/// The name of the function to call. /// The name of the function to call.
pub name: String, pub name: String,
} }
/// Specifies a tool the model should use. Use to force the model to call a specific function. /// Specifies a tool the model should use. Use to force the model to call a specific function.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
pub struct ChatCompletionNamedToolChoice { pub struct ChatCompletionNamedToolChoice {
/// The type of the tool. Currently, only `function` is supported. /// The type of the tool. Currently, only `function` is supported.
pub r#type: ChatCompletionToolType, pub r#type: ChatCompletionToolType,
...@@ -623,7 +624,7 @@ pub struct ChatCompletionNamedToolChoice { ...@@ -623,7 +624,7 @@ pub struct ChatCompletionNamedToolChoice {
/// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool. /// Specifying a particular tool via `{"type": "function", "function": {"name": "my_function"}}` forces the model to call that tool.
/// ///
/// `none` is the default when no tools are present. `auto` is the default if tools are present. /// `none` is the default when no tools are present. `auto` is the default if tools are present.
#[derive(Clone, Serialize, Default, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ChatCompletionToolChoiceOption { pub enum ChatCompletionToolChoiceOption {
#[default] #[default]
...@@ -634,7 +635,7 @@ pub enum ChatCompletionToolChoiceOption { ...@@ -634,7 +635,7 @@ pub enum ChatCompletionToolChoiceOption {
Named(ChatCompletionNamedToolChoice), Named(ChatCompletionNamedToolChoice),
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq, Default)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq, Default)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
/// The amount of context window space to use for the search. /// The amount of context window space to use for the search.
pub enum WebSearchContextSize { pub enum WebSearchContextSize {
...@@ -644,14 +645,14 @@ pub enum WebSearchContextSize { ...@@ -644,14 +645,14 @@ pub enum WebSearchContextSize {
High, High,
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum WebSearchUserLocationType { pub enum WebSearchUserLocationType {
Approximate, Approximate,
} }
/// Approximate location parameters for the search. /// Approximate location parameters for the search.
#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
pub struct WebSearchLocation { pub struct WebSearchLocation {
/// The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, e.g. `US`. /// The two-letter [ISO country code](https://en.wikipedia.org/wiki/ISO_3166-1) of the user, e.g. `US`.
pub country: Option<String>, pub country: Option<String>,
...@@ -663,7 +664,7 @@ pub struct WebSearchLocation { ...@@ -663,7 +664,7 @@ pub struct WebSearchLocation {
pub timezone: Option<String>, pub timezone: Option<String>,
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct WebSearchUserLocation { pub struct WebSearchUserLocation {
// The type of location approximation. Always `approximate`. // The type of location approximation. Always `approximate`.
pub r#type: WebSearchUserLocationType, pub r#type: WebSearchUserLocationType,
...@@ -672,7 +673,7 @@ pub struct WebSearchUserLocation { ...@@ -672,7 +673,7 @@ pub struct WebSearchUserLocation {
} }
/// Options for the web search tool. /// Options for the web search tool.
#[derive(Clone, Serialize, Debug, Default, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Default, Deserialize, PartialEq)]
pub struct WebSearchOptions { pub struct WebSearchOptions {
/// High level guidance for the amount of context window space to use for the search. One of `low`, `medium`, or `high`. `medium` is the default. /// High level guidance for the amount of context window space to use for the search. One of `low`, `medium`, or `high`. `medium` is the default.
pub search_context_size: Option<WebSearchContextSize>, pub search_context_size: Option<WebSearchContextSize>,
...@@ -681,7 +682,7 @@ pub struct WebSearchOptions { ...@@ -681,7 +682,7 @@ pub struct WebSearchOptions {
pub user_location: Option<WebSearchUserLocation>, pub user_location: Option<WebSearchUserLocation>,
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ServiceTier { pub enum ServiceTier {
Auto, Auto,
...@@ -691,7 +692,7 @@ pub enum ServiceTier { ...@@ -691,7 +692,7 @@ pub enum ServiceTier {
Priority, Priority,
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ServiceTierResponse { pub enum ServiceTierResponse {
Scale, Scale,
...@@ -700,7 +701,7 @@ pub enum ServiceTierResponse { ...@@ -700,7 +701,7 @@ pub enum ServiceTierResponse {
Priority, Priority,
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ReasoningEffort { pub enum ReasoningEffort {
Minimal, Minimal,
...@@ -715,7 +716,7 @@ pub enum ReasoningEffort { ...@@ -715,7 +716,7 @@ pub enum ReasoningEffort {
/// ///
/// The `gpt-4o-audio-preview` model can also be used to [generate /// The `gpt-4o-audio-preview` model can also be used to [generate
/// audio](https://platform.openai.com/docs/guides/audio). To request that this model generate both text and audio responses, you can use: `["text", "audio"]` /// audio](https://platform.openai.com/docs/guides/audio). To request that this model generate both text and audio responses, you can use: `["text", "audio"]`
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ChatCompletionModalities { pub enum ChatCompletionModalities {
Text, Text,
...@@ -723,7 +724,7 @@ pub enum ChatCompletionModalities { ...@@ -723,7 +724,7 @@ pub enum ChatCompletionModalities {
} }
/// The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly. /// The content that should be matched when generating a model response. If generated tokens would match this content, the entire model response can be returned much more quickly.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum PredictionContentContent { pub enum PredictionContentContent {
/// The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes. /// The content used for a Predicted Output. This is often the text of a file you are regenerating with minor changes.
...@@ -733,7 +734,7 @@ pub enum PredictionContentContent { ...@@ -733,7 +734,7 @@ pub enum PredictionContentContent {
} }
/// Static predicted output content, such as the content of a text file that is being regenerated. /// Static predicted output content, such as the content of a text file that is being regenerated.
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(tag = "type", rename_all = "lowercase", content = "content")] #[serde(tag = "type", rename_all = "lowercase", content = "content")]
pub enum PredictionContent { pub enum PredictionContent {
/// The type of the predicted content you want to provide. This type is /// The type of the predicted content you want to provide. This type is
...@@ -741,7 +742,7 @@ pub enum PredictionContent { ...@@ -741,7 +742,7 @@ pub enum PredictionContent {
Content(PredictionContentContent), Content(PredictionContentContent),
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ChatCompletionAudioVoice { pub enum ChatCompletionAudioVoice {
Alloy, Alloy,
...@@ -754,7 +755,7 @@ pub enum ChatCompletionAudioVoice { ...@@ -754,7 +755,7 @@ pub enum ChatCompletionAudioVoice {
Verse, Verse,
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ChatCompletionAudioFormat { pub enum ChatCompletionAudioFormat {
Wav, Wav,
...@@ -764,7 +765,7 @@ pub enum ChatCompletionAudioFormat { ...@@ -764,7 +765,7 @@ pub enum ChatCompletionAudioFormat {
Pcm16, Pcm16,
} }
#[derive(Clone, Serialize, Debug, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Debug, Deserialize, PartialEq)]
pub struct ChatCompletionAudio { pub struct ChatCompletionAudio {
/// The voice the model uses to respond. Supported voices are `ash`, `ballad`, `coral`, `sage`, and `verse` (also supported but not recommended are `alloy`, `echo`, and `shimmer`; these voices are less expressive). /// The voice the model uses to respond. Supported voices are `ash`, `ballad`, `coral`, `sage`, and `verse` (also supported but not recommended are `alloy`, `echo`, and `shimmer`; these voices are less expressive).
pub voice: ChatCompletionAudioVoice, pub voice: ChatCompletionAudioVoice,
...@@ -772,7 +773,7 @@ pub struct ChatCompletionAudio { ...@@ -772,7 +773,7 @@ pub struct ChatCompletionAudio {
pub format: ChatCompletionAudioFormat, pub format: ChatCompletionAudioFormat,
} }
#[derive(Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)] #[derive(ToSchema, Clone, Serialize, Default, Debug, Builder, Deserialize, PartialEq)]
#[builder(name = "CreateChatCompletionRequestArgs")] #[builder(name = "CreateChatCompletionRequestArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -961,13 +962,13 @@ pub struct CreateChatCompletionRequest { ...@@ -961,13 +962,13 @@ pub struct CreateChatCompletionRequest {
} }
/// Options for streaming response. Only set this when you set `stream: true`. /// Options for streaming response. Only set this when you set `stream: true`.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub struct ChatCompletionStreamOptions { pub struct ChatCompletionStreamOptions {
/// If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value. /// If set, an additional chunk will be streamed before the `data: [DONE]` message. The `usage` field on this chunk shows the token usage statistics for the entire request, and the `choices` field will always be an empty array. All other chunks will also include a `usage` field, but with a null value.
pub include_usage: bool, pub include_usage: bool,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum FinishReason { pub enum FinishReason {
Stop, Stop,
...@@ -977,7 +978,7 @@ pub enum FinishReason { ...@@ -977,7 +978,7 @@ pub enum FinishReason {
FunctionCall, FunctionCall,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct TopLogprobs { pub struct TopLogprobs {
/// The token. /// The token.
pub token: String, pub token: String,
...@@ -987,7 +988,7 @@ pub struct TopLogprobs { ...@@ -987,7 +988,7 @@ pub struct TopLogprobs {
pub bytes: Option<Vec<u8>>, pub bytes: Option<Vec<u8>>,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionTokenLogprob { pub struct ChatCompletionTokenLogprob {
/// The token. /// The token.
pub token: String, pub token: String,
...@@ -999,21 +1000,21 @@ pub struct ChatCompletionTokenLogprob { ...@@ -999,21 +1000,21 @@ pub struct ChatCompletionTokenLogprob {
pub top_logprobs: Vec<TopLogprobs>, pub top_logprobs: Vec<TopLogprobs>,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatChoiceLogprobs { pub struct ChatChoiceLogprobs {
/// A list of message content tokens with log probability information. /// A list of message content tokens with log probability information.
pub content: Option<Vec<ChatCompletionTokenLogprob>>, pub content: Option<Vec<ChatCompletionTokenLogprob>>,
pub refusal: Option<Vec<ChatCompletionTokenLogprob>>, pub refusal: Option<Vec<ChatCompletionTokenLogprob>>,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum StopReason { pub enum StopReason {
String(String), // matched user-provided stop sequence String(String), // matched user-provided stop sequence
Int(i64), // matched stop token id (requires stop_token_id support) Int(i64), // matched stop token id (requires stop_token_id support)
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatChoice { pub struct ChatChoice {
/// The index of the choice in the list of choices. /// The index of the choice in the list of choices.
pub index: u32, pub index: u32,
...@@ -1034,7 +1035,7 @@ pub struct ChatChoice { ...@@ -1034,7 +1035,7 @@ pub struct ChatChoice {
} }
/// Represents a chat completion response returned by model, based on the provided input. /// Represents a chat completion response returned by model, based on the provided input.
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] #[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
pub struct CreateChatCompletionResponse { pub struct CreateChatCompletionResponse {
/// A unique identifier for the chat completion. /// A unique identifier for the chat completion.
pub id: String, pub id: String,
...@@ -1066,7 +1067,7 @@ pub struct CreateChatCompletionResponse { ...@@ -1066,7 +1067,7 @@ pub struct CreateChatCompletionResponse {
pub type ChatCompletionResponseStream = pub type ChatCompletionResponseStream =
Pin<Box<dyn Stream<Item = Result<CreateChatCompletionStreamResponse, OpenAIError>> + Send>>; Pin<Box<dyn Stream<Item = Result<CreateChatCompletionStreamResponse, OpenAIError>> + Send>>;
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct FunctionCallStream { pub struct FunctionCallStream {
/// The name of the function to call. /// The name of the function to call.
pub name: Option<String>, pub name: Option<String>,
...@@ -1077,7 +1078,7 @@ pub struct FunctionCallStream { ...@@ -1077,7 +1078,7 @@ pub struct FunctionCallStream {
pub arguments: Option<String>, pub arguments: Option<String>,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionMessageToolCallChunk { pub struct ChatCompletionMessageToolCallChunk {
pub index: u32, pub index: u32,
/// The ID of the tool call. /// The ID of the tool call.
...@@ -1088,7 +1089,7 @@ pub struct ChatCompletionMessageToolCallChunk { ...@@ -1088,7 +1089,7 @@ pub struct ChatCompletionMessageToolCallChunk {
} }
/// A chat completion delta generated by streamed model responses. /// A chat completion delta generated by streamed model responses.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatCompletionStreamResponseDelta { pub struct ChatCompletionStreamResponseDelta {
/// The contents of the chunk message. /// The contents of the chunk message.
pub content: Option<String>, pub content: Option<String>,
...@@ -1106,7 +1107,7 @@ pub struct ChatCompletionStreamResponseDelta { ...@@ -1106,7 +1107,7 @@ pub struct ChatCompletionStreamResponseDelta {
pub reasoning_content: Option<String>, pub reasoning_content: Option<String>,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct ChatChoiceStream { pub struct ChatChoiceStream {
/// The index of the choice in the list of choices. /// The index of the choice in the list of choices.
pub index: u32, pub index: u32,
...@@ -1132,7 +1133,7 @@ pub struct ChatChoiceStream { ...@@ -1132,7 +1133,7 @@ pub struct ChatChoiceStream {
pub logprobs: Option<ChatChoiceLogprobs>, pub logprobs: Option<ChatChoiceLogprobs>,
} }
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] #[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
/// Represents a streamed chunk of a chat completion response returned by model, based on the provided input. /// Represents a streamed chunk of a chat completion response returned by model, based on the provided input.
pub struct CreateChatCompletionStreamResponse { pub struct CreateChatCompletionStreamResponse {
/// A unique identifier for the chat completion. Each chunk has the same ID. /// A unique identifier for the chat completion. Each chunk has the same ID.
......
...@@ -13,6 +13,7 @@ use std::{collections::HashMap, pin::Pin}; ...@@ -13,6 +13,7 @@ use std::{collections::HashMap, pin::Pin};
use derive_builder::Builder; use derive_builder::Builder;
use futures::Stream; use futures::Stream;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::error::OpenAIError; use crate::error::OpenAIError;
...@@ -89,7 +90,7 @@ where ...@@ -89,7 +90,7 @@ where
deserializer.deserialize_option(StrictBoolVisitor) deserializer.deserialize_option(StrictBoolVisitor)
} }
#[derive(Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)] #[derive(ToSchema, Clone, Serialize, Deserialize, Default, Debug, Builder, PartialEq)]
#[builder(name = "CreateCompletionRequestArgs")] #[builder(name = "CreateCompletionRequestArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -197,7 +198,7 @@ pub struct CreateCompletionRequest { ...@@ -197,7 +198,7 @@ pub struct CreateCompletionRequest {
pub seed: Option<i64>, pub seed: Option<i64>,
} }
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] #[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
pub struct CreateCompletionResponse { pub struct CreateCompletionResponse {
/// A unique identifier for the completion. /// A unique identifier for the completion.
pub id: String, pub id: String,
......
...@@ -11,10 +11,11 @@ ...@@ -11,10 +11,11 @@
use base64::engine::{Engine, general_purpose}; use base64::engine::{Engine, general_purpose};
use derive_builder::Builder; use derive_builder::Builder;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::error::OpenAIError; use crate::error::OpenAIError;
#[derive(Debug, Serialize, Clone, PartialEq, Deserialize)] #[derive(ToSchema, Debug, Serialize, Clone, PartialEq, Deserialize)]
#[serde(untagged)] #[serde(untagged)]
pub enum EmbeddingInput { pub enum EmbeddingInput {
String(String), String(String),
...@@ -24,7 +25,7 @@ pub enum EmbeddingInput { ...@@ -24,7 +25,7 @@ pub enum EmbeddingInput {
ArrayOfIntegerArray(Vec<Vec<u32>>), ArrayOfIntegerArray(Vec<Vec<u32>>),
} }
#[derive(Debug, Serialize, Default, Clone, PartialEq, Deserialize)] #[derive(ToSchema, Debug, Serialize, Default, Clone, PartialEq, Deserialize)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum EncodingFormat { pub enum EncodingFormat {
#[default] #[default]
...@@ -32,7 +33,7 @@ pub enum EncodingFormat { ...@@ -32,7 +33,7 @@ pub enum EncodingFormat {
Base64, Base64,
} }
#[derive(Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)] #[derive(ToSchema, Debug, Serialize, Default, Clone, Builder, PartialEq, Deserialize)]
#[builder(name = "CreateEmbeddingRequestArgs")] #[builder(name = "CreateEmbeddingRequestArgs")]
#[builder(pattern = "mutable")] #[builder(pattern = "mutable")]
#[builder(setter(into, strip_option), default)] #[builder(setter(into, strip_option), default)]
...@@ -64,7 +65,7 @@ pub struct CreateEmbeddingRequest { ...@@ -64,7 +65,7 @@ pub struct CreateEmbeddingRequest {
} }
/// Represents an embedding vector returned by embedding endpoint. /// Represents an embedding vector returned by embedding endpoint.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct Embedding { pub struct Embedding {
/// The index of the embedding in the list of embeddings. /// The index of the embedding in the list of embeddings.
pub index: u32, pub index: u32,
...@@ -75,7 +76,7 @@ pub struct Embedding { ...@@ -75,7 +76,7 @@ pub struct Embedding {
pub embedding: Vec<f32>, pub embedding: Vec<f32>,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct Base64EmbeddingVector(pub String); pub struct Base64EmbeddingVector(pub String);
impl From<Base64EmbeddingVector> for Vec<f32> { impl From<Base64EmbeddingVector> for Vec<f32> {
...@@ -91,7 +92,7 @@ impl From<Base64EmbeddingVector> for Vec<f32> { ...@@ -91,7 +92,7 @@ impl From<Base64EmbeddingVector> for Vec<f32> {
} }
/// Represents an base64-encoded embedding vector returned by embedding endpoint. /// Represents an base64-encoded embedding vector returned by embedding endpoint.
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct Base64Embedding { pub struct Base64Embedding {
/// The index of the embedding in the list of embeddings. /// The index of the embedding in the list of embeddings.
pub index: u32, pub index: u32,
...@@ -101,7 +102,7 @@ pub struct Base64Embedding { ...@@ -101,7 +102,7 @@ pub struct Base64Embedding {
pub embedding: Base64EmbeddingVector, pub embedding: Base64EmbeddingVector,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
pub struct EmbeddingUsage { pub struct EmbeddingUsage {
/// The number of tokens used by the prompt. /// The number of tokens used by the prompt.
pub prompt_tokens: u32, pub prompt_tokens: u32,
...@@ -109,7 +110,7 @@ pub struct EmbeddingUsage { ...@@ -109,7 +110,7 @@ pub struct EmbeddingUsage {
pub total_tokens: u32, pub total_tokens: u32,
} }
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] #[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
pub struct CreateEmbeddingResponse { pub struct CreateEmbeddingResponse {
pub object: String, pub object: String,
/// The name of the model used to generate the embedding. /// The name of the model used to generate the embedding.
...@@ -120,7 +121,7 @@ pub struct CreateEmbeddingResponse { ...@@ -120,7 +121,7 @@ pub struct CreateEmbeddingResponse {
pub usage: EmbeddingUsage, pub usage: EmbeddingUsage,
} }
#[derive(Debug, Deserialize, Clone, PartialEq, Serialize)] #[derive(ToSchema, Debug, Deserialize, Clone, PartialEq, Serialize)]
pub struct CreateBase64EmbeddingResponse { pub struct CreateBase64EmbeddingResponse {
pub object: String, pub object: String,
/// The name of the model used to generate the embedding. /// The name of the model used to generate the embedding.
......
...@@ -19,9 +19,10 @@ use serde::{Deserialize, Serialize}; ...@@ -19,9 +19,10 @@ use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use std::collections::HashMap; use std::collections::HashMap;
use std::pin::Pin; use std::pin::Pin;
use utoipa::ToSchema;
/// Role of messages in the API. /// Role of messages in the API.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum Role { pub enum Role {
User, User,
...@@ -31,7 +32,7 @@ pub enum Role { ...@@ -31,7 +32,7 @@ pub enum Role {
} }
/// Status of input/output items. /// Status of input/output items.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum OutputStatus { pub enum OutputStatus {
InProgress, InProgress,
...@@ -40,7 +41,7 @@ pub enum OutputStatus { ...@@ -40,7 +41,7 @@ pub enum OutputStatus {
} }
/// Input payload: raw text or structured context items. /// Input payload: raw text or structured context items.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum Input { pub enum Input {
/// A text input to the model, equivalent to a text input with the user role. /// A text input to the model, equivalent to a text input with the user role.
...@@ -50,7 +51,7 @@ pub enum Input { ...@@ -50,7 +51,7 @@ pub enum Input {
} }
/// A context item: currently only messages. /// A context item: currently only messages.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged, rename_all = "snake_case")] #[serde(untagged, rename_all = "snake_case")]
pub enum InputItem { pub enum InputItem {
Message(InputMessage), Message(InputMessage),
...@@ -58,7 +59,7 @@ pub enum InputItem { ...@@ -58,7 +59,7 @@ pub enum InputItem {
} }
/// A message to prime the model. /// A message to prime the model.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "InputMessageArgs", name = "InputMessageArgs",
pattern = "mutable", pattern = "mutable",
...@@ -76,14 +77,14 @@ pub struct InputMessage { ...@@ -76,14 +77,14 @@ pub struct InputMessage {
pub content: InputContent, pub content: InputContent,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum InputMessageType { pub enum InputMessageType {
#[default] #[default]
Message, Message,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum InputContent { pub enum InputContent {
/// A text input to the model. /// A text input to the model.
...@@ -93,7 +94,7 @@ pub enum InputContent { ...@@ -93,7 +94,7 @@ pub enum InputContent {
} }
/// Parts of a message: text, image, video, file, or audio. /// Parts of a message: text, image, video, file, or audio.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum ContentType { pub enum ContentType {
/// A text input to the model. /// A text input to the model.
...@@ -108,12 +109,12 @@ pub enum ContentType { ...@@ -108,12 +109,12 @@ pub enum ContentType {
InputFile(InputFile), InputFile(InputFile),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct InputText { pub struct InputText {
text: String, text: String,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "InputImageArgs", name = "InputImageArgs",
pattern = "mutable", pattern = "mutable",
...@@ -133,7 +134,7 @@ pub struct InputImage { ...@@ -133,7 +134,7 @@ pub struct InputImage {
image_url: Option<String>, image_url: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "InputVideoArgs", name = "InputVideoArgs",
pattern = "mutable", pattern = "mutable",
...@@ -153,7 +154,7 @@ pub struct InputVideo { ...@@ -153,7 +154,7 @@ pub struct InputVideo {
video_url: Option<String>, video_url: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "InputAudioArgs", name = "InputAudioArgs",
pattern = "mutable", pattern = "mutable",
...@@ -171,7 +172,7 @@ pub struct InputAudio { ...@@ -171,7 +172,7 @@ pub struct InputAudio {
audio_url: Option<String>, audio_url: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "InputFileArgs", name = "InputFileArgs",
pattern = "mutable", pattern = "mutable",
...@@ -192,7 +193,7 @@ pub struct InputFile { ...@@ -192,7 +193,7 @@ pub struct InputFile {
} }
/// Builder for a Responses API request. /// Builder for a Responses API request.
#[derive(Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)] #[derive(ToSchema, Clone, Serialize, Deserialize, Debug, Default, Builder, PartialEq)]
#[builder( #[builder(
name = "CreateResponseArgs", name = "CreateResponseArgs",
pattern = "mutable", pattern = "mutable",
...@@ -202,6 +203,8 @@ pub struct InputFile { ...@@ -202,6 +203,8 @@ pub struct InputFile {
#[builder(build_fn(error = "OpenAIError"))] #[builder(build_fn(error = "OpenAIError"))]
pub struct CreateResponse { pub struct CreateResponse {
/// Text, image, or file inputs to the model, used to generate a response. /// Text, image, or file inputs to the model, used to generate a response.
/// Using value_type to prevent deep schema recursion from Input's nested content types.
#[schema(value_type = Object)]
pub input: Input, pub input: Input,
/// Model ID used to generate the response, like `gpt-4o`. /// Model ID used to generate the response, like `gpt-4o`.
...@@ -319,12 +322,14 @@ pub struct CreateResponse { ...@@ -319,12 +322,14 @@ pub struct CreateResponse {
/// How the model should select which tool (or tools) to use when generating /// How the model should select which tool (or tools) to use when generating
/// a response. /// a response.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
#[schema(value_type = Object)]
pub tool_choice: Option<ToolChoice>, pub tool_choice: Option<ToolChoice>,
/// An array of tools the model may call while generating a response. /// An array of tools the model may call while generating a response.
/// Can include built-in tools (file_search, web_search_preview, /// Can include built-in tools (file_search, web_search_preview,
/// computer_use_preview) or custom function definitions. /// computer_use_preview) or custom function definitions.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
#[schema(value_type = Vec<Object>)]
pub tools: Option<Vec<ToolDefinition>>, pub tools: Option<Vec<ToolDefinition>>,
/// An integer between 0 and 20 specifying the number of most likely tokens to return /// An integer between 0 and 20 specifying the number of most likely tokens to return
...@@ -353,7 +358,7 @@ pub struct CreateResponse { ...@@ -353,7 +358,7 @@ pub struct CreateResponse {
} }
/// Prompt template reference for a request. /// Prompt template reference for a request.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct PromptConfig { pub struct PromptConfig {
/// The unique identifier of the prompt template to use. /// The unique identifier of the prompt template to use.
pub id: String, pub id: String,
...@@ -370,7 +375,7 @@ pub struct PromptConfig { ...@@ -370,7 +375,7 @@ pub struct PromptConfig {
} }
/// Service tier request options. /// Service tier request options.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ServiceTier { pub enum ServiceTier {
Auto, Auto,
...@@ -379,7 +384,7 @@ pub enum ServiceTier { ...@@ -379,7 +384,7 @@ pub enum ServiceTier {
} }
/// Truncation strategies. /// Truncation strategies.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum Truncation { pub enum Truncation {
Auto, Auto,
...@@ -387,7 +392,7 @@ pub enum Truncation { ...@@ -387,7 +392,7 @@ pub enum Truncation {
} }
/// o-series reasoning settings. /// o-series reasoning settings.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "ReasoningConfigArgs", name = "ReasoningConfigArgs",
pattern = "mutable", pattern = "mutable",
...@@ -404,7 +409,7 @@ pub struct ReasoningConfig { ...@@ -404,7 +409,7 @@ pub struct ReasoningConfig {
pub summary: Option<ReasoningSummary>, pub summary: Option<ReasoningSummary>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ReasoningSummary { pub enum ReasoningSummary {
Auto, Auto,
...@@ -413,13 +418,13 @@ pub enum ReasoningSummary { ...@@ -413,13 +418,13 @@ pub enum ReasoningSummary {
} }
/// Configuration for text response format. /// Configuration for text response format.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct TextConfig { pub struct TextConfig {
/// Defines the format: plain text, JSON object, or JSON schema. /// Defines the format: plain text, JSON object, or JSON schema.
pub format: TextResponseFormat, pub format: TextResponseFormat,
} }
#[derive(Debug, Deserialize, Serialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Deserialize, Serialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum TextResponseFormat { pub enum TextResponseFormat {
/// The type of response format being defined: `text` /// The type of response format being defined: `text`
...@@ -431,7 +436,7 @@ pub enum TextResponseFormat { ...@@ -431,7 +436,7 @@ pub enum TextResponseFormat {
} }
/// Definitions for model-callable tools. /// Definitions for model-callable tools.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum ToolDefinition { pub enum ToolDefinition {
/// File search tool. /// File search tool.
...@@ -452,7 +457,7 @@ pub enum ToolDefinition { ...@@ -452,7 +457,7 @@ pub enum ToolDefinition {
LocalShell, LocalShell,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "FileSearchArgs", name = "FileSearchArgs",
pattern = "mutable", pattern = "mutable",
...@@ -474,7 +479,7 @@ pub struct FileSearch { ...@@ -474,7 +479,7 @@ pub struct FileSearch {
pub ranking_options: Option<RankingOptions>, pub ranking_options: Option<RankingOptions>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "FunctionArgs", name = "FunctionArgs",
pattern = "mutable", pattern = "mutable",
...@@ -494,7 +499,7 @@ pub struct Function { ...@@ -494,7 +499,7 @@ pub struct Function {
pub description: Option<String>, pub description: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "WebSearchPreviewArgs", name = "WebSearchPreviewArgs",
pattern = "mutable", pattern = "mutable",
...@@ -510,7 +515,7 @@ pub struct WebSearchPreview { ...@@ -510,7 +515,7 @@ pub struct WebSearchPreview {
pub search_context_size: Option<WebSearchContextSize>, pub search_context_size: Option<WebSearchContextSize>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum WebSearchContextSize { pub enum WebSearchContextSize {
Low, Low,
...@@ -518,7 +523,7 @@ pub enum WebSearchContextSize { ...@@ -518,7 +523,7 @@ pub enum WebSearchContextSize {
High, High,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "ComputerUsePreviewArgs", name = "ComputerUsePreviewArgs",
pattern = "mutable", pattern = "mutable",
...@@ -535,7 +540,7 @@ pub struct ComputerUsePreview { ...@@ -535,7 +540,7 @@ pub struct ComputerUsePreview {
} }
/// Options for search result ranking. /// Options for search result ranking.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct RankingOptions { pub struct RankingOptions {
/// The ranker to use for the file search. /// The ranker to use for the file search.
pub ranker: String, pub ranker: String,
...@@ -546,7 +551,7 @@ pub struct RankingOptions { ...@@ -546,7 +551,7 @@ pub struct RankingOptions {
} }
/// Filters for file search. /// Filters for file search.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum Filter { pub enum Filter {
/// A filter used to compare a specified attribute key to a given value using a defined /// A filter used to compare a specified attribute key to a given value using a defined
...@@ -557,7 +562,7 @@ pub enum Filter { ...@@ -557,7 +562,7 @@ pub enum Filter {
} }
/// Single comparison filter. /// Single comparison filter.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ComparisonFilter { pub struct ComparisonFilter {
/// Specifies the comparison operator /// Specifies the comparison operator
#[serde(rename = "type")] #[serde(rename = "type")]
...@@ -568,7 +573,7 @@ pub struct ComparisonFilter { ...@@ -568,7 +573,7 @@ pub struct ComparisonFilter {
pub value: serde_json::Value, pub value: serde_json::Value,
} }
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
pub enum ComparisonType { pub enum ComparisonType {
#[serde(rename = "eq")] #[serde(rename = "eq")]
Equals, Equals,
...@@ -585,7 +590,7 @@ pub enum ComparisonType { ...@@ -585,7 +590,7 @@ pub enum ComparisonType {
} }
/// Combine multiple filters. /// Combine multiple filters.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CompoundFilter { pub struct CompoundFilter {
/// Type of operation /// Type of operation
#[serde(rename = "type")] #[serde(rename = "type")]
...@@ -594,7 +599,7 @@ pub struct CompoundFilter { ...@@ -594,7 +599,7 @@ pub struct CompoundFilter {
pub filters: Vec<Filter>, pub filters: Vec<Filter>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum CompoundType { pub enum CompoundType {
And, And,
...@@ -602,7 +607,7 @@ pub enum CompoundType { ...@@ -602,7 +607,7 @@ pub enum CompoundType {
} }
/// Approximate user location for web search. /// Approximate user location for web search.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "LocationArgs", name = "LocationArgs",
pattern = "mutable", pattern = "mutable",
...@@ -629,7 +634,7 @@ pub struct Location { ...@@ -629,7 +634,7 @@ pub struct Location {
} }
/// MCP (Model Context Protocol) tool configuration. /// MCP (Model Context Protocol) tool configuration.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "McpArgs", name = "McpArgs",
pattern = "mutable", pattern = "mutable",
...@@ -654,7 +659,7 @@ pub struct Mcp { ...@@ -654,7 +659,7 @@ pub struct Mcp {
} }
/// Allowed tools configuration for MCP. /// Allowed tools configuration for MCP.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum AllowedTools { pub enum AllowedTools {
/// A flat list of allowed tool names. /// A flat list of allowed tool names.
...@@ -664,7 +669,7 @@ pub enum AllowedTools { ...@@ -664,7 +669,7 @@ pub enum AllowedTools {
} }
/// Filter object for MCP allowed tools. /// Filter object for MCP allowed tools.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct McpAllowedToolsFilter { pub struct McpAllowedToolsFilter {
/// Names of tools in the filter /// Names of tools in the filter
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
...@@ -672,7 +677,7 @@ pub struct McpAllowedToolsFilter { ...@@ -672,7 +677,7 @@ pub struct McpAllowedToolsFilter {
} }
/// Approval policy or filter for MCP tools. /// Approval policy or filter for MCP tools.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum RequireApproval { pub enum RequireApproval {
/// A blanket policy: "always" or "never". /// A blanket policy: "always" or "never".
...@@ -681,7 +686,7 @@ pub enum RequireApproval { ...@@ -681,7 +686,7 @@ pub enum RequireApproval {
Filter(McpApprovalFilter), Filter(McpApprovalFilter),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum RequireApprovalPolicy { pub enum RequireApprovalPolicy {
Always, Always,
...@@ -689,7 +694,7 @@ pub enum RequireApprovalPolicy { ...@@ -689,7 +694,7 @@ pub enum RequireApprovalPolicy {
} }
/// Filter object for MCP tool approval. /// Filter object for MCP tool approval.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct McpApprovalFilter { pub struct McpApprovalFilter {
/// A list of tools that always require approval. /// A list of tools that always require approval.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
...@@ -700,7 +705,7 @@ pub struct McpApprovalFilter { ...@@ -700,7 +705,7 @@ pub struct McpApprovalFilter {
} }
/// Container configuration for a code interpreter. /// Container configuration for a code interpreter.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum CodeInterpreterContainer { pub enum CodeInterpreterContainer {
/// A simple container ID. /// A simple container ID.
...@@ -710,7 +715,7 @@ pub enum CodeInterpreterContainer { ...@@ -710,7 +715,7 @@ pub enum CodeInterpreterContainer {
} }
/// Auto configuration for code interpreter container. /// Auto configuration for code interpreter container.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum CodeInterpreterContainerKind { pub enum CodeInterpreterContainerKind {
Auto { Auto {
...@@ -721,7 +726,7 @@ pub enum CodeInterpreterContainerKind { ...@@ -721,7 +726,7 @@ pub enum CodeInterpreterContainerKind {
} }
/// Code interpreter tool definition. /// Code interpreter tool definition.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "CodeInterpreterArgs", name = "CodeInterpreterArgs",
pattern = "mutable", pattern = "mutable",
...@@ -735,7 +740,7 @@ pub struct CodeInterpreter { ...@@ -735,7 +740,7 @@ pub struct CodeInterpreter {
} }
/// Mask image input for image generation. /// Mask image input for image generation.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct InputImageMask { pub struct InputImageMask {
/// Base64-encoded mask image. /// Base64-encoded mask image.
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
...@@ -746,7 +751,7 @@ pub struct InputImageMask { ...@@ -746,7 +751,7 @@ pub struct InputImageMask {
} }
/// Image generation tool definition. /// Image generation tool definition.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq, Default, Builder)]
#[builder( #[builder(
name = "ImageGenerationArgs", name = "ImageGenerationArgs",
pattern = "mutable", pattern = "mutable",
...@@ -784,7 +789,7 @@ pub struct ImageGeneration { ...@@ -784,7 +789,7 @@ pub struct ImageGeneration {
pub size: Option<ImageGenerationSize>, pub size: Option<ImageGenerationSize>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ImageGenerationBackground { pub enum ImageGenerationBackground {
Transparent, Transparent,
...@@ -792,7 +797,7 @@ pub enum ImageGenerationBackground { ...@@ -792,7 +797,7 @@ pub enum ImageGenerationBackground {
Auto, Auto,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ImageGenerationOutputFormat { pub enum ImageGenerationOutputFormat {
Png, Png,
...@@ -800,7 +805,7 @@ pub enum ImageGenerationOutputFormat { ...@@ -800,7 +805,7 @@ pub enum ImageGenerationOutputFormat {
Jpeg, Jpeg,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ImageGenerationQuality { pub enum ImageGenerationQuality {
Low, Low,
...@@ -809,7 +814,7 @@ pub enum ImageGenerationQuality { ...@@ -809,7 +814,7 @@ pub enum ImageGenerationQuality {
Auto, Auto,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ImageGenerationSize { pub enum ImageGenerationSize {
Auto, Auto,
...@@ -822,7 +827,7 @@ pub enum ImageGenerationSize { ...@@ -822,7 +827,7 @@ pub enum ImageGenerationSize {
} }
/// Control how the model picks or is forced to pick a tool. /// Control how the model picks or is forced to pick a tool.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(untagged)] #[serde(untagged)]
pub enum ToolChoice { pub enum ToolChoice {
/// Controls which (if any) tool is called by the model. /// Controls which (if any) tool is called by the model.
...@@ -841,7 +846,7 @@ pub enum ToolChoice { ...@@ -841,7 +846,7 @@ pub enum ToolChoice {
} }
/// Simple tool-choice modes. /// Simple tool-choice modes.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "lowercase")] #[serde(rename_all = "lowercase")]
pub enum ToolChoiceMode { pub enum ToolChoiceMode {
/// The model will not call any tool and instead generates a message. /// The model will not call any tool and instead generates a message.
...@@ -853,7 +858,7 @@ pub enum ToolChoiceMode { ...@@ -853,7 +858,7 @@ pub enum ToolChoiceMode {
} }
/// Hosted tool type identifiers. /// Hosted tool type identifiers.
#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, Copy, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum HostedToolType { pub enum HostedToolType {
FileSearch, FileSearch,
...@@ -862,7 +867,7 @@ pub enum HostedToolType { ...@@ -862,7 +867,7 @@ pub enum HostedToolType {
} }
/// Error returned by the API when a request fails. /// Error returned by the API when a request fails.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ErrorObject { pub struct ErrorObject {
/// The error code for the response. /// The error code for the response.
pub code: String, pub code: String,
...@@ -871,14 +876,14 @@ pub struct ErrorObject { ...@@ -871,14 +876,14 @@ pub struct ErrorObject {
} }
/// Details about an incomplete response. /// Details about an incomplete response.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct IncompleteDetails { pub struct IncompleteDetails {
/// The reason why the response is incomplete. /// The reason why the response is incomplete.
pub reason: String, pub reason: String,
} }
/// A simple text output from the model. /// A simple text output from the model.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct OutputText { pub struct OutputText {
/// The annotations of the text output. /// The annotations of the text output.
pub annotations: Vec<Annotation>, pub annotations: Vec<Annotation>,
...@@ -886,7 +891,7 @@ pub struct OutputText { ...@@ -886,7 +891,7 @@ pub struct OutputText {
pub text: String, pub text: String,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum Annotation { pub enum Annotation {
/// A citation to a file. /// A citation to a file.
...@@ -897,7 +902,7 @@ pub enum Annotation { ...@@ -897,7 +902,7 @@ pub enum Annotation {
FilePath(FilePath), FilePath(FilePath),
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FileCitation { pub struct FileCitation {
/// The ID of the file. /// The ID of the file.
file_id: String, file_id: String,
...@@ -905,7 +910,7 @@ pub struct FileCitation { ...@@ -905,7 +910,7 @@ pub struct FileCitation {
index: u32, index: u32,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct UrlCitation { pub struct UrlCitation {
/// The index of the last character of the URL citation in the message. /// The index of the last character of the URL citation in the message.
end_index: u32, end_index: u32,
...@@ -917,7 +922,7 @@ pub struct UrlCitation { ...@@ -917,7 +922,7 @@ pub struct UrlCitation {
url: String, url: String,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FilePath { pub struct FilePath {
/// The ID of the file. /// The ID of the file.
file_id: String, file_id: String,
...@@ -926,14 +931,14 @@ pub struct FilePath { ...@@ -926,14 +931,14 @@ pub struct FilePath {
} }
/// A refusal explanation from the model. /// A refusal explanation from the model.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Refusal { pub struct Refusal {
/// The refusal explanation from the model. /// The refusal explanation from the model.
pub refusal: String, pub refusal: String,
} }
/// A message generated by the model. /// A message generated by the model.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct OutputMessage { pub struct OutputMessage {
/// The content of the output message. /// The content of the output message.
pub content: Vec<Content>, pub content: Vec<Content>,
...@@ -945,7 +950,7 @@ pub struct OutputMessage { ...@@ -945,7 +950,7 @@ pub struct OutputMessage {
pub status: OutputStatus, pub status: OutputStatus,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum Content { pub enum Content {
/// A text output from the model. /// A text output from the model.
...@@ -955,7 +960,7 @@ pub enum Content { ...@@ -955,7 +960,7 @@ pub enum Content {
} }
/// Nested content within an output message. /// Nested content within an output message.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum OutputContent { pub enum OutputContent {
/// An output message from the model. /// An output message from the model.
...@@ -987,7 +992,7 @@ pub enum OutputContent { ...@@ -987,7 +992,7 @@ pub enum OutputContent {
} }
/// A reasoning item representing the model's chain of thought, including summary paragraphs. /// A reasoning item representing the model's chain of thought, including summary paragraphs.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ReasoningItem { pub struct ReasoningItem {
/// Unique identifier of the reasoning content. /// Unique identifier of the reasoning content.
pub id: String, pub id: String,
...@@ -1003,14 +1008,14 @@ pub struct ReasoningItem { ...@@ -1003,14 +1008,14 @@ pub struct ReasoningItem {
} }
/// A single summary text fragment from reasoning. /// A single summary text fragment from reasoning.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SummaryText { pub struct SummaryText {
/// A short summary of the reasoning used by the model. /// A short summary of the reasoning used by the model.
pub text: String, pub text: String,
} }
/// File search tool call output. /// File search tool call output.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FileSearchCallOutput { pub struct FileSearchCallOutput {
/// The unique ID of the file search tool call. /// The unique ID of the file search tool call.
pub id: String, pub id: String,
...@@ -1023,7 +1028,7 @@ pub struct FileSearchCallOutput { ...@@ -1023,7 +1028,7 @@ pub struct FileSearchCallOutput {
pub results: Option<Vec<FileSearchResult>>, pub results: Option<Vec<FileSearchResult>>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum FileSearchCallOutputStatus { pub enum FileSearchCallOutputStatus {
InProgress, InProgress,
...@@ -1034,7 +1039,7 @@ pub enum FileSearchCallOutputStatus { ...@@ -1034,7 +1039,7 @@ pub enum FileSearchCallOutputStatus {
} }
/// A single result from a file search. /// A single result from a file search.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FileSearchResult { pub struct FileSearchResult {
/// The unique ID of the file. /// The unique ID of the file.
pub file_id: String, pub file_id: String,
...@@ -1051,7 +1056,7 @@ pub struct FileSearchResult { ...@@ -1051,7 +1056,7 @@ pub struct FileSearchResult {
pub attributes: HashMap<String, serde_json::Value>, pub attributes: HashMap<String, serde_json::Value>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct SafetyCheck { pub struct SafetyCheck {
/// The ID of the safety check. /// The ID of the safety check.
pub id: String, pub id: String,
...@@ -1062,7 +1067,7 @@ pub struct SafetyCheck { ...@@ -1062,7 +1067,7 @@ pub struct SafetyCheck {
} }
/// Web search tool call output. /// Web search tool call output.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct WebSearchCallOutput { pub struct WebSearchCallOutput {
/// The unique ID of the web search tool call. /// The unique ID of the web search tool call.
pub id: String, pub id: String,
...@@ -1071,7 +1076,7 @@ pub struct WebSearchCallOutput { ...@@ -1071,7 +1076,7 @@ pub struct WebSearchCallOutput {
} }
/// Output from a computer tool call. /// Output from a computer tool call.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ComputerCallOutput { pub struct ComputerCallOutput {
pub action: ComputerCallAction, pub action: ComputerCallAction,
/// An identifier used when responding to the tool call with output. /// An identifier used when responding to the tool call with output.
...@@ -1085,14 +1090,14 @@ pub struct ComputerCallOutput { ...@@ -1085,14 +1090,14 @@ pub struct ComputerCallOutput {
} }
/// A point in 2D space. /// A point in 2D space.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Point { pub struct Point {
pub x: i32, pub x: i32,
pub y: i32, pub y: i32,
} }
/// Represents all user‐triggered actions. /// Represents all user‐triggered actions.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum ComputerCallAction { pub enum ComputerCallAction {
/// A click action. /// A click action.
...@@ -1123,7 +1128,7 @@ pub enum ComputerCallAction { ...@@ -1123,7 +1128,7 @@ pub enum ComputerCallAction {
Wait, Wait,
} }
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum ButtonPress { pub enum ButtonPress {
Left, Left,
...@@ -1134,7 +1139,7 @@ pub enum ButtonPress { ...@@ -1134,7 +1139,7 @@ pub enum ButtonPress {
} }
/// A click action. /// A click action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Click { pub struct Click {
/// Which mouse button was pressed. /// Which mouse button was pressed.
pub button: ButtonPress, pub button: ButtonPress,
...@@ -1145,7 +1150,7 @@ pub struct Click { ...@@ -1145,7 +1150,7 @@ pub struct Click {
} }
/// A double click action. /// A double click action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DoubleClick { pub struct DoubleClick {
/// X‐coordinate of the double click. /// X‐coordinate of the double click.
pub x: i32, pub x: i32,
...@@ -1154,7 +1159,7 @@ pub struct DoubleClick { ...@@ -1154,7 +1159,7 @@ pub struct DoubleClick {
} }
/// A drag action. /// A drag action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Drag { pub struct Drag {
/// The path of points the cursor drags through. /// The path of points the cursor drags through.
pub path: Vec<Point>, pub path: Vec<Point>,
...@@ -1165,14 +1170,14 @@ pub struct Drag { ...@@ -1165,14 +1170,14 @@ pub struct Drag {
} }
/// A keypress action. /// A keypress action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct KeyPress { pub struct KeyPress {
/// The list of keys to press (e.g. `["Control", "C"]`). /// The list of keys to press (e.g. `["Control", "C"]`).
pub keys: Vec<String>, pub keys: Vec<String>,
} }
/// A mouse move action. /// A mouse move action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct MoveAction { pub struct MoveAction {
/// X‐coordinate to move to. /// X‐coordinate to move to.
pub x: i32, pub x: i32,
...@@ -1181,7 +1186,7 @@ pub struct MoveAction { ...@@ -1181,7 +1186,7 @@ pub struct MoveAction {
} }
/// A scroll action. /// A scroll action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct Scroll { pub struct Scroll {
/// Horizontal scroll distance. /// Horizontal scroll distance.
pub scroll_x: i32, pub scroll_x: i32,
...@@ -1194,14 +1199,14 @@ pub struct Scroll { ...@@ -1194,14 +1199,14 @@ pub struct Scroll {
} }
/// A typing (text entry) action. /// A typing (text entry) action.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[derive(ToSchema, Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct TypeAction { pub struct TypeAction {
/// The text to type. /// The text to type.
pub text: String, pub text: String,
} }
/// Metadata for a function call request. /// Metadata for a function call request.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct FunctionCall { pub struct FunctionCall {
/// The unique ID of the function tool call. /// The unique ID of the function tool call.
pub id: String, pub id: String,
...@@ -1216,7 +1221,7 @@ pub struct FunctionCall { ...@@ -1216,7 +1221,7 @@ pub struct FunctionCall {
} }
/// Output of an image generation request. /// Output of an image generation request.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct ImageGenerationCallOutput { pub struct ImageGenerationCallOutput {
/// Unique ID of the image generation call. /// Unique ID of the image generation call.
pub id: String, pub id: String,
...@@ -1227,7 +1232,7 @@ pub struct ImageGenerationCallOutput { ...@@ -1227,7 +1232,7 @@ pub struct ImageGenerationCallOutput {
} }
/// Output of a code interpreter request. /// Output of a code interpreter request.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterCallOutput { pub struct CodeInterpreterCallOutput {
/// The code that was executed. /// The code that was executed.
pub code: String, pub code: String,
...@@ -1242,7 +1247,7 @@ pub struct CodeInterpreterCallOutput { ...@@ -1242,7 +1247,7 @@ pub struct CodeInterpreterCallOutput {
} }
/// Individual result from a code interpreter: either logs or files. /// Individual result from a code interpreter: either logs or files.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type", rename_all = "snake_case")] #[serde(tag = "type", rename_all = "snake_case")]
pub enum CodeInterpreterResult { pub enum CodeInterpreterResult {
/// Text logs from the execution. /// Text logs from the execution.
...@@ -1252,20 +1257,20 @@ pub enum CodeInterpreterResult { ...@@ -1252,20 +1257,20 @@ pub enum CodeInterpreterResult {
} }
/// The output containing execution logs. /// The output containing execution logs.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterTextOutput { pub struct CodeInterpreterTextOutput {
/// The logs of the code interpreter tool call. /// The logs of the code interpreter tool call.
pub logs: String, pub logs: String,
} }
/// The output containing file references. /// The output containing file references.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterFileOutput { pub struct CodeInterpreterFileOutput {
/// List of file IDs produced. /// List of file IDs produced.
pub files: Vec<CodeInterpreterFile>, pub files: Vec<CodeInterpreterFile>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct CodeInterpreterFile { pub struct CodeInterpreterFile {
/// The ID of the file. /// The ID of the file.
file_id: String, file_id: String,
...@@ -1274,7 +1279,7 @@ pub struct CodeInterpreterFile { ...@@ -1274,7 +1279,7 @@ pub struct CodeInterpreterFile {
} }
/// Output of a local shell command request. /// Output of a local shell command request.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct LocalShellCallOutput { pub struct LocalShellCallOutput {
/// Details of the exec action. /// Details of the exec action.
pub action: LocalShellAction, pub action: LocalShellAction,
...@@ -1287,7 +1292,7 @@ pub struct LocalShellCallOutput { ...@@ -1287,7 +1292,7 @@ pub struct LocalShellCallOutput {
} }
/// Define the shape of a local shell action (exec). /// Define the shape of a local shell action (exec).
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct LocalShellAction { pub struct LocalShellAction {
/// The command to run. /// The command to run.
pub command: Vec<String>, pub command: Vec<String>,
...@@ -1302,7 +1307,7 @@ pub struct LocalShellAction { ...@@ -1302,7 +1307,7 @@ pub struct LocalShellAction {
} }
/// Output of an MCP server tool invocation. /// Output of an MCP server tool invocation.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct McpCallOutput { pub struct McpCallOutput {
/// JSON string of the arguments passed. /// JSON string of the arguments passed.
pub arguments: String, pub arguments: String,
...@@ -1319,7 +1324,7 @@ pub struct McpCallOutput { ...@@ -1319,7 +1324,7 @@ pub struct McpCallOutput {
} }
/// Output listing tools available on an MCP server. /// Output listing tools available on an MCP server.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct McpListToolsOutput { pub struct McpListToolsOutput {
/// Unique ID of the list request. /// Unique ID of the list request.
pub id: String, pub id: String,
...@@ -1333,7 +1338,7 @@ pub struct McpListToolsOutput { ...@@ -1333,7 +1338,7 @@ pub struct McpListToolsOutput {
} }
/// Information about a single tool on an MCP server. /// Information about a single tool on an MCP server.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct McpToolInfo { pub struct McpToolInfo {
/// The name of the tool. /// The name of the tool.
pub name: String, pub name: String,
...@@ -1348,7 +1353,7 @@ pub struct McpToolInfo { ...@@ -1348,7 +1353,7 @@ pub struct McpToolInfo {
} }
/// Output representing a human approval request for an MCP tool. /// Output representing a human approval request for an MCP tool.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct McpApprovalRequestOutput { pub struct McpApprovalRequestOutput {
/// JSON string of arguments for the tool. /// JSON string of arguments for the tool.
pub arguments: String, pub arguments: String,
...@@ -1361,7 +1366,7 @@ pub struct McpApprovalRequestOutput { ...@@ -1361,7 +1366,7 @@ pub struct McpApprovalRequestOutput {
} }
/// Usage statistics for a response. /// Usage statistics for a response.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Usage { pub struct Usage {
/// The number of input tokens. /// The number of input tokens.
pub input_tokens: u32, pub input_tokens: u32,
...@@ -1376,7 +1381,7 @@ pub struct Usage { ...@@ -1376,7 +1381,7 @@ pub struct Usage {
} }
/// The complete response returned by the Responses API. /// The complete response returned by the Responses API.
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
pub struct Response { pub struct Response {
/// Unix timestamp (in seconds) when this Response was created. /// Unix timestamp (in seconds) when this Response was created.
pub created_at: u64, pub created_at: u64,
...@@ -1475,7 +1480,7 @@ pub struct Response { ...@@ -1475,7 +1480,7 @@ pub struct Response {
pub user: Option<String>, pub user: Option<String>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(rename_all = "snake_case")] #[serde(rename_all = "snake_case")]
pub enum Status { pub enum Status {
Completed, Completed,
...@@ -1485,7 +1490,7 @@ pub enum Status { ...@@ -1485,7 +1490,7 @@ pub enum Status {
} }
/// Event types for streaming responses from the Responses API /// Event types for streaming responses from the Responses API
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[serde(tag = "type")] #[serde(tag = "type")]
#[non_exhaustive] // Future-proof against breaking changes #[non_exhaustive] // Future-proof against breaking changes
pub enum ResponseEvent { pub enum ResponseEvent {
...@@ -1639,21 +1644,21 @@ pub enum ResponseEvent { ...@@ -1639,21 +1644,21 @@ pub enum ResponseEvent {
/// Stream of response events /// Stream of response events
pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<ResponseEvent, OpenAIError>> + Send>>; pub type ResponseStream = Pin<Box<dyn Stream<Item = Result<ResponseEvent, OpenAIError>> + Send>>;
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseCreated { pub struct ResponseCreated {
pub sequence_number: u64, pub sequence_number: u64,
pub response: ResponseMetadata, pub response: ResponseMetadata,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseInProgress { pub struct ResponseInProgress {
pub sequence_number: u64, pub sequence_number: u64,
pub response: ResponseMetadata, pub response: ResponseMetadata,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseOutputItemAdded { pub struct ResponseOutputItemAdded {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1661,7 +1666,7 @@ pub struct ResponseOutputItemAdded { ...@@ -1661,7 +1666,7 @@ pub struct ResponseOutputItemAdded {
pub item: OutputItem, pub item: OutputItem,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseContentPartAdded { pub struct ResponseContentPartAdded {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1671,7 +1676,7 @@ pub struct ResponseContentPartAdded { ...@@ -1671,7 +1676,7 @@ pub struct ResponseContentPartAdded {
pub part: ContentPart, pub part: ContentPart,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseOutputTextDelta { pub struct ResponseOutputTextDelta {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1683,7 +1688,7 @@ pub struct ResponseOutputTextDelta { ...@@ -1683,7 +1688,7 @@ pub struct ResponseOutputTextDelta {
pub logprobs: Option<serde_json::Value>, pub logprobs: Option<serde_json::Value>,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseContentPartDone { pub struct ResponseContentPartDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1693,7 +1698,7 @@ pub struct ResponseContentPartDone { ...@@ -1693,7 +1698,7 @@ pub struct ResponseContentPartDone {
pub part: ContentPart, pub part: ContentPart,
} }
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseOutputItemDone { pub struct ResponseOutputItemDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1702,7 +1707,7 @@ pub struct ResponseOutputItemDone { ...@@ -1702,7 +1707,7 @@ pub struct ResponseOutputItemDone {
} }
/// Response completed event /// Response completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseCompleted { pub struct ResponseCompleted {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1710,7 +1715,7 @@ pub struct ResponseCompleted { ...@@ -1710,7 +1715,7 @@ pub struct ResponseCompleted {
} }
/// Response failed event /// Response failed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseFailed { pub struct ResponseFailed {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1718,7 +1723,7 @@ pub struct ResponseFailed { ...@@ -1718,7 +1723,7 @@ pub struct ResponseFailed {
} }
/// Response incomplete event /// Response incomplete event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseIncomplete { pub struct ResponseIncomplete {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1726,7 +1731,7 @@ pub struct ResponseIncomplete { ...@@ -1726,7 +1731,7 @@ pub struct ResponseIncomplete {
} }
/// Response queued event /// Response queued event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseQueued { pub struct ResponseQueued {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1734,7 +1739,7 @@ pub struct ResponseQueued { ...@@ -1734,7 +1739,7 @@ pub struct ResponseQueued {
} }
/// Text output completed event /// Text output completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseOutputTextDone { pub struct ResponseOutputTextDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1746,7 +1751,7 @@ pub struct ResponseOutputTextDone { ...@@ -1746,7 +1751,7 @@ pub struct ResponseOutputTextDone {
} }
/// Refusal delta event /// Refusal delta event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseRefusalDelta { pub struct ResponseRefusalDelta {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1757,7 +1762,7 @@ pub struct ResponseRefusalDelta { ...@@ -1757,7 +1762,7 @@ pub struct ResponseRefusalDelta {
} }
/// Refusal done event /// Refusal done event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseRefusalDone { pub struct ResponseRefusalDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1768,7 +1773,7 @@ pub struct ResponseRefusalDone { ...@@ -1768,7 +1773,7 @@ pub struct ResponseRefusalDone {
} }
/// Function call arguments delta event /// Function call arguments delta event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseFunctionCallArgumentsDelta { pub struct ResponseFunctionCallArgumentsDelta {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1778,7 +1783,7 @@ pub struct ResponseFunctionCallArgumentsDelta { ...@@ -1778,7 +1783,7 @@ pub struct ResponseFunctionCallArgumentsDelta {
} }
/// Function call arguments done event /// Function call arguments done event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseFunctionCallArgumentsDone { pub struct ResponseFunctionCallArgumentsDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1788,7 +1793,7 @@ pub struct ResponseFunctionCallArgumentsDone { ...@@ -1788,7 +1793,7 @@ pub struct ResponseFunctionCallArgumentsDone {
} }
/// Error event /// Error event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseError { pub struct ResponseError {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1798,7 +1803,7 @@ pub struct ResponseError { ...@@ -1798,7 +1803,7 @@ pub struct ResponseError {
} }
/// File search call in progress event /// File search call in progress event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseFileSearchCallInProgress { pub struct ResponseFileSearchCallInProgress {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1807,7 +1812,7 @@ pub struct ResponseFileSearchCallInProgress { ...@@ -1807,7 +1812,7 @@ pub struct ResponseFileSearchCallInProgress {
} }
/// File search call searching event /// File search call searching event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseFileSearchCallSearching { pub struct ResponseFileSearchCallSearching {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1816,7 +1821,7 @@ pub struct ResponseFileSearchCallSearching { ...@@ -1816,7 +1821,7 @@ pub struct ResponseFileSearchCallSearching {
} }
/// File search call completed event /// File search call completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseFileSearchCallCompleted { pub struct ResponseFileSearchCallCompleted {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1825,7 +1830,7 @@ pub struct ResponseFileSearchCallCompleted { ...@@ -1825,7 +1830,7 @@ pub struct ResponseFileSearchCallCompleted {
} }
/// Web search call in progress event /// Web search call in progress event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseWebSearchCallInProgress { pub struct ResponseWebSearchCallInProgress {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1834,7 +1839,7 @@ pub struct ResponseWebSearchCallInProgress { ...@@ -1834,7 +1839,7 @@ pub struct ResponseWebSearchCallInProgress {
} }
/// Web search call searching event /// Web search call searching event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseWebSearchCallSearching { pub struct ResponseWebSearchCallSearching {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1843,7 +1848,7 @@ pub struct ResponseWebSearchCallSearching { ...@@ -1843,7 +1848,7 @@ pub struct ResponseWebSearchCallSearching {
} }
/// Web search call completed event /// Web search call completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseWebSearchCallCompleted { pub struct ResponseWebSearchCallCompleted {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1852,7 +1857,7 @@ pub struct ResponseWebSearchCallCompleted { ...@@ -1852,7 +1857,7 @@ pub struct ResponseWebSearchCallCompleted {
} }
/// Reasoning summary part added event /// Reasoning summary part added event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseReasoningSummaryPartAdded { pub struct ResponseReasoningSummaryPartAdded {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1863,7 +1868,7 @@ pub struct ResponseReasoningSummaryPartAdded { ...@@ -1863,7 +1868,7 @@ pub struct ResponseReasoningSummaryPartAdded {
} }
/// Reasoning summary part done event /// Reasoning summary part done event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseReasoningSummaryPartDone { pub struct ResponseReasoningSummaryPartDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1874,7 +1879,7 @@ pub struct ResponseReasoningSummaryPartDone { ...@@ -1874,7 +1879,7 @@ pub struct ResponseReasoningSummaryPartDone {
} }
/// Reasoning summary text delta event /// Reasoning summary text delta event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseReasoningSummaryTextDelta { pub struct ResponseReasoningSummaryTextDelta {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1885,7 +1890,7 @@ pub struct ResponseReasoningSummaryTextDelta { ...@@ -1885,7 +1890,7 @@ pub struct ResponseReasoningSummaryTextDelta {
} }
/// Reasoning summary text done event /// Reasoning summary text done event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseReasoningSummaryTextDone { pub struct ResponseReasoningSummaryTextDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1896,7 +1901,7 @@ pub struct ResponseReasoningSummaryTextDone { ...@@ -1896,7 +1901,7 @@ pub struct ResponseReasoningSummaryTextDone {
} }
/// Reasoning summary delta event /// Reasoning summary delta event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseReasoningSummaryDelta { pub struct ResponseReasoningSummaryDelta {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1907,7 +1912,7 @@ pub struct ResponseReasoningSummaryDelta { ...@@ -1907,7 +1912,7 @@ pub struct ResponseReasoningSummaryDelta {
} }
/// Reasoning summary done event /// Reasoning summary done event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseReasoningSummaryDone { pub struct ResponseReasoningSummaryDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1918,7 +1923,7 @@ pub struct ResponseReasoningSummaryDone { ...@@ -1918,7 +1923,7 @@ pub struct ResponseReasoningSummaryDone {
} }
/// Image generation call in progress event /// Image generation call in progress event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseImageGenerationCallInProgress { pub struct ResponseImageGenerationCallInProgress {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1927,7 +1932,7 @@ pub struct ResponseImageGenerationCallInProgress { ...@@ -1927,7 +1932,7 @@ pub struct ResponseImageGenerationCallInProgress {
} }
/// Image generation call generating event /// Image generation call generating event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseImageGenerationCallGenerating { pub struct ResponseImageGenerationCallGenerating {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1936,7 +1941,7 @@ pub struct ResponseImageGenerationCallGenerating { ...@@ -1936,7 +1941,7 @@ pub struct ResponseImageGenerationCallGenerating {
} }
/// Image generation call partial image event /// Image generation call partial image event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseImageGenerationCallPartialImage { pub struct ResponseImageGenerationCallPartialImage {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1947,7 +1952,7 @@ pub struct ResponseImageGenerationCallPartialImage { ...@@ -1947,7 +1952,7 @@ pub struct ResponseImageGenerationCallPartialImage {
} }
/// Image generation call completed event /// Image generation call completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseImageGenerationCallCompleted { pub struct ResponseImageGenerationCallCompleted {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1956,7 +1961,7 @@ pub struct ResponseImageGenerationCallCompleted { ...@@ -1956,7 +1961,7 @@ pub struct ResponseImageGenerationCallCompleted {
} }
/// MCP call arguments delta event /// MCP call arguments delta event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpCallArgumentsDelta { pub struct ResponseMcpCallArgumentsDelta {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1966,7 +1971,7 @@ pub struct ResponseMcpCallArgumentsDelta { ...@@ -1966,7 +1971,7 @@ pub struct ResponseMcpCallArgumentsDelta {
} }
/// MCP call arguments done event /// MCP call arguments done event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpCallArgumentsDone { pub struct ResponseMcpCallArgumentsDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1976,7 +1981,7 @@ pub struct ResponseMcpCallArgumentsDone { ...@@ -1976,7 +1981,7 @@ pub struct ResponseMcpCallArgumentsDone {
} }
/// MCP call completed event /// MCP call completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpCallCompleted { pub struct ResponseMcpCallCompleted {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1985,7 +1990,7 @@ pub struct ResponseMcpCallCompleted { ...@@ -1985,7 +1990,7 @@ pub struct ResponseMcpCallCompleted {
} }
/// MCP call failed event /// MCP call failed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpCallFailed { pub struct ResponseMcpCallFailed {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -1994,7 +1999,7 @@ pub struct ResponseMcpCallFailed { ...@@ -1994,7 +1999,7 @@ pub struct ResponseMcpCallFailed {
} }
/// MCP call in progress event /// MCP call in progress event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpCallInProgress { pub struct ResponseMcpCallInProgress {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2003,7 +2008,7 @@ pub struct ResponseMcpCallInProgress { ...@@ -2003,7 +2008,7 @@ pub struct ResponseMcpCallInProgress {
} }
/// MCP list tools completed event /// MCP list tools completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpListToolsCompleted { pub struct ResponseMcpListToolsCompleted {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2012,7 +2017,7 @@ pub struct ResponseMcpListToolsCompleted { ...@@ -2012,7 +2017,7 @@ pub struct ResponseMcpListToolsCompleted {
} }
/// MCP list tools failed event /// MCP list tools failed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpListToolsFailed { pub struct ResponseMcpListToolsFailed {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2021,7 +2026,7 @@ pub struct ResponseMcpListToolsFailed { ...@@ -2021,7 +2026,7 @@ pub struct ResponseMcpListToolsFailed {
} }
/// MCP list tools in progress event /// MCP list tools in progress event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMcpListToolsInProgress { pub struct ResponseMcpListToolsInProgress {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2030,7 +2035,7 @@ pub struct ResponseMcpListToolsInProgress { ...@@ -2030,7 +2035,7 @@ pub struct ResponseMcpListToolsInProgress {
} }
/// Code interpreter call in progress event /// Code interpreter call in progress event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseCodeInterpreterCallInProgress { pub struct ResponseCodeInterpreterCallInProgress {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2039,7 +2044,7 @@ pub struct ResponseCodeInterpreterCallInProgress { ...@@ -2039,7 +2044,7 @@ pub struct ResponseCodeInterpreterCallInProgress {
} }
/// Code interpreter call interpreting event /// Code interpreter call interpreting event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseCodeInterpreterCallInterpreting { pub struct ResponseCodeInterpreterCallInterpreting {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2048,7 +2053,7 @@ pub struct ResponseCodeInterpreterCallInterpreting { ...@@ -2048,7 +2053,7 @@ pub struct ResponseCodeInterpreterCallInterpreting {
} }
/// Code interpreter call completed event /// Code interpreter call completed event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseCodeInterpreterCallCompleted { pub struct ResponseCodeInterpreterCallCompleted {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2057,7 +2062,7 @@ pub struct ResponseCodeInterpreterCallCompleted { ...@@ -2057,7 +2062,7 @@ pub struct ResponseCodeInterpreterCallCompleted {
} }
/// Code interpreter call code delta event /// Code interpreter call code delta event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseCodeInterpreterCallCodeDelta { pub struct ResponseCodeInterpreterCallCodeDelta {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2067,7 +2072,7 @@ pub struct ResponseCodeInterpreterCallCodeDelta { ...@@ -2067,7 +2072,7 @@ pub struct ResponseCodeInterpreterCallCodeDelta {
} }
/// Code interpreter call code done event /// Code interpreter call code done event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseCodeInterpreterCallCodeDone { pub struct ResponseCodeInterpreterCallCodeDone {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2077,7 +2082,7 @@ pub struct ResponseCodeInterpreterCallCodeDone { ...@@ -2077,7 +2082,7 @@ pub struct ResponseCodeInterpreterCallCodeDone {
} }
/// Response metadata /// Response metadata
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseMetadata { pub struct ResponseMetadata {
pub id: String, pub id: String,
...@@ -2146,7 +2151,7 @@ pub struct ResponseMetadata { ...@@ -2146,7 +2151,7 @@ pub struct ResponseMetadata {
} }
/// Output item /// Output item
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct OutputItem { pub struct OutputItem {
pub id: String, pub id: String,
...@@ -2164,7 +2169,7 @@ pub struct OutputItem { ...@@ -2164,7 +2169,7 @@ pub struct OutputItem {
} }
/// Content part /// Content part
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ContentPart { pub struct ContentPart {
#[serde(rename = "type")] #[serde(rename = "type")]
...@@ -2180,7 +2185,7 @@ pub struct ContentPart { ...@@ -2180,7 +2185,7 @@ pub struct ContentPart {
/// Collects streaming response events into a complete response /// Collects streaming response events into a complete response
/// Output text annotation added event /// Output text annotation added event
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct ResponseOutputTextAnnotationAdded { pub struct ResponseOutputTextAnnotationAdded {
pub sequence_number: u64, pub sequence_number: u64,
...@@ -2192,7 +2197,7 @@ pub struct ResponseOutputTextAnnotationAdded { ...@@ -2192,7 +2197,7 @@ pub struct ResponseOutputTextAnnotationAdded {
} }
/// Text annotation object for output text /// Text annotation object for output text
#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)] #[derive(ToSchema, Debug, Serialize, Deserialize, Clone, PartialEq)]
#[non_exhaustive] #[non_exhaustive]
pub struct TextAnnotation { pub struct TextAnnotation {
#[serde(rename = "type")] #[serde(rename = "type")]
......
...@@ -195,6 +195,10 @@ insta = { version = "1.41", features = [ ...@@ -195,6 +195,10 @@ insta = { version = "1.41", features = [
lazy_static = "1.4" lazy_static = "1.4"
mockito = "1.7.0" mockito = "1.7.0"
[[bin]]
name = "generate-frontend-openapi"
path = "src/bin/generate_frontend_openapi.rs"
[build-dependencies] [build-dependencies]
tonic-build = { version = "0.13.1" } tonic-build = { version = "0.13.1" }
......
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Helper binary to generate the Dynamo HTTP frontend OpenAPI specification.
//!
//! This allows CI, documentation tooling, and NIM to obtain the exact same
//! OpenAPI document that is served at `/openapi.json` by the frontend
//! without having to start the HTTP service and scrape the endpoint.
//!
//! Usage (from the repository root):
//! ```bash
//! cargo run -p dynamo-llm --bin generate-frontend-openapi
//! ```
//! The generated spec will be written to:
//! `docs/frontends/openapi.json`
use std::fs;
use std::path::PathBuf;
use std::thread;
use anyhow::Context as _;
use dynamo_llm::http::service::{openapi_docs, service_v2::HttpService};
/// Stack size, in bytes, for the generator thread (8 MiB).
///
/// The utoipa schema derivation for deeply nested OpenAI types requires
/// additional stack space due to recursive type expansion, so the spec is
/// generated on a dedicated thread created with this stack size.
const GENERATOR_STACK_SIZE: usize = 8 * 1024 * 1024;
/// Entry point: runs [`generate_openapi`] on a dedicated thread with an
/// enlarged stack and forwards its result.
///
/// # Errors
///
/// Returns an error if the generator thread cannot be spawned, if it panics,
/// or if [`generate_openapi`] itself fails.
fn main() -> anyhow::Result<()> {
    // Spawn a thread with a larger stack to handle deeply nested schema generation.
    let handle = thread::Builder::new()
        .stack_size(GENERATOR_STACK_SIZE)
        .spawn(generate_openapi)
        .context("failed to spawn generator thread")?;

    match handle.join() {
        // The thread ran to completion; propagate whatever the generator returned.
        Ok(result) => result,
        Err(payload) => {
            // A panic payload is a `Box<dyn Any + Send>`, whose `Debug` output is
            // just `Any { .. }`. Recover the actual message where possible so the
            // reported error is actionable.
            let reason = payload
                .downcast_ref::<&str>()
                .map(|msg| (*msg).to_string())
                .or_else(|| payload.downcast_ref::<String>().cloned())
                .unwrap_or_else(|| "non-string panic payload".to_string());
            Err(anyhow::anyhow!("generator thread panicked: {reason}"))
        }
    }
}
fn generate_openapi() -> anyhow::Result<()> {
// Build an HttpService instance with all standard OpenAI-compatible
// frontend endpoints enabled so that the generated OpenAPI document
// reflects the full surface area exposed to users.
//
// This does NOT start any network listeners; it only builds the router
// graph and associated route documentation.
let http_service = HttpService::builder()
.enable_chat_endpoints(true)
.enable_cmpl_endpoints(true)
.enable_embeddings_endpoints(true)
.enable_responses_endpoints(true)
.build()
.context("failed to build HttpService for OpenAPI generation")?;
let route_docs = http_service.route_docs().to_vec();
let openapi = openapi_docs::generate_openapi_spec(&route_docs);
// Write the spec to a stable location relative to the repository root.
let out_dir = PathBuf::from("docs/frontends");
let out_path = out_dir.join("openapi.json");
fs::create_dir_all(&out_dir)
.with_context(|| format!("failed to create OpenAPI output directory: {out_dir:?}"))?;
let json =
serde_json::to_string_pretty(&openapi).context("failed to serialize OpenAPI spec")?;
fs::write(&out_path, json)
.with_context(|| format!("failed to write OpenAPI spec to: {out_path:?}"))?;
println!(
"Generated Dynamo frontend OpenAPI specification at {}",
out_path.display()
);
Ok(())
}
...@@ -330,6 +330,9 @@ async fn completions( ...@@ -330,6 +330,9 @@ async fn completions(
// return a 503 if the service is not ready // return a 503 if the service is not ready
check_ready(&state)?; check_ready(&state)?;
// Validate stream_options is only used when streaming (NVBug 5662680)
validate_completion_stream_options(&request)?;
validate_completion_fields_generic(&request)?; validate_completion_fields_generic(&request)?;
// Detect batch prompts // Detect batch prompts
...@@ -873,6 +876,9 @@ async fn chat_completions( ...@@ -873,6 +876,9 @@ async fn chat_completions(
// Handle required fields like messages shouldn't be empty. // Handle required fields like messages shouldn't be empty.
validate_chat_completion_required_fields(&request)?; validate_chat_completion_required_fields(&request)?;
// Validate stream_options is only used when streaming (NVBug 5662680)
validate_chat_completion_stream_options(&request)?;
// Handle Rest of Validation Errors // Handle Rest of Validation Errors
validate_chat_completion_fields_generic(&request)?; validate_chat_completion_fields_generic(&request)?;
...@@ -1063,6 +1069,22 @@ pub fn validate_chat_completion_required_fields( ...@@ -1063,6 +1069,22 @@ pub fn validate_chat_completion_required_fields(
Ok(()) Ok(())
} }
/// Rejects chat completion requests that set `stream_options` without
/// enabling streaming (NVBug 5662680).
///
/// A missing `stream` field is treated as `false`.
///
/// # Errors
///
/// Returns a 400 validation error when `stream_options` is present and
/// `stream` is absent or `false`.
pub fn validate_chat_completion_stream_options(
    request: &NvCreateChatCompletionRequest,
) -> Result<(), ErrorResponse> {
    let req = &request.inner;

    // Nothing to check when streaming is on or no stream options were supplied.
    if matches!(req.stream, Some(true)) || req.stream_options.is_none() {
        return Ok(());
    }

    let detail = "The 'stream_options' field is only allowed when 'stream' is set to true.";
    Err(ErrorMessage::from_http_error(HttpError {
        code: 400,
        message: format!("{VALIDATION_PREFIX}{detail}"),
    }))
}
/// Validates a chat completion request and returns an error response if validation fails. /// Validates a chat completion request and returns an error response if validation fails.
/// ///
/// This function calls the `validate` method implemented for `NvCreateChatCompletionRequest`. /// This function calls the `validate` method implemented for `NvCreateChatCompletionRequest`.
...@@ -1078,6 +1100,22 @@ pub fn validate_chat_completion_fields_generic( ...@@ -1078,6 +1100,22 @@ pub fn validate_chat_completion_fields_generic(
}) })
} }
/// Rejects completion requests that set `stream_options` without enabling
/// streaming (NVBug 5662680).
///
/// A missing `stream` field is treated as `false`.
///
/// # Errors
///
/// Returns a 400 validation error when `stream_options` is present and
/// `stream` is absent or `false`.
pub fn validate_completion_stream_options(
    request: &NvCreateCompletionRequest,
) -> Result<(), ErrorResponse> {
    let req = &request.inner;

    // Nothing to check when streaming is on or no stream options were supplied.
    if matches!(req.stream, Some(true)) || req.stream_options.is_none() {
        return Ok(());
    }

    let detail = "The 'stream_options' field is only allowed when 'stream' is set to true.";
    Err(ErrorMessage::from_http_error(HttpError {
        code: 400,
        message: format!("{VALIDATION_PREFIX}{detail}"),
    }))
}
/// Validates a completion request and returns an error response if validation fails. /// Validates a completion request and returns an error response if validation fails.
/// ///
/// This function calls the `validate` method implemented for `NvCreateCompletionRequest`. /// This function calls the `validate` method implemented for `NvCreateCompletionRequest`.
...@@ -1395,9 +1433,9 @@ async fn list_models_openai( ...@@ -1395,9 +1433,9 @@ async fn list_models_openai(
for model_name in models { for model_name in models {
data.push(ModelListing { data.push(ModelListing {
id: model_name.clone(), id: model_name.clone(),
object: "object", object: "model", // Per OpenAI spec, this should be "model"
created, // Where would this come from? created,
owned_by: "nvidia".to_string(), // Get organization from config owned_by: "nvidia".to_string(),
}); });
} }
...@@ -1417,8 +1455,8 @@ struct ListModelOpenAI { ...@@ -1417,8 +1455,8 @@ struct ListModelOpenAI {
#[derive(Serialize)] #[derive(Serialize)]
struct ModelListing { struct ModelListing {
id: String, id: String,
object: &'static str, // always "object" object: &'static str, // always "model" per OpenAI spec
created: u64, // Seconds since epoch created: u64, // Seconds since epoch
owned_by: String, owned_by: String,
} }
......
...@@ -54,12 +54,24 @@ use crate::http::service::RouteDoc; ...@@ -54,12 +54,24 @@ use crate::http::service::RouteDoc;
), ),
servers( servers(
(url = "/", description = "Current server") (url = "/", description = "Current server")
),
components(
schemas(
crate::protocols::openai::chat_completions::NvCreateChatCompletionRequest,
crate::protocols::openai::completions::NvCreateCompletionRequest,
crate::protocols::openai::embeddings::NvCreateEmbeddingRequest,
crate::protocols::openai::responses::NvCreateResponse
)
) )
)] )]
struct ApiDoc; struct ApiDoc;
/// Generate OpenAPI specification from route documentation /// Generate OpenAPI specification from route documentation
fn generate_openapi_spec(route_docs: &[RouteDoc]) -> utoipa::openapi::OpenApi { ///
/// This is the core helper used both by the embedded Swagger UI and by
/// external tools (for example CI or NIM) which need to materialize the
/// same frontend OpenAPI specification without running the HTTP service.
pub fn generate_openapi_spec(route_docs: &[RouteDoc]) -> utoipa::openapi::OpenApi {
let mut openapi = ApiDoc::openapi(); let mut openapi = ApiDoc::openapi();
// Build paths from route documentation // Build paths from route documentation
...@@ -216,60 +228,8 @@ fn add_request_body_for_path( ...@@ -216,60 +228,8 @@ fn add_request_body_for_path(
/// Create schema for chat completion request /// Create schema for chat completion request
fn create_chat_completion_schema() -> RefOr<utoipa::openapi::schema::Schema> { fn create_chat_completion_schema() -> RefOr<utoipa::openapi::schema::Schema> {
use utoipa::openapi::schema::{ArrayBuilder, ObjectBuilder}; // Schema derived from actual NvCreateChatCompletionRequest type via ToSchema
<crate::protocols::openai::chat_completions::NvCreateChatCompletionRequest as utoipa::PartialSchema>::schema()
RefOr::T(utoipa::openapi::schema::Schema::Object(
ObjectBuilder::new()
.property(
"model",
ObjectBuilder::new()
.description(Some("ID of the model to use"))
.build(),
)
.property(
"messages",
ArrayBuilder::new()
.description(Some("A list of messages comprising the conversation so far"))
.items(
ObjectBuilder::new()
.property(
"role",
ObjectBuilder::new()
.description(Some("The role of the message author (system, user, assistant)"))
.build(),
)
.property(
"content",
ObjectBuilder::new()
.description(Some("The contents of the message"))
.build(),
)
.build(),
)
.build(),
)
.property(
"temperature",
ObjectBuilder::new()
.description(Some("Sampling temperature between 0 and 2. Higher values make output more random"))
.build(),
)
.property(
"max_tokens",
ObjectBuilder::new()
.description(Some("Maximum number of tokens to generate"))
.build(),
)
.property(
"stream",
ObjectBuilder::new()
.description(Some("Whether to stream back partial progress"))
.build(),
)
.required("model")
.required("messages")
.build(),
))
} }
/// Create example for chat completion request /// Create example for chat completion request
...@@ -294,44 +254,7 @@ fn create_chat_completion_example() -> serde_json::Value { ...@@ -294,44 +254,7 @@ fn create_chat_completion_example() -> serde_json::Value {
/// Create schema for completion request /// Create schema for completion request
fn create_completion_schema() -> RefOr<utoipa::openapi::schema::Schema> { fn create_completion_schema() -> RefOr<utoipa::openapi::schema::Schema> {
use utoipa::openapi::schema::ObjectBuilder; <crate::protocols::openai::completions::NvCreateCompletionRequest as utoipa::PartialSchema>::schema()
RefOr::T(utoipa::openapi::schema::Schema::Object(
ObjectBuilder::new()
.property(
"model",
ObjectBuilder::new()
.description(Some("ID of the model to use"))
.build(),
)
.property(
"prompt",
ObjectBuilder::new()
.description(Some("The prompt to generate completions for"))
.build(),
)
.property(
"temperature",
ObjectBuilder::new()
.description(Some("Sampling temperature between 0 and 2"))
.build(),
)
.property(
"max_tokens",
ObjectBuilder::new()
.description(Some("Maximum number of tokens to generate"))
.build(),
)
.property(
"stream",
ObjectBuilder::new()
.description(Some("Whether to stream back partial progress"))
.build(),
)
.required("model")
.required("prompt")
.build(),
))
} }
/// Create example for completion request /// Create example for completion request
...@@ -347,28 +270,7 @@ fn create_completion_example() -> serde_json::Value { ...@@ -347,28 +270,7 @@ fn create_completion_example() -> serde_json::Value {
/// Create schema for embedding request /// Create schema for embedding request
fn create_embedding_schema() -> RefOr<utoipa::openapi::schema::Schema> { fn create_embedding_schema() -> RefOr<utoipa::openapi::schema::Schema> {
use utoipa::openapi::schema::ObjectBuilder; <crate::protocols::openai::embeddings::NvCreateEmbeddingRequest as utoipa::PartialSchema>::schema()
RefOr::T(utoipa::openapi::schema::Schema::Object(
ObjectBuilder::new()
.property(
"model",
ObjectBuilder::new()
.description(Some("ID of the model to use"))
.build(),
)
.property(
"input",
ObjectBuilder::new()
.description(Some(
"Input text to embed, encoded as a string or array of strings",
))
.build(),
)
.required("model")
.required("input")
.build(),
))
} }
/// Create example for embedding request /// Create example for embedding request
...@@ -381,26 +283,8 @@ fn create_embedding_example() -> serde_json::Value { ...@@ -381,26 +283,8 @@ fn create_embedding_example() -> serde_json::Value {
/// Create schema for response request /// Create schema for response request
fn create_response_schema() -> RefOr<utoipa::openapi::schema::Schema> { fn create_response_schema() -> RefOr<utoipa::openapi::schema::Schema> {
use utoipa::openapi::schema::ObjectBuilder; // Schema derived from NvCreateResponse type via ToSchema
<crate::protocols::openai::responses::NvCreateResponse as utoipa::PartialSchema>::schema()
RefOr::T(utoipa::openapi::schema::Schema::Object(
ObjectBuilder::new()
.property(
"model",
ObjectBuilder::new()
.description(Some("ID of the model to use"))
.build(),
)
.property(
"input",
ObjectBuilder::new()
.description(Some("The input text"))
.build(),
)
.required("model")
.required("input")
.build(),
))
} }
/// Create example for response request /// Create example for response request
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
use anyhow::Result; use anyhow::Result;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use super::common::EncodedMediaData; use super::common::EncodedMediaData;
use super::rdma::DecodedMediaData; use super::rdma::DecodedMediaData;
...@@ -34,7 +35,7 @@ pub trait Decoder: Clone + Send + 'static { ...@@ -34,7 +35,7 @@ pub trait Decoder: Clone + Send + 'static {
/// Media decoder configuration. /// Media decoder configuration.
/// Used both for MDC server config and runtime `media_io_kwargs`. /// Used both for MDC server config and runtime `media_io_kwargs`.
/// When used at runtime, limits are enforced from MDC and cannot be overridden. /// When used at runtime, limits are enforced from MDC and cannot be overridden.
#[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize)] #[derive(Clone, Debug, Default, serde::Serialize, serde::Deserialize, ToSchema)]
pub struct MediaDecoder { pub struct MediaDecoder {
#[serde(default, skip_serializing_if = "Option::is_none")] #[serde(default, skip_serializing_if = "Option::is_none")]
pub image: Option<ImageDecoder>, pub image: Option<ImageDecoder>,
......
...@@ -7,6 +7,7 @@ use anyhow::Result; ...@@ -7,6 +7,7 @@ use anyhow::Result;
use image::{ColorType, GenericImageView, ImageFormat, ImageReader}; use image::{ColorType, GenericImageView, ImageFormat, ImageReader};
use ndarray::Array3; use ndarray::Array3;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use super::super::common::EncodedMediaData; use super::super::common::EncodedMediaData;
use super::super::rdma::DecodedMediaData; use super::super::rdma::DecodedMediaData;
...@@ -15,7 +16,7 @@ use super::{DecodedMediaMetadata, Decoder}; ...@@ -15,7 +16,7 @@ use super::{DecodedMediaMetadata, Decoder};
const DEFAULT_MAX_ALLOC: u64 = 128 * 1024 * 1024; // 128 MB const DEFAULT_MAX_ALLOC: u64 = 128 * 1024 * 1024; // 128 MB
/// Image decoder limits - can only be set via server config, not runtime kwargs. /// Image decoder limits - can only be set via server config, not runtime kwargs.
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize, ToSchema)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct ImageDecoderLimits { pub struct ImageDecoderLimits {
#[serde(default)] #[serde(default)]
...@@ -37,7 +38,7 @@ impl Default for ImageDecoderLimits { ...@@ -37,7 +38,7 @@ impl Default for ImageDecoderLimits {
} }
} }
#[derive(Clone, Debug, Default, Serialize, Deserialize)] #[derive(Clone, Debug, Default, Serialize, Deserialize, ToSchema)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct ImageDecoder { pub struct ImageDecoder {
#[serde(default)] #[serde(default)]
......
...@@ -10,6 +10,7 @@ use ffmpeg_next::ffi::{AVPixelFormat, av_image_copy_to_buffer}; ...@@ -10,6 +10,7 @@ use ffmpeg_next::ffi::{AVPixelFormat, av_image_copy_to_buffer};
use memfile::{CreateOptions, MemFile, Seal}; use memfile::{CreateOptions, MemFile, Seal};
use ndarray::Array4; use ndarray::Array4;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use video_rs::frame::RawFrame; use video_rs::frame::RawFrame;
use video_rs::{Location, Time}; use video_rs::{Location, Time};
...@@ -22,7 +23,7 @@ use crate::preprocessor::media::{ ...@@ -22,7 +23,7 @@ use crate::preprocessor::media::{
const FRAME_TIME_BUFFER_SECS: f64 = 0.001; const FRAME_TIME_BUFFER_SECS: f64 = 0.001;
const DEFAULT_MAX_ALLOC: u64 = 512 * 1024 * 1024; // 512 MB const DEFAULT_MAX_ALLOC: u64 = 512 * 1024 * 1024; // 512 MB
#[derive(Clone, Debug, Serialize, Deserialize)] #[derive(Clone, Debug, Serialize, Deserialize, ToSchema)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct VideoDecoderLimits { pub struct VideoDecoderLimits {
/// Maximum allowed total allocation of decoded frames in bytes /// Maximum allowed total allocation of decoded frames in bytes
...@@ -38,7 +39,7 @@ impl Default for VideoDecoderLimits { ...@@ -38,7 +39,7 @@ impl Default for VideoDecoderLimits {
} }
} }
#[derive(Clone, Debug, Default, Serialize, Deserialize)] #[derive(Clone, Debug, Default, Serialize, Deserialize, ToSchema)]
#[serde(deny_unknown_fields)] #[serde(deny_unknown_fields)]
pub struct VideoDecoder { pub struct VideoDecoder {
#[serde(default)] #[serde(default)]
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::sync::{Mutex, OnceLock}; use std::sync::{Mutex, OnceLock};
use std::time::{Instant, SystemTime, UNIX_EPOCH}; use std::time::{Instant, SystemTime, UNIX_EPOCH};
use utoipa::ToSchema;
use crate::protocols::openai::nvext::WorkerIdInfo; use crate::protocols::openai::nvext::WorkerIdInfo;
...@@ -241,7 +242,7 @@ impl Default for RequestTracker { ...@@ -241,7 +242,7 @@ impl Default for RequestTracker {
/// ///
/// This struct is serialized and included in the response's `nvext` field /// This struct is serialized and included in the response's `nvext` field
/// when the client requests timing information via `extra_fields: ["timing"]`. /// when the client requests timing information via `extra_fields: ["timing"]`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] #[derive(ToSchema, Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct TimingInfo { pub struct TimingInfo {
/// When the request was received (epoch milliseconds) /// When the request was received (epoch milliseconds)
pub request_received_ms: u64, pub request_received_ms: u64,
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
use dynamo_runtime::protocols::annotated::AnnotationsProvider; use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::Validate; use validator::Validate;
use crate::engines::ValidateRequest; use crate::engines::ValidateRequest;
...@@ -31,7 +32,7 @@ pub use delta::DeltaGenerator; ...@@ -31,7 +32,7 @@ pub use delta::DeltaGenerator;
/// - `common`: Common extension fields (ignore_eos, min_tokens) at root level, embedded using `serde(flatten)`. /// - `common`: Common extension fields (ignore_eos, min_tokens) at root level, embedded using `serde(flatten)`.
/// - `nvext`: The optional NVIDIA extension field. See [`NvExt`] for more details. /// - `nvext`: The optional NVIDIA extension field. See [`NvExt`] for more details.
/// Note: If ignore_eos is specified in both common and nvext, the common (root-level) value takes precedence. /// Note: If ignore_eos is specified in both common and nvext, the common (root-level) value takes precedence.
#[derive(Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateChatCompletionRequest { pub struct NvCreateChatCompletionRequest {
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateChatCompletionRequest, pub inner: dynamo_async_openai::types::CreateChatCompletionRequest,
......
...@@ -2,11 +2,12 @@ ...@@ -2,11 +2,12 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use derive_builder::Builder; use derive_builder::Builder;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::Validate; use validator::Validate;
/// Common extensions for OpenAI API requests that are not part of the standard OpenAI spec /// Common extensions for OpenAI API requests that are not part of the standard OpenAI spec
/// but are commonly needed across different request types. /// but are commonly needed across different request types.
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone, Default)] #[derive(ToSchema, Serialize, Deserialize, Builder, Validate, Debug, Clone, Default)]
pub struct CommonExt { pub struct CommonExt {
/// If true, the model will ignore the end of string token and generate to max_tokens. /// If true, the model will ignore the end of string token and generate to max_tokens.
/// This field can also be specified in nvext, but the root-level value takes precedence. /// This field can also be specified in nvext, but the root-level value takes precedence.
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
use derive_builder::Builder; use derive_builder::Builder;
use dynamo_runtime::protocols::annotated::AnnotationsProvider; use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::Validate; use validator::Validate;
use crate::engines::ValidateRequest; use crate::engines::ValidateRequest;
...@@ -23,7 +24,7 @@ mod delta; ...@@ -23,7 +24,7 @@ mod delta;
pub use aggregator::DeltaAggregator; pub use aggregator::DeltaAggregator;
pub use delta::DeltaGenerator; pub use delta::DeltaGenerator;
#[derive(Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateCompletionRequest { pub struct NvCreateCompletionRequest {
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateCompletionRequest, pub inner: dynamo_async_openai::types::CreateCompletionRequest,
...@@ -43,7 +44,7 @@ pub struct NvCreateCompletionRequest { ...@@ -43,7 +44,7 @@ pub struct NvCreateCompletionRequest {
pub unsupported_fields: std::collections::HashMap<String, serde_json::Value>, pub unsupported_fields: std::collections::HashMap<String, serde_json::Value>,
} }
#[derive(Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateCompletionResponse { pub struct NvCreateCompletionResponse {
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateCompletionResponse, pub inner: dynamo_async_openai::types::CreateCompletionResponse,
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
use dynamo_runtime::protocols::annotated::AnnotationsProvider; use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::Validate; use validator::Validate;
mod aggregator; mod aggregator;
...@@ -11,7 +12,7 @@ mod nvext; ...@@ -11,7 +12,7 @@ mod nvext;
pub use aggregator::DeltaAggregator; pub use aggregator::DeltaAggregator;
pub use nvext::{NvExt, NvExtProvider}; pub use nvext::{NvExt, NvExtProvider};
#[derive(Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateEmbeddingRequest { pub struct NvCreateEmbeddingRequest {
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateEmbeddingRequest, pub inner: dynamo_async_openai::types::CreateEmbeddingRequest,
...@@ -26,7 +27,7 @@ pub struct NvCreateEmbeddingRequest { ...@@ -26,7 +27,7 @@ pub struct NvCreateEmbeddingRequest {
/// # Fields /// # Fields
/// - `inner`: The base OpenAI unary chat completion response, embedded /// - `inner`: The base OpenAI unary chat completion response, embedded
/// using `serde(flatten)`. /// using `serde(flatten)`.
#[derive(Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateEmbeddingResponse { pub struct NvCreateEmbeddingResponse {
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateEmbeddingResponse, pub inner: dynamo_async_openai::types::CreateEmbeddingResponse,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment