Unverified commit f63e273c, authored by smatta-star and committed by GitHub
Browse files

feat: add auto-generated frontend OpenAPI spec and helper binary (#4802)


Signed-off-by: Satvik Matta <smatta@nvidia.com>
parent ac8c9023
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
use derive_builder::Builder; use derive_builder::Builder;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::{Validate, ValidationError}; use validator::{Validate, ValidationError};
pub trait NvExtProvider { pub trait NvExtProvider {
...@@ -10,7 +11,7 @@ pub trait NvExtProvider { ...@@ -10,7 +11,7 @@ pub trait NvExtProvider {
} }
/// NVIDIA LLM extensions to the OpenAI API /// NVIDIA LLM extensions to the OpenAI API
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[validate(schema(function = "validate_nv_ext"))] #[validate(schema(function = "validate_nv_ext"))]
pub struct NvExt { pub struct NvExt {
/// Annotations /// Annotations
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
use derive_builder::Builder; use derive_builder::Builder;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::{Validate, ValidationError}; use validator::{Validate, ValidationError};
pub use crate::protocols::common::timing::TimingInfo; pub use crate::protocols::common::timing::TimingInfo;
...@@ -13,7 +14,7 @@ pub trait NvExtProvider { ...@@ -13,7 +14,7 @@ pub trait NvExtProvider {
} }
/// Worker ID information for disaggregated serving /// Worker ID information for disaggregated serving
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] #[derive(ToSchema, Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct WorkerIdInfo { pub struct WorkerIdInfo {
/// The prefill worker ID that processed this request /// The prefill worker ID that processed this request
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
...@@ -25,7 +26,7 @@ pub struct WorkerIdInfo { ...@@ -25,7 +26,7 @@ pub struct WorkerIdInfo {
} }
/// NVIDIA LLM response extensions /// NVIDIA LLM response extensions
#[derive(Serialize, Deserialize, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Debug, Clone)]
pub struct NvExtResponse { pub struct NvExtResponse {
/// Worker ID information (prefill and decode worker IDs) /// Worker ID information (prefill and decode worker IDs)
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
...@@ -43,7 +44,7 @@ pub struct NvExtResponse { ...@@ -43,7 +44,7 @@ pub struct NvExtResponse {
} }
/// NVIDIA LLM extensions to the OpenAI API /// NVIDIA LLM extensions to the OpenAI API
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[validate(schema(function = "validate_nv_ext"))] #[validate(schema(function = "validate_nv_ext"))]
pub struct NvExt { pub struct NvExt {
/// If true, sampling will be forced to be greedy. /// If true, sampling will be forced to be greedy.
......
...@@ -11,6 +11,7 @@ use dynamo_async_openai::types::{ ...@@ -11,6 +11,7 @@ use dynamo_async_openai::types::{
}; };
use dynamo_runtime::protocols::annotated::AnnotationsProvider; use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid; use uuid::Uuid;
use validator::Validate; use validator::Validate;
...@@ -18,8 +19,9 @@ use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatComplet ...@@ -18,8 +19,9 @@ use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatComplet
use super::nvext::{NvExt, NvExtProvider}; use super::nvext::{NvExt, NvExtProvider};
use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider}; use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
#[derive(Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateResponse { pub struct NvCreateResponse {
/// Flattened CreateResponse fields (model, input, temperature, etc.)
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::responses::CreateResponse, pub inner: dynamo_async_openai::types::responses::CreateResponse,
...@@ -27,8 +29,9 @@ pub struct NvCreateResponse { ...@@ -27,8 +29,9 @@ pub struct NvCreateResponse {
pub nvext: Option<NvExt>, pub nvext: Option<NvExt>,
} }
#[derive(Serialize, Deserialize, Validate, Debug, Clone)] #[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvResponse { pub struct NvResponse {
/// Flattened Response fields.
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::responses::Response, pub inner: dynamo_async_openai::types::responses::Response,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment