Unverified commit f63e273c, authored by smatta-star and committed by GitHub
Browse files

feat: add auto-generated frontend OpenAPI spec and helper binary (#4802)


Signed-off-by: Satvik Matta <smatta@nvidia.com>
parent ac8c9023
......@@ -3,6 +3,7 @@
use derive_builder::Builder;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::{Validate, ValidationError};
pub trait NvExtProvider {
......@@ -10,7 +11,7 @@ pub trait NvExtProvider {
}
/// NVIDIA LLM extensions to the OpenAI API
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[derive(ToSchema, Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[validate(schema(function = "validate_nv_ext"))]
pub struct NvExt {
/// Annotations
......
......@@ -3,6 +3,7 @@
use derive_builder::Builder;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use validator::{Validate, ValidationError};
pub use crate::protocols::common::timing::TimingInfo;
......@@ -13,7 +14,7 @@ pub trait NvExtProvider {
}
/// Worker ID information for disaggregated serving
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct WorkerIdInfo {
/// The prefill worker ID that processed this request
#[serde(skip_serializing_if = "Option::is_none")]
......@@ -25,7 +26,7 @@ pub struct WorkerIdInfo {
}
/// NVIDIA LLM response extensions
#[derive(Serialize, Deserialize, Debug, Clone)]
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone)]
pub struct NvExtResponse {
/// Worker ID information (prefill and decode worker IDs)
#[serde(skip_serializing_if = "Option::is_none")]
......@@ -43,7 +44,7 @@ pub struct NvExtResponse {
}
/// NVIDIA LLM extensions to the OpenAI API
#[derive(Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[derive(ToSchema, Serialize, Deserialize, Builder, Validate, Debug, Clone)]
#[validate(schema(function = "validate_nv_ext"))]
pub struct NvExt {
/// If true, sampling will be forced to be greedy.
......
......@@ -11,6 +11,7 @@ use dynamo_async_openai::types::{
};
use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use uuid::Uuid;
use validator::Validate;
......@@ -18,8 +19,9 @@ use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatComplet
use super::nvext::{NvExt, NvExtProvider};
use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateResponse {
/// Flattened CreateResponse fields (model, input, temperature, etc.)
#[serde(flatten)]
pub inner: dynamo_async_openai::types::responses::CreateResponse,
......@@ -27,8 +29,9 @@ pub struct NvCreateResponse {
pub nvext: Option<NvExt>,
}
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
#[derive(ToSchema, Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvResponse {
/// Flattened Response fields.
#[serde(flatten)]
pub inner: dynamo_async_openai::types::responses::Response,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment