// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. use std::collections::HashMap; use derive_builder::Builder; use serde::{Deserialize, Serialize}; use validator::Validate; mod aggregator; mod delta; pub use aggregator::DeltaAggregator; pub use delta::DeltaGenerator; use super::{ common::{self, SamplingOptionsProvider, StopConditionsProvider}, nvext::{NvExt, NvExtProvider}, CompletionUsage, ContentProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider, }; use dynamo_runtime::protocols::annotated::AnnotationsProvider; #[derive(Serialize, Deserialize, Validate, Debug, Clone)] pub struct CompletionRequest { #[serde(flatten)] pub inner: async_openai::types::CreateCompletionRequest, #[serde(skip_serializing_if = "Option::is_none")] pub nvext: Option, } /// Legacy OpenAI CompletionResponse /// Represents a completion response from the API. /// Note: both the streamed and non-streamed response objects share the same /// shape (unlike the chat endpoint). #[derive(Clone, Debug, Deserialize, Serialize)] pub struct CompletionResponse { /// A unique identifier for the completion. pub id: String, /// The list of completion choices the model generated for the input prompt. pub choices: Vec, /// The Unix timestamp (in seconds) of when the completion was created. pub created: u64, /// The model used for completion. pub model: String, /// The object type, which is always "text_completion" pub object: String, /// Usage statistics for the completion request. pub usage: Option, /// This fingerprint represents the backend configuration that the model runs with. /// Can be used in conjunction with the seed request parameter to understand when backend /// changes have been made that might impact determinism. /// /// NIM Compatibility: /// This field is not supported by the NIM; however it will be added in the future. /// The optional nature of this field will be relaxed when it is supported. #[serde(skip_serializing_if = "Option::is_none")] pub system_fingerprint: Option, // TODO(ryan) // pub nvext: Option, } /// Legacy OpenAI CompletionResponse Choice component #[derive(Clone, Debug, Deserialize, Serialize, Builder)] pub struct CompletionChoice { #[builder(setter(into))] pub text: String, #[builder(default = "0")] pub index: u64, #[builder(default, setter(into, strip_option))] pub finish_reason: Option, #[serde(skip_serializing_if = "Option::is_none")] #[builder(default, setter(strip_option))] pub logprobs: Option, } impl ContentProvider for CompletionChoice { fn content(&self) -> String { self.text.clone() } } impl CompletionChoice { pub fn builder() -> CompletionChoiceBuilder { CompletionChoiceBuilder::default() } } // TODO: validate this is the correct format /// Legacy OpenAI LogprobResult component #[derive(Clone, Debug, Deserialize, Serialize)] pub struct LogprobResult { pub tokens: Vec, pub token_logprobs: Vec, pub top_logprobs: Vec>, pub text_offset: Vec, } pub fn prompt_to_string(prompt: &async_openai::types::Prompt) -> String { match prompt { async_openai::types::Prompt::String(s) => s.clone(), async_openai::types::Prompt::StringArray(arr) => arr.join(" "), // Join strings with spaces async_openai::types::Prompt::IntegerArray(arr) => arr .iter() .map(|&num| num.to_string()) .collect::>() .join(" "), async_openai::types::Prompt::ArrayOfIntegerArray(arr) => arr .iter() .map(|inner| { inner .iter() .map(|&num| num.to_string()) .collect::>() .join(" ") }) .collect::>() .join(" | "), // Separate arrays with a delimiter } } impl NvExtProvider for CompletionRequest { fn nvext(&self) -> Option<&NvExt> { self.nvext.as_ref() } fn raw_prompt(&self) -> Option { if let Some(nvext) = self.nvext.as_ref() { if let Some(use_raw_prompt) = nvext.use_raw_prompt { if use_raw_prompt { return Some(prompt_to_string(&self.inner.prompt)); } } } None } } impl AnnotationsProvider for CompletionRequest { fn annotations(&self) -> Option> { self.nvext .as_ref() .and_then(|nvext| nvext.annotations.clone()) } fn has_annotation(&self, annotation: &str) -> bool { self.nvext .as_ref() .and_then(|nvext| nvext.annotations.as_ref()) .map(|annotations| annotations.contains(&annotation.to_string())) .unwrap_or(false) } } impl OpenAISamplingOptionsProvider for CompletionRequest { fn get_temperature(&self) -> Option { self.inner.temperature } fn get_top_p(&self) -> Option { self.inner.top_p } fn get_frequency_penalty(&self) -> Option { self.inner.frequency_penalty } fn get_presence_penalty(&self) -> Option { self.inner.presence_penalty } fn nvext(&self) -> Option<&NvExt> { self.nvext.as_ref() } } impl OpenAIStopConditionsProvider for CompletionRequest { fn get_max_tokens(&self) -> Option { self.inner.max_tokens } fn get_min_tokens(&self) -> Option { None } fn get_stop(&self) -> Option> { None } fn nvext(&self) -> Option<&NvExt> { self.nvext.as_ref() } } #[derive(Builder)] pub struct ResponseFactory { #[builder(setter(into))] pub model: String, #[builder(default)] pub system_fingerprint: Option, #[builder(default = "format!(\"cmpl-{}\", uuid::Uuid::new_v4())")] pub id: String, #[builder(default = "\"text_completion\".to_string()")] pub object: String, #[builder(default = "chrono::Utc::now().timestamp() as u64")] pub created: u64, } impl ResponseFactory { pub fn builder() -> ResponseFactoryBuilder { ResponseFactoryBuilder::default() } pub fn make_response( &self, choice: CompletionChoice, usage: Option, ) -> CompletionResponse { CompletionResponse { id: self.id.clone(), object: self.object.clone(), created: self.created, model: self.model.clone(), choices: vec![choice], system_fingerprint: self.system_fingerprint.clone(), usage, } } } /// Implements TryFrom for converting an OpenAI's CompletionRequest to an Engine's CompletionRequest impl TryFrom for common::CompletionRequest { type Error = anyhow::Error; fn try_from(request: CompletionRequest) -> Result { // openai_api_rs::v1::completion::CompletionRequest { // NA pub model: String, // pub prompt: String, // ** pub suffix: Option, // pub max_tokens: Option, // pub temperature: Option, // pub top_p: Option, // pub n: Option, // pub stream: Option, // pub logprobs: Option, // pub echo: Option, // pub stop: Option>, // pub presence_penalty: Option, // pub frequency_penalty: Option, // pub best_of: Option, // pub logit_bias: Option>, // pub user: Option, // } // // ** no supported if request.inner.suffix.is_some() { return Err(anyhow::anyhow!("suffix is not supported")); } let stop_conditions = request .extract_stop_conditions() .map_err(|e| anyhow::anyhow!("Failed to extract stop conditions: {}", e))?; let sampling_options = request .extract_sampling_options() .map_err(|e| anyhow::anyhow!("Failed to extract sampling options: {}", e))?; let prompt = common::PromptType::Completion(common::CompletionContext { prompt: prompt_to_string(&request.inner.prompt), system_prompt: None, }); Ok(common::CompletionRequest { prompt, stop_conditions, sampling_options, mdc_sum: None, annotations: None, }) } } impl TryFrom for CompletionChoice { type Error = anyhow::Error; fn try_from(response: common::StreamingCompletionResponse) -> Result { let choice = CompletionChoice { text: response .delta .text .ok_or(anyhow::anyhow!("No text in response"))?, index: response.delta.index.unwrap_or(0) as u64, logprobs: None, finish_reason: match &response.delta.finish_reason { Some(common::FinishReason::EoS) => Some("stop".to_string()), Some(common::FinishReason::Stop) => Some("stop".to_string()), Some(common::FinishReason::Length) => Some("length".to_string()), Some(common::FinishReason::Error(err_msg)) => { return Err(anyhow::anyhow!("finish_reason::error = {}", err_msg)); } Some(common::FinishReason::Cancelled) => Some("cancelled".to_string()), None => None, }, }; Ok(choice) } }