use std::collections::HashMap; use serde::{Deserialize, Serialize}; use serde_json::{Map, Value}; use super::common::*; // ============================================================================ // Completions API (v1/completions) - DEPRECATED but still supported // ============================================================================ #[derive(Debug, Clone, Deserialize, Serialize)] pub struct CompletionRequest { /// ID of the model to use (required for OpenAI, optional for some implementations, such as SGLang) pub model: String, /// The prompt(s) to generate completions for pub prompt: StringOrArray, /// The suffix that comes after a completion of inserted text #[serde(skip_serializing_if = "Option::is_none")] pub suffix: Option, /// The maximum number of tokens to generate #[serde(skip_serializing_if = "Option::is_none")] pub max_tokens: Option, /// What sampling temperature to use, between 0 and 2 #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, /// An alternative to sampling with temperature (nucleus sampling) #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, /// How many completions to generate for each prompt #[serde(skip_serializing_if = "Option::is_none")] pub n: Option, /// Whether to stream back partial progress #[serde(default)] pub stream: bool, /// Options for streaming response #[serde(skip_serializing_if = "Option::is_none")] pub stream_options: Option, /// Include the log probabilities on the logprobs most likely tokens #[serde(skip_serializing_if = "Option::is_none")] pub logprobs: Option, /// Echo back the prompt in addition to the completion #[serde(default)] pub echo: bool, /// Up to 4 sequences where the API will stop generating further tokens #[serde(skip_serializing_if = "Option::is_none")] pub stop: Option, /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far #[serde(skip_serializing_if = "Option::is_none")] pub presence_penalty: Option, /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far #[serde(skip_serializing_if = "Option::is_none")] pub frequency_penalty: Option, /// Generates best_of completions server-side and returns the "best" #[serde(skip_serializing_if = "Option::is_none")] pub best_of: Option, /// Modify the likelihood of specified tokens appearing in the completion #[serde(skip_serializing_if = "Option::is_none")] pub logit_bias: Option>, /// A unique identifier representing your end-user #[serde(skip_serializing_if = "Option::is_none")] pub user: Option, /// If specified, our system will make a best effort to sample deterministically #[serde(skip_serializing_if = "Option::is_none")] pub seed: Option, // -------- Engine Specific Sampling Parameters -------- /// Top-k sampling parameter (-1 to disable) #[serde(skip_serializing_if = "Option::is_none")] pub top_k: Option, /// Min-p nucleus sampling parameter #[serde(skip_serializing_if = "Option::is_none")] pub min_p: Option, /// Minimum number of tokens to generate #[serde(skip_serializing_if = "Option::is_none")] pub min_tokens: Option, /// Repetition penalty for reducing repetitive text #[serde(skip_serializing_if = "Option::is_none")] pub repetition_penalty: Option, /// Regex constraint for output generation #[serde(skip_serializing_if = "Option::is_none")] pub regex: Option, /// EBNF grammar constraint for structured output #[serde(skip_serializing_if = "Option::is_none")] pub ebnf: Option, /// JSON schema constraint for structured output #[serde(skip_serializing_if = "Option::is_none")] pub json_schema: Option, /// Specific token IDs to use as stop conditions #[serde(skip_serializing_if = "Option::is_none")] pub stop_token_ids: Option>, /// Skip trimming stop tokens from output #[serde(default)] pub no_stop_trim: bool, /// Ignore end-of-sequence tokens during generation #[serde(default)] pub ignore_eos: bool, /// Skip special tokens during detokenization #[serde(default = "default_true")] pub skip_special_tokens: bool, /// Path to LoRA adapter(s) for model customization #[serde(skip_serializing_if = "Option::is_none")] pub lora_path: Option, /// Session parameters for continual prompting #[serde(skip_serializing_if = "Option::is_none")] pub session_params: Option>, /// Return model hidden states #[serde(default)] pub return_hidden_states: bool, /// Sampling seed for deterministic outputs #[serde(skip_serializing_if = "Option::is_none")] pub sampling_seed: Option, /// Additional fields including bootstrap info for PD routing #[serde(flatten)] pub other: Map, } impl GenerationRequest for CompletionRequest { fn is_stream(&self) -> bool { self.stream } fn get_model(&self) -> Option<&str> { Some(&self.model) } fn extract_text_for_routing(&self) -> String { match &self.prompt { StringOrArray::String(s) => s.clone(), StringOrArray::Array(v) => v.join(" "), } } } // ============================================================================ // Response Types // ============================================================================ #[derive(Debug, Clone, Deserialize, Serialize)] pub struct CompletionResponse { pub id: String, pub object: String, // "text_completion" pub created: u64, pub model: String, pub choices: Vec, #[serde(skip_serializing_if = "Option::is_none")] pub usage: Option, #[serde(skip_serializing_if = "Option::is_none")] pub system_fingerprint: Option, } #[derive(Debug, Clone, Deserialize, Serialize)] pub struct CompletionChoice { pub text: String, pub index: u32, #[serde(skip_serializing_if = "Option::is_none")] pub logprobs: Option, pub finish_reason: Option, // "stop", "length", "content_filter", etc. /// Information about which stop condition was matched #[serde(skip_serializing_if = "Option::is_none")] pub matched_stop: Option, // Can be string or integer } #[derive(Debug, Clone, Deserialize, Serialize)] pub struct CompletionStreamResponse { pub id: String, pub object: String, // "text_completion" pub created: u64, pub choices: Vec, pub model: String, #[serde(skip_serializing_if = "Option::is_none")] pub system_fingerprint: Option, } #[derive(Debug, Clone, Deserialize, Serialize)] pub struct CompletionStreamChoice { pub text: String, pub index: u32, #[serde(skip_serializing_if = "Option::is_none")] pub logprobs: Option, pub finish_reason: Option, }