// Completions API request types (v1/completions) - DEPRECATED but still supported use crate::protocols::common::{default_true, GenerationRequest, LoRAPath, StringOrArray}; use crate::protocols::openai::common::StreamOptions; use serde::{Deserialize, Serialize}; use std::collections::HashMap; #[derive(Debug, Clone, Deserialize, Serialize)] pub struct CompletionRequest { /// ID of the model to use (required for OpenAI, optional for some implementations, such as SGLang) pub model: String, /// The prompt(s) to generate completions for pub prompt: StringOrArray, /// The suffix that comes after a completion of inserted text #[serde(skip_serializing_if = "Option::is_none")] pub suffix: Option, /// The maximum number of tokens to generate #[serde(skip_serializing_if = "Option::is_none")] pub max_tokens: Option, /// What sampling temperature to use, between 0 and 2 #[serde(skip_serializing_if = "Option::is_none")] pub temperature: Option, /// An alternative to sampling with temperature (nucleus sampling) #[serde(skip_serializing_if = "Option::is_none")] pub top_p: Option, /// How many completions to generate for each prompt #[serde(skip_serializing_if = "Option::is_none")] pub n: Option, /// Whether to stream back partial progress #[serde(default)] pub stream: bool, /// Options for streaming response #[serde(skip_serializing_if = "Option::is_none")] pub stream_options: Option, /// Include the log probabilities on the logprobs most likely tokens #[serde(skip_serializing_if = "Option::is_none")] pub logprobs: Option, /// Echo back the prompt in addition to the completion #[serde(default)] pub echo: bool, /// Up to 4 sequences where the API will stop generating further tokens #[serde(skip_serializing_if = "Option::is_none")] pub stop: Option, /// Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far #[serde(skip_serializing_if = "Option::is_none")] pub presence_penalty: Option, /// Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far #[serde(skip_serializing_if = "Option::is_none")] pub frequency_penalty: Option, /// Generates best_of completions server-side and returns the "best" #[serde(skip_serializing_if = "Option::is_none")] pub best_of: Option, /// Modify the likelihood of specified tokens appearing in the completion #[serde(skip_serializing_if = "Option::is_none")] pub logit_bias: Option>, /// A unique identifier representing your end-user #[serde(skip_serializing_if = "Option::is_none")] pub user: Option, /// If specified, our system will make a best effort to sample deterministically #[serde(skip_serializing_if = "Option::is_none")] pub seed: Option, // ============= SGLang Extensions ============= /// Top-k sampling parameter (-1 to disable) #[serde(skip_serializing_if = "Option::is_none")] pub top_k: Option, /// Min-p nucleus sampling parameter #[serde(skip_serializing_if = "Option::is_none")] pub min_p: Option, /// Minimum number of tokens to generate #[serde(skip_serializing_if = "Option::is_none")] pub min_tokens: Option, /// Repetition penalty for reducing repetitive text #[serde(skip_serializing_if = "Option::is_none")] pub repetition_penalty: Option, /// Regex constraint for output generation #[serde(skip_serializing_if = "Option::is_none")] pub regex: Option, /// EBNF grammar constraint for structured output #[serde(skip_serializing_if = "Option::is_none")] pub ebnf: Option, /// JSON schema constraint for structured output #[serde(skip_serializing_if = "Option::is_none")] pub json_schema: Option, /// Specific token IDs to use as stop conditions #[serde(skip_serializing_if = "Option::is_none")] pub stop_token_ids: Option>, /// Skip trimming stop tokens from output #[serde(default)] pub no_stop_trim: bool, /// Ignore end-of-sequence tokens during generation #[serde(default)] pub ignore_eos: bool, /// Skip special tokens during detokenization #[serde(default = "default_true")] pub skip_special_tokens: bool, // ============= SGLang Extensions ============= /// Path to LoRA adapter(s) for model customization #[serde(skip_serializing_if = "Option::is_none")] pub lora_path: Option, /// Session parameters for continual prompting #[serde(skip_serializing_if = "Option::is_none")] pub session_params: Option>, /// Return model hidden states #[serde(default)] pub return_hidden_states: bool, /// Additional fields including bootstrap info for PD routing #[serde(flatten)] pub other: serde_json::Map, } impl GenerationRequest for CompletionRequest { fn is_stream(&self) -> bool { self.stream } fn get_model(&self) -> Option<&str> { Some(&self.model) } fn extract_text_for_routing(&self) -> String { match &self.prompt { StringOrArray::String(s) => s.clone(), StringOrArray::Array(v) => v.join(" "), } } }