Commit 51ae4030 (unverified), authored by Shuaiyi Zhang, committed by GitHub
Browse files

[router] forward stream_options in request (#7860)

parent 653b873b
...@@ -59,6 +59,7 @@ fn create_sample_chat_completion_request() -> ChatCompletionRequest { ...@@ -59,6 +59,7 @@ fn create_sample_chat_completion_request() -> ChatCompletionRequest {
top_p: Some(1.0), top_p: Some(1.0),
n: Some(1), n: Some(1),
stream: false, stream: false,
stream_options: None,
stop: None, stop: None,
presence_penalty: Some(0.0), presence_penalty: Some(0.0),
frequency_penalty: Some(0.0), frequency_penalty: Some(0.0),
...@@ -86,6 +87,7 @@ fn create_sample_completion_request() -> CompletionRequest { ...@@ -86,6 +87,7 @@ fn create_sample_completion_request() -> CompletionRequest {
top_p: Some(1.0), top_p: Some(1.0),
n: Some(1), n: Some(1),
stream: false, stream: false,
stream_options: None,
logprobs: None, logprobs: None,
echo: false, echo: false,
stop: None, stop: None,
...@@ -130,6 +132,7 @@ fn create_large_chat_completion_request() -> ChatCompletionRequest { ...@@ -130,6 +132,7 @@ fn create_large_chat_completion_request() -> ChatCompletionRequest {
top_p: Some(0.95), top_p: Some(0.95),
n: Some(1), n: Some(1),
stream: false, stream: false,
stream_options: None,
stop: None, stop: None,
presence_penalty: Some(0.1), presence_penalty: Some(0.1),
frequency_penalty: Some(0.1), frequency_penalty: Some(0.1),
......
...@@ -52,6 +52,10 @@ pub struct CompletionRequest { ...@@ -52,6 +52,10 @@ pub struct CompletionRequest {
#[serde(default)] #[serde(default)]
pub stream: bool, pub stream: bool,
/// Options for streaming response
#[serde(skip_serializing_if = "Option::is_none")]
pub stream_options: Option<StreamOptions>,
/// Include the log probabilities on the logprobs most likely tokens /// Include the log probabilities on the logprobs most likely tokens
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub logprobs: Option<u32>, pub logprobs: Option<u32>,
...@@ -132,6 +136,10 @@ pub struct ChatCompletionRequest { ...@@ -132,6 +136,10 @@ pub struct ChatCompletionRequest {
#[serde(default)] #[serde(default)]
pub stream: bool, pub stream: bool,
/// Options for streaming response
#[serde(skip_serializing_if = "Option::is_none")]
pub stream_options: Option<StreamOptions>,
/// Up to 4 sequences where the API will stop generating further tokens /// Up to 4 sequences where the API will stop generating further tokens
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub stop: Option<StringOrArray>, pub stop: Option<StringOrArray>,
...@@ -258,6 +266,12 @@ pub struct ImageUrl { ...@@ -258,6 +266,12 @@ pub struct ImageUrl {
pub detail: Option<String>, // "auto", "low", or "high" pub detail: Option<String>, // "auto", "low", or "high"
} }
/// Options for a streaming response.
///
/// Carried by the optional `stream_options` field on both `CompletionRequest`
/// and `ChatCompletionRequest`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct StreamOptions {
/// Optional flag; the key is left out of the serialized output when `None`.
/// NOTE(review): presumably mirrors the OpenAI-style
/// `stream_options.include_usage` switch (usage statistics reported in the
/// stream) — confirm against the backend that receives the forwarded request.
#[serde(skip_serializing_if = "Option::is_none")]
pub include_usage: Option<bool>,
}
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type")] #[serde(tag = "type")]
pub enum ResponseFormat { pub enum ResponseFormat {
......
...@@ -50,6 +50,7 @@ fn test_benchmark_request_creation() { ...@@ -50,6 +50,7 @@ fn test_benchmark_request_creation() {
top_p: Some(1.0), top_p: Some(1.0),
n: Some(1), n: Some(1),
stream: false, stream: false,
stream_options: None,
stop: None, stop: None,
presence_penalty: Some(0.0), presence_penalty: Some(0.0),
frequency_penalty: Some(0.0), frequency_penalty: Some(0.0),
...@@ -75,6 +76,7 @@ fn test_benchmark_request_creation() { ...@@ -75,6 +76,7 @@ fn test_benchmark_request_creation() {
top_p: Some(1.0), top_p: Some(1.0),
n: Some(1), n: Some(1),
stream: false, stream: false,
stream_options: None,
logprobs: None, logprobs: None,
echo: false, echo: false,
stop: None, stop: None,
...@@ -143,6 +145,7 @@ fn test_benchmark_request_adaptation() { ...@@ -143,6 +145,7 @@ fn test_benchmark_request_adaptation() {
top_p: Some(1.0), top_p: Some(1.0),
n: Some(1), n: Some(1),
stream: false, stream: false,
stream_options: None,
stop: None, stop: None,
presence_penalty: Some(0.0), presence_penalty: Some(0.0),
frequency_penalty: Some(0.0), frequency_penalty: Some(0.0),
...@@ -168,6 +171,7 @@ fn test_benchmark_request_adaptation() { ...@@ -168,6 +171,7 @@ fn test_benchmark_request_adaptation() {
top_p: Some(1.0), top_p: Some(1.0),
n: Some(1), n: Some(1),
stream: false, stream: false,
stream_options: None,
logprobs: None, logprobs: None,
echo: false, echo: false,
stop: None, stop: None,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment