sglang · Commit 5d62b56f (Unverified)
Authored Aug 05, 2025 by Simo Lin; committed by GitHub on Aug 05, 2025
Parent: 3ae8e3ea

[router] complete router oai spec (#8828)

Showing 5 changed files with 856 additions and 365 deletions (+856 −365):

- sgl-router/benches/request_processing.rs (+114 −55)
- sgl-router/src/openai_api_types.rs (+201 −2)
- sgl-router/src/routers/pd_types.rs (+34 −72)
- sgl-router/src/routers/request_adapter.rs (+392 −162)
- sgl-router/tests/benchmark_integration.rs (+115 −74)
sgl-router/benches/request_processing.rs

```diff
@@ -8,12 +8,116 @@ use sglang_router_rs::openai_api_types::{
 };
 use sglang_router_rs::routers::request_adapter::{RouteableRequest, ToPdRequest};
 
+/// Create a default GenerateRequest for benchmarks with minimal fields set
+fn default_generate_request() -> GenerateRequest {
+    GenerateRequest {
+        text: None,
+        prompt: None,
+        input_ids: None,
+        stream: false,
+        parameters: None,
+        sampling_params: None,
+        return_logprob: false,
+        // SGLang Extensions
+        lora_path: None,
+        session_params: None,
+        return_hidden_states: false,
+        rid: None,
+    }
+}
+
+/// Create a default ChatCompletionRequest for benchmarks with minimal fields set
+fn default_chat_completion_request() -> ChatCompletionRequest {
+    ChatCompletionRequest {
+        model: String::new(),
+        messages: vec![],
+        max_tokens: None,
+        max_completion_tokens: None,
+        temperature: None,
+        top_p: None,
+        n: None,
+        stream: false,
+        stream_options: None,
+        stop: None,
+        presence_penalty: None,
+        frequency_penalty: None,
+        logit_bias: None,
+        logprobs: false,
+        top_logprobs: None,
+        user: None,
+        response_format: None,
+        seed: None,
+        tools: None,
+        tool_choice: None,
+        parallel_tool_calls: None,
+        function_call: None,
+        functions: None,
+        // SGLang Extensions
+        top_k: None,
+        min_p: None,
+        min_tokens: None,
+        repetition_penalty: None,
+        regex: None,
+        ebnf: None,
+        stop_token_ids: None,
+        no_stop_trim: false,
+        ignore_eos: false,
+        continue_final_message: false,
+        skip_special_tokens: true,
+        // SGLang Extensions
+        lora_path: None,
+        session_params: None,
+        separate_reasoning: true,
+        stream_reasoning: true,
+        return_hidden_states: false,
+    }
+}
+
+/// Create a default CompletionRequest for benchmarks with minimal fields set
+fn default_completion_request() -> CompletionRequest {
+    CompletionRequest {
+        model: String::new(),
+        prompt: StringOrArray::String(String::new()),
+        suffix: None,
+        max_tokens: None,
+        temperature: None,
+        top_p: None,
+        n: None,
+        stream: false,
+        stream_options: None,
+        logprobs: None,
+        echo: false,
+        stop: None,
+        presence_penalty: None,
+        frequency_penalty: None,
+        best_of: None,
+        logit_bias: None,
+        user: None,
+        seed: None,
+        // SGLang Extensions
+        top_k: None,
+        min_p: None,
+        min_tokens: None,
+        repetition_penalty: None,
+        regex: None,
+        ebnf: None,
+        json_schema: None,
+        stop_token_ids: None,
+        no_stop_trim: false,
+        ignore_eos: false,
+        skip_special_tokens: true,
+        // SGLang Extensions
+        lora_path: None,
+        session_params: None,
+        return_hidden_states: false,
+        other: serde_json::Map::new(),
+    }
+}
+
 // Sample request data for benchmarks
 fn create_sample_generate_request() -> GenerateRequest {
     GenerateRequest {
         text: Some("Write a story about artificial intelligence".to_string()),
-        input_ids: None,
+        prompt: None,
         parameters: Some(GenerateParameters {
             max_new_tokens: Some(100),
             temperature: Some(0.8),
@@ -31,8 +135,7 @@ fn create_sample_generate_request() -> GenerateRequest {
             repetition_penalty: Some(1.0),
             ..Default::default()
         }),
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     }
 }
```
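The sample builders above lean on Rust's struct update syntax: each call site lists only the fields it overrides and pulls everything else from the shared default. A minimal sketch of the pattern, using names from this diff:

```rust
// Explicit fields win; the rest are filled in from default_generate_request().
let req = GenerateRequest {
    text: Some("Hi".to_string()),
    ..default_generate_request()
};
assert!(!req.return_logprob); // inherited from the default
```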
```diff
@@ -58,22 +161,10 @@ fn create_sample_chat_completion_request() -> ChatCompletionRequest {
         temperature: Some(0.7),
         top_p: Some(1.0),
         n: Some(1),
-        stream: false,
-        stream_options: None,
-        stop: None,
         presence_penalty: Some(0.0),
         frequency_penalty: Some(0.0),
-        logit_bias: None,
-        logprobs: false,
-        top_logprobs: None,
-        user: None,
-        response_format: None,
-        seed: None,
-        tools: None,
-        tool_choice: None,
         parallel_tool_calls: Some(true),
-        function_call: None,
-        functions: None,
+        ..default_chat_completion_request()
     }
 }
```
```diff
@@ -81,23 +172,14 @@ fn create_sample_completion_request() -> CompletionRequest {
     CompletionRequest {
         model: "text-davinci-003".to_string(),
         prompt: StringOrArray::String("Complete this sentence: The future of AI is".to_string()),
-        suffix: None,
         max_tokens: Some(50),
         temperature: Some(0.8),
         top_p: Some(1.0),
         n: Some(1),
-        stream: false,
-        stream_options: None,
-        logprobs: None,
-        echo: false,
-        stop: None,
         presence_penalty: Some(0.0),
         frequency_penalty: Some(0.0),
         best_of: Some(1),
-        logit_bias: None,
-        user: None,
-        seed: None,
-        other: serde_json::Map::new(),
+        ..default_completion_request()
     }
 }
```
```diff
@@ -121,6 +203,7 @@ fn create_large_chat_completion_request() -> ChatCompletionRequest {
             name: None,
             tool_calls: None,
             function_call: None,
+            reasoning_content: None,
         });
     }
@@ -132,22 +215,13 @@ fn create_large_chat_completion_request() -> ChatCompletionRequest {
         temperature: Some(0.7),
         top_p: Some(0.95),
         n: Some(1),
-        stream: false,
-        stream_options: None,
-        stop: None,
         presence_penalty: Some(0.1),
         frequency_penalty: Some(0.1),
-        logit_bias: None,
-        logprobs: false,
         top_logprobs: Some(5),
         user: Some("benchmark_user".to_string()),
-        response_format: None,
         seed: Some(42),
-        tools: None,
-        tool_choice: None,
         parallel_tool_calls: Some(true),
-        function_call: None,
-        functions: None,
+        ..default_chat_completion_request()
     }
 }
```
```diff
@@ -331,32 +405,17 @@ fn bench_throughput_by_size(c: &mut Criterion) {
     // Create requests of different sizes
     let small_generate = GenerateRequest {
         text: Some("Hi".to_string()),
-        input_ids: None,
-        prompt: None,
-        parameters: None,
-        sampling_params: None,
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
     let medium_generate = GenerateRequest {
         text: Some("Write a medium length story about AI".repeat(10)),
-        input_ids: None,
-        prompt: None,
-        parameters: None,
-        sampling_params: None,
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
     let large_generate = GenerateRequest {
         text: Some("Write a very long and detailed story about artificial intelligence and its impact on society".repeat(100)),
-        input_ids: None,
-        prompt: None,
-        parameters: None,
-        sampling_params: None,
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
 
     for (name, req) in [
```
sgl-router/src/openai_api_types.rs

```diff
@@ -6,6 +6,21 @@ use serde::{Deserialize, Serialize};
 use serde_json::Value;
 use std::collections::HashMap;
 
+/// Helper function for serde default value
+fn default_true() -> bool {
+    true
+}
+
+// ============= SGLang-Specific Types =============
+
+/// LoRA adapter path - can be single path or batch of paths
+#[derive(Debug, Clone, Deserialize, Serialize)]
+#[serde(untagged)]
+pub enum LoRAPath {
+    Single(Option<String>),
+    Batch(Vec<Option<String>>),
+}
+
 /// Common trait for all generation requests
 pub trait GenerationRequest: Send + Sync {
     /// Check if the request is for streaming
```
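Because `LoRAPath` is `#[serde(untagged)]`, the JSON shape alone selects the variant. A minimal sketch of what this accepts (adapter paths invented for illustration; assumes `serde_json` is available):

```rust
use serde_json::json;

// A bare string (or null) matches LoRAPath::Single...
let single: LoRAPath = serde_json::from_value(json!("adapters/math-lora")).unwrap();
// ...while an array, possibly containing nulls, matches LoRAPath::Batch.
let batch: LoRAPath = serde_json::from_value(json!(["adapters/math-lora", null])).unwrap();
```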
```diff
@@ -92,6 +107,64 @@ pub struct CompletionRequest {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub seed: Option<i64>,
 
+    // ============= SGLang Extensions =============
+    /// Top-k sampling parameter (-1 to disable)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_k: Option<i32>,
+
+    /// Min-p nucleus sampling parameter
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub min_p: Option<f32>,
+
+    /// Minimum number of tokens to generate
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub min_tokens: Option<u32>,
+
+    /// Repetition penalty for reducing repetitive text
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub repetition_penalty: Option<f32>,
+
+    /// Regex constraint for output generation
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub regex: Option<String>,
+
+    /// EBNF grammar constraint for structured output
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ebnf: Option<String>,
+
+    /// JSON schema constraint for structured output
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub json_schema: Option<String>,
+
+    /// Specific token IDs to use as stop conditions
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stop_token_ids: Option<Vec<i32>>,
+
+    /// Skip trimming stop tokens from output
+    #[serde(default)]
+    pub no_stop_trim: bool,
+
+    /// Ignore end-of-sequence tokens during generation
+    #[serde(default)]
+    pub ignore_eos: bool,
+
+    /// Skip special tokens during detokenization
+    #[serde(default = "default_true")]
+    pub skip_special_tokens: bool,
+
+    // ============= SGLang Extensions =============
+    /// Path to LoRA adapter(s) for model customization
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub lora_path: Option<LoRAPath>,
+
+    /// Session parameters for continual prompting
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub session_params: Option<HashMap<String, serde_json::Value>>,
+
+    /// Return model hidden states
+    #[serde(default)]
+    pub return_hidden_states: bool,
+
     /// Additional fields including bootstrap info for PD routing
     #[serde(flatten)]
     pub other: serde_json::Map<String, serde_json::Value>,
```
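A sketch of what the new fields buy: extension parameters now deserialize into typed fields instead of falling through to the flattened `other` map (request values invented for illustration, and assuming the remaining non-`Option` fields carry serde defaults, as the ones above do):

```rust
let body = r#"{"model": "m", "prompt": "hello", "top_k": 40, "min_p": 0.05}"#;
let req: CompletionRequest = serde_json::from_str(body).unwrap();
assert_eq!(req.top_k, Some(40));
assert!(req.other.is_empty()); // nothing left over for the flattened map
```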
```diff
@@ -166,7 +239,7 @@ pub struct ChatCompletionRequest {
     /// Modify the likelihood of specified tokens appearing in the completion
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub logit_bias: Option<HashMap<String, i32>>,
+    pub logit_bias: Option<HashMap<String, f32>>,
 
     /// A unique identifier representing your end-user
     #[serde(skip_serializing_if = "Option::is_none")]
```
```diff
@@ -207,6 +280,72 @@ pub struct ChatCompletionRequest {
     /// Deprecated: use tool_choice instead
     #[serde(skip_serializing_if = "Option::is_none")]
     pub function_call: Option<FunctionCall>,
 
+    // ============= SGLang Extensions =============
+    /// Top-k sampling parameter (-1 to disable)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub top_k: Option<i32>,
+
+    /// Min-p nucleus sampling parameter
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub min_p: Option<f32>,
+
+    /// Minimum number of tokens to generate
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub min_tokens: Option<u32>,
+
+    /// Repetition penalty for reducing repetitive text
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub repetition_penalty: Option<f32>,
+
+    /// Regex constraint for output generation
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub regex: Option<String>,
+
+    /// EBNF grammar constraint for structured output
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ebnf: Option<String>,
+
+    /// Specific token IDs to use as stop conditions
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stop_token_ids: Option<Vec<i32>>,
+
+    /// Skip trimming stop tokens from output
+    #[serde(default)]
+    pub no_stop_trim: bool,
+
+    /// Ignore end-of-sequence tokens during generation
+    #[serde(default)]
+    pub ignore_eos: bool,
+
+    /// Continue generating from final assistant message
+    #[serde(default)]
+    pub continue_final_message: bool,
+
+    /// Skip special tokens during detokenization
+    #[serde(default = "default_true")]
+    pub skip_special_tokens: bool,
+
+    // ============= SGLang Extensions =============
+    /// Path to LoRA adapter(s) for model customization
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub lora_path: Option<LoRAPath>,
+
+    /// Session parameters for continual prompting
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub session_params: Option<HashMap<String, serde_json::Value>>,
+
+    /// Separate reasoning content from final answer (O1-style models)
+    #[serde(default = "default_true")]
+    pub separate_reasoning: bool,
+
+    /// Stream reasoning tokens during generation
+    #[serde(default = "default_true")]
+    pub stream_reasoning: bool,
+
+    /// Return model hidden states
+    #[serde(default)]
+    pub return_hidden_states: bool,
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
```
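A sketch of the `default = "default_true"` behavior on deserialization, assuming (as for the fields shown here) that the request's other non-`Option` fields also carry serde defaults:

```rust
let req: ChatCompletionRequest =
    serde_json::from_str(r#"{"model": "m", "messages": []}"#).unwrap();
assert!(req.skip_special_tokens);   // #[serde(default = "default_true")]
assert!(req.separate_reasoning);    // #[serde(default = "default_true")]
assert!(!req.return_hidden_states); // plain #[serde(default)] falls back to false
```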
```diff
@@ -234,6 +373,9 @@ pub enum ChatMessage {
         tool_calls: Option<Vec<ToolCall>>,
         #[serde(skip_serializing_if = "Option::is_none")]
         function_call: Option<FunctionCallResponse>,
+        /// Reasoning content for O1-style models (SGLang extension)
+        #[serde(skip_serializing_if = "Option::is_none")]
+        reasoning_content: Option<String>,
     },
     Tool {
         role: String, // "tool"
```
```diff
@@ -378,7 +520,20 @@ impl GenerationRequest for ChatCompletionRequest {
                     Some(texts.join(" "))
                 }
             },
-            ChatMessage::Assistant { content, .. } => content.clone(),
+            ChatMessage::Assistant {
+                content,
+                reasoning_content,
+                ..
+            } => {
+                // Combine content and reasoning content for routing decisions
+                let main_content = content.clone().unwrap_or_default();
+                let reasoning = reasoning_content.clone().unwrap_or_default();
+                if main_content.is_empty() && reasoning.is_empty() {
+                    None
+                } else {
+                    Some(format!("{} {}", main_content, reasoning).trim().to_string())
+                }
+            }
             ChatMessage::Tool { content, .. } => Some(content.clone()),
             ChatMessage::Function { content, .. } => Some(content.clone()),
         })
```
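The combination rule is easy to check in isolation; a standalone sketch of the same logic:

```rust
fn combine(content: Option<String>, reasoning: Option<String>) -> Option<String> {
    let main_content = content.unwrap_or_default();
    let reasoning = reasoning.unwrap_or_default();
    if main_content.is_empty() && reasoning.is_empty() {
        None
    } else {
        // trim() covers the cases where one side is empty
        Some(format!("{} {}", main_content, reasoning).trim().to_string())
    }
}

fn main() {
    assert_eq!(combine(None, None), None);
    assert_eq!(combine(None, Some("plan".into())), Some("plan".to_string()));
    assert_eq!(combine(Some("a".into()), Some("b".into())), Some("a b".to_string()));
}
```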
```diff
@@ -418,6 +573,23 @@ pub struct GenerateRequest {
     /// Whether to return logprobs
     #[serde(default)]
     pub return_logprob: bool,
 
+    // ============= SGLang Extensions =============
+    /// Path to LoRA adapter(s) for model customization
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub lora_path: Option<LoRAPath>,
+
+    /// Session parameters for continual prompting
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub session_params: Option<HashMap<String, serde_json::Value>>,
+
+    /// Return model hidden states
+    #[serde(default)]
+    pub return_hidden_states: bool,
+
+    /// Request ID for tracking
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub rid: Option<String>,
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
```
```diff
@@ -485,6 +657,18 @@ pub struct SamplingParams {
     pub skip_special_tokens: Option<bool>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub json_schema: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub regex: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub ebnf: Option<String>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub min_p: Option<f32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub min_tokens: Option<u32>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub stop_token_ids: Option<Vec<i32>>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub no_stop_trim: Option<bool>,
 }
 
 impl GenerationRequest for GenerateRequest {
```
```diff
@@ -561,6 +745,12 @@ pub struct CompletionChoice {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub logprobs: Option<LogProbs>,
     pub finish_reason: Option<String>, // "stop", "length", "content_filter", etc.
+    /// Information about which stop condition was matched
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matched_stop: Option<serde_json::Value>, // Can be string or integer
+    /// Hidden states from the model (SGLang extension)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub hidden_states: Option<Vec<f32>>,
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
```
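Since `matched_stop` may be either a string or an integer in responses, it is kept as a raw `serde_json::Value`; a hypothetical consumer would branch on the JSON type:

```rust
// Sketch: `choice` stands for a deserialized CompletionChoice.
match choice.matched_stop.as_ref() {
    Some(serde_json::Value::String(s)) => println!("stopped on string {s:?}"),
    Some(serde_json::Value::Number(n)) => println!("stopped on token id {n}"),
    _ => {}
}
```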
```diff
@@ -591,6 +781,12 @@ pub struct ChatChoice {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub logprobs: Option<ChatLogProbs>,
     pub finish_reason: Option<String>, // "stop", "length", "tool_calls", "content_filter", "function_call"
+    /// Information about which stop condition was matched
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub matched_stop: Option<serde_json::Value>, // Can be string or integer
+    /// Hidden states from the model (SGLang extension)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub hidden_states: Option<Vec<f32>>,
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
```
```diff
@@ -681,6 +877,9 @@ pub struct ChatMessageDelta {
     pub tool_calls: Option<Vec<ToolCallDelta>>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub function_call: Option<FunctionCallDelta>,
+    /// Reasoning content delta for O1-style models (SGLang extension)
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub reasoning_content: Option<String>,
 }
 
 #[derive(Debug, Clone, Deserialize, Serialize)]
```
sgl-router/src/routers/pd_types.rs

```diff
@@ -278,11 +278,11 @@ mod bootstrap_tests {
     use crate::core::BasicWorker;
     use crate::openai_api_types::StringOrArray;
 
-    #[test]
-    fn test_completion_batch_size_with_array_prompt() {
-        let req = CompletionRequest {
-            model: "test".to_string(),
-            prompt: StringOrArray::Array(vec!["prompt1".to_string(), "prompt2".to_string()]),
+    /// Create a default CompletionRequest for testing with minimal fields set
+    fn default_completion_request() -> CompletionRequest {
+        CompletionRequest {
+            model: String::new(),
+            prompt: StringOrArray::String(String::new()),
             n: None,
             other: serde_json::Map::new(),
             suffix: None,
@@ -300,6 +300,31 @@ mod bootstrap_tests {
             logit_bias: None,
             user: None,
             seed: None,
+            // SGLang Extensions
+            top_k: None,
+            min_p: None,
+            min_tokens: None,
+            repetition_penalty: None,
+            regex: None,
+            ebnf: None,
+            json_schema: None,
+            stop_token_ids: None,
+            no_stop_trim: false,
+            ignore_eos: false,
+            skip_special_tokens: true,
+            // SGLang Extensions
+            lora_path: None,
+            session_params: None,
+            return_hidden_states: false,
+        }
+    }
+
+    #[test]
+    fn test_completion_batch_size_with_array_prompt() {
+        let req = CompletionRequest {
+            model: "test".to_string(),
+            prompt: StringOrArray::Array(vec!["prompt1".to_string(), "prompt2".to_string()]),
+            ..default_completion_request()
         };
 
         // Should return batch size for array prompt
```
```diff
@@ -311,23 +336,7 @@ mod bootstrap_tests {
         let req = CompletionRequest {
             model: "test".to_string(),
             prompt: StringOrArray::String("single prompt".to_string()),
-            n: None,
-            other: serde_json::Map::new(),
-            suffix: None,
-            max_tokens: None,
-            temperature: None,
-            top_p: None,
-            stream: false,
-            stream_options: None,
-            logprobs: None,
-            echo: false,
-            stop: None,
-            presence_penalty: None,
-            frequency_penalty: None,
-            best_of: None,
-            logit_bias: None,
-            user: None,
-            seed: None,
+            ..default_completion_request()
         };
 
         // Should return None for single prompt
```
```diff
@@ -340,22 +349,7 @@ mod bootstrap_tests {
             model: "test".to_string(),
             prompt: StringOrArray::String("single prompt".to_string()),
             n: Some(3),
-            other: serde_json::Map::new(),
-            suffix: None,
-            max_tokens: None,
-            temperature: None,
-            top_p: None,
-            stream: false,
-            stream_options: None,
-            logprobs: None,
-            echo: false,
-            stop: None,
-            presence_penalty: None,
-            frequency_penalty: None,
-            best_of: None,
-            logit_bias: None,
-            user: None,
-            seed: None,
+            ..default_completion_request()
         };
 
         // Should return None for single string prompt, even with n > 1
```
```diff
@@ -368,23 +362,7 @@ mod bootstrap_tests {
         let mut req = CompletionRequest {
             model: "test".to_string(),
             prompt: StringOrArray::Array(vec!["prompt1".to_string(), "prompt2".to_string()]),
-            n: None,
-            other: serde_json::Map::new(),
-            suffix: None,
-            max_tokens: None,
-            temperature: None,
-            top_p: None,
-            stream: false,
-            stream_options: None,
-            logprobs: None,
-            echo: false,
-            stop: None,
-            presence_penalty: None,
-            frequency_penalty: None,
-            best_of: None,
-            logit_bias: None,
-            user: None,
-            seed: None,
+            ..default_completion_request()
         };
 
         // Set bootstrap info - should always use single values
```
```diff
@@ -418,23 +396,7 @@ mod bootstrap_tests {
         let mut req = CompletionRequest {
             model: "test".to_string(),
             prompt: StringOrArray::Array(vec!["prompt1".to_string(), "prompt2".to_string()]),
-            n: None,
-            other: serde_json::Map::new(),
-            suffix: None,
-            max_tokens: None,
-            temperature: None,
-            top_p: None,
-            stream: false,
-            stream_options: None,
-            logprobs: None,
-            echo: false,
-            stop: None,
-            presence_penalty: None,
-            frequency_penalty: None,
-            best_of: None,
-            logit_bias: None,
-            user: None,
-            seed: None,
+            ..default_completion_request()
         };
 
         // Set bootstrap info with arrays
```
sgl-router/src/routers/request_adapter.rs

(Diff collapsed in the page view; +392 −162, contents not shown.)
sgl-router/tests/benchmark_integration.rs

```diff
@@ -8,14 +8,118 @@ use sglang_router_rs::openai_api_types::{
 };
 use sglang_router_rs::routers::request_adapter::{RouteableRequest, ToPdRequest};
 
+/// Create a default GenerateRequest for benchmarks with minimal fields set
+fn default_generate_request() -> GenerateRequest {
+    GenerateRequest {
+        text: None,
+        prompt: None,
+        input_ids: None,
+        stream: false,
+        parameters: None,
+        sampling_params: None,
+        return_logprob: false,
+        // SGLang Extensions
+        lora_path: None,
+        session_params: None,
+        return_hidden_states: false,
+        rid: None,
+    }
+}
+
+/// Create a default ChatCompletionRequest for benchmarks with minimal fields set
+fn default_chat_completion_request() -> ChatCompletionRequest {
+    ChatCompletionRequest {
+        model: String::new(),
+        messages: vec![],
+        max_tokens: None,
+        max_completion_tokens: None,
+        temperature: None,
+        top_p: None,
+        n: None,
+        stream: false,
+        stream_options: None,
+        stop: None,
+        presence_penalty: None,
+        frequency_penalty: None,
+        logit_bias: None,
+        logprobs: false,
+        top_logprobs: None,
+        user: None,
+        response_format: None,
+        seed: None,
+        tools: None,
+        tool_choice: None,
+        parallel_tool_calls: None,
+        function_call: None,
+        functions: None,
+        // SGLang Extensions
+        top_k: None,
+        min_p: None,
+        min_tokens: None,
+        repetition_penalty: None,
+        regex: None,
+        ebnf: None,
+        stop_token_ids: None,
+        no_stop_trim: false,
+        ignore_eos: false,
+        continue_final_message: false,
+        skip_special_tokens: true,
+        // SGLang Extensions
+        lora_path: None,
+        session_params: None,
+        separate_reasoning: true,
+        stream_reasoning: true,
+        return_hidden_states: false,
+    }
+}
+
+/// Create a default CompletionRequest for benchmarks with minimal fields set
+fn default_completion_request() -> CompletionRequest {
+    CompletionRequest {
+        model: String::new(),
+        prompt: StringOrArray::String(String::new()),
+        suffix: None,
+        max_tokens: None,
+        temperature: None,
+        top_p: None,
+        n: None,
+        stream: false,
+        stream_options: None,
+        logprobs: None,
+        echo: false,
+        stop: None,
+        presence_penalty: None,
+        frequency_penalty: None,
+        best_of: None,
+        logit_bias: None,
+        user: None,
+        seed: None,
+        // SGLang Extensions
+        top_k: None,
+        min_p: None,
+        min_tokens: None,
+        repetition_penalty: None,
+        regex: None,
+        ebnf: None,
+        json_schema: None,
+        stop_token_ids: None,
+        no_stop_trim: false,
+        ignore_eos: false,
+        skip_special_tokens: true,
+        // SGLang Extensions
+        lora_path: None,
+        session_params: None,
+        return_hidden_states: false,
+        other: serde_json::Map::new(),
+    }
+}
+
 #[test]
 fn test_benchmark_request_creation() {
     // Ensure all benchmark request types can be created without panicking
     let generate_req = GenerateRequest {
         text: Some("Test prompt".to_string()),
-        input_ids: None,
+        prompt: None,
         parameters: Some(GenerateParameters {
             max_new_tokens: Some(100),
             temperature: Some(0.8),
@@ -33,8 +137,7 @@ fn test_benchmark_request_creation() {
             repetition_penalty: Some(1.0),
             ..Default::default()
         }),
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
 
     let chat_req = ChatCompletionRequest {
```
```diff
@@ -49,44 +152,23 @@ fn test_benchmark_request_creation() {
         temperature: Some(0.7),
         top_p: Some(1.0),
         n: Some(1),
-        stream: false,
-        stream_options: None,
-        stop: None,
         presence_penalty: Some(0.0),
         frequency_penalty: Some(0.0),
-        logit_bias: None,
-        logprobs: false,
-        top_logprobs: None,
-        user: None,
-        response_format: None,
-        seed: None,
-        tools: None,
-        tool_choice: None,
         parallel_tool_calls: Some(true),
-        function_call: None,
-        functions: None,
+        ..default_chat_completion_request()
     };
 
     let completion_req = CompletionRequest {
         model: "test-model".to_string(),
         prompt: StringOrArray::String("Test prompt".to_string()),
-        suffix: None,
         max_tokens: Some(50),
         temperature: Some(0.8),
         top_p: Some(1.0),
         n: Some(1),
-        stream: false,
-        stream_options: None,
-        logprobs: None,
-        echo: false,
-        stop: None,
         presence_penalty: Some(0.0),
         frequency_penalty: Some(0.0),
         best_of: Some(1),
-        logit_bias: None,
-        user: None,
-        seed: None,
-        other: serde_json::Map::new(),
+        ..default_completion_request()
     };
 
     // Test serialization works
```
```diff
@@ -101,12 +183,7 @@ fn test_benchmark_serialization_roundtrip() {
     let generate_req = GenerateRequest {
         text: Some("Test prompt".to_string()),
-        input_ids: None,
-        prompt: None,
-        parameters: None,
-        sampling_params: None,
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
 
     // Serialize and deserialize
```
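A sketch of the round trip this test exercises (assuming `serde_json`, which the crate already uses):

```rust
let json = serde_json::to_string(&generate_req).unwrap();
let restored: GenerateRequest = serde_json::from_str(&json).unwrap();
assert_eq!(restored.text, generate_req.text); // fields survive the round trip
```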
```diff
@@ -125,12 +202,7 @@ fn test_benchmark_request_adaptation() {
     let generate_req = GenerateRequest {
         text: Some("Test prompt".to_string()),
-        input_ids: None,
-        prompt: None,
-        parameters: None,
-        sampling_params: None,
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
 
     let chat_req = ChatCompletionRequest {
```
```diff
@@ -145,44 +217,23 @@ fn test_benchmark_request_adaptation() {
         temperature: Some(0.7),
         top_p: Some(1.0),
         n: Some(1),
-        stream: false,
-        stream_options: None,
-        stop: None,
         presence_penalty: Some(0.0),
         frequency_penalty: Some(0.0),
-        logit_bias: None,
-        logprobs: false,
-        top_logprobs: None,
-        user: None,
-        response_format: None,
-        seed: None,
-        tools: None,
-        tool_choice: None,
         parallel_tool_calls: Some(true),
-        function_call: None,
-        functions: None,
+        ..default_chat_completion_request()
     };
 
     let completion_req = CompletionRequest {
         model: "test-model".to_string(),
         prompt: StringOrArray::String("Test prompt".to_string()),
-        suffix: None,
         max_tokens: Some(50),
         temperature: Some(0.8),
         top_p: Some(1.0),
         n: Some(1),
-        stream: false,
-        stream_options: None,
-        logprobs: None,
-        echo: false,
-        stop: None,
         presence_penalty: Some(0.0),
         frequency_penalty: Some(0.0),
         best_of: Some(1),
-        logit_bias: None,
-        user: None,
-        seed: None,
-        other: serde_json::Map::new(),
+        ..default_completion_request()
     };
 
     // Test PD adaptation (should not panic)
```
```diff
@@ -197,12 +248,7 @@ fn test_benchmark_regular_routing() {
     let generate_req = GenerateRequest {
         text: Some("Test prompt".to_string()),
-        input_ids: None,
-        prompt: None,
-        parameters: None,
-        sampling_params: None,
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
 
     // Test regular routing methods (should not panic)
```
```diff
@@ -217,12 +263,7 @@ fn test_benchmark_performance_baseline() {
     let generate_req = GenerateRequest {
         text: Some("Short test prompt".to_string()),
-        input_ids: None,
-        prompt: None,
-        parameters: None,
-        sampling_params: None,
-        stream: false,
-        return_logprob: false,
+        ..default_generate_request()
     };
 
     // Serialization should be fast (< 1ms for simple requests)
```