Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
zhaoyu6
sglang
Commits
0b9915c1
You need to sign in or sign up before continuing.
Unverified
Commit
0b9915c1
authored
Oct 14, 2025
by
Simo Lin
Committed by
GitHub
Oct 14, 2025
Browse files
[router] update generate spec to align with sgl io struct (#11591)
parent
27ef1459
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
191 additions
and
54 deletions
+191
-54
sgl-router/benches/request_processing.rs
sgl-router/benches/request_processing.rs
+30
-6
sgl-router/src/grpc_client/sglang_scheduler.rs
sgl-router/src/grpc_client/sglang_scheduler.rs
+5
-5
sgl-router/src/protocols/spec.rs
sgl-router/src/protocols/spec.rs
+124
-22
sgl-router/src/routers/grpc/pipeline.rs
sgl-router/src/routers/grpc/pipeline.rs
+1
-1
sgl-router/src/routers/grpc/streaming.rs
sgl-router/src/routers/grpc/streaming.rs
+1
-1
sgl-router/src/routers/http/pd_router.rs
sgl-router/src/routers/http/pd_router.rs
+2
-15
sgl-router/tests/test_openai_routing.rs
sgl-router/tests/test_openai_routing.rs
+28
-4
No files found.
sgl-router/benches/request_processing.rs
View file @
0b9915c1
...
...
@@ -28,15 +28,38 @@ fn get_bootstrap_info(worker: &BasicWorker) -> (String, Option<u16>) {
fn
default_generate_request
()
->
GenerateRequest
{
GenerateRequest
{
text
:
None
,
prompt
:
None
,
input_ids
:
None
,
stream
:
false
,
input_embeds
:
None
,
image_data
:
None
,
video_data
:
None
,
audio_data
:
None
,
sampling_params
:
None
,
return_logprob
:
false
,
// SGLang Extensions
lora_path
:
None
,
session_params
:
None
,
return_logprob
:
None
,
logprob_start_len
:
None
,
top_logprobs_num
:
None
,
token_ids_logprob
:
None
,
return_text_in_logprobs
:
false
,
stream
:
false
,
log_metrics
:
true
,
return_hidden_states
:
false
,
modalities
:
None
,
session_params
:
None
,
lora_path
:
None
,
lora_id
:
None
,
custom_logit_processor
:
None
,
bootstrap_host
:
None
,
bootstrap_port
:
None
,
bootstrap_room
:
None
,
bootstrap_pair_key
:
None
,
data_parallel_rank
:
None
,
background
:
false
,
conversation_id
:
None
,
priority
:
None
,
extra_key
:
None
,
no_logs
:
false
,
custom_labels
:
None
,
return_bytes
:
false
,
return_entropy
:
false
,
rid
:
None
,
}
}
...
...
@@ -101,6 +124,7 @@ fn create_sample_generate_request() -> GenerateRequest {
GenerateRequest
{
text
:
Some
(
"Write a story about artificial intelligence"
.to_string
()),
sampling_params
:
Some
(
SamplingParams
{
max_new_tokens
:
Some
(
100
),
temperature
:
Some
(
0.8
),
top_p
:
Some
(
0.9
),
top_k
:
Some
(
50
),
...
...
sgl-router/src/grpc_client/sglang_scheduler.rs
View file @
0b9915c1
...
...
@@ -280,13 +280,13 @@ impl SglangSchedulerClient {
input_ids
:
token_ids
,
}),
sampling_params
:
Some
(
sampling_params
),
return_logprob
:
body
.return_logprob
,
logprob_start_len
:
-
1
,
top_logprobs_num
:
0
,
token_ids_logprob
:
vec!
[]
,
return_logprob
:
body
.return_logprob
.unwrap_or
(
false
)
,
logprob_start_len
:
body
.logprob_start_len
.unwrap_or
(
-
1
)
,
top_logprobs_num
:
body
.top_logprobs_num
.unwrap_or
(
0
)
,
token_ids_logprob
:
body
.token_ids_logprob
.clone
()
.unwrap_or_default
()
,
return_hidden_states
:
body
.return_hidden_states
,
stream
:
body
.stream
,
log_metrics
:
true
,
log_metrics
:
body
.log_metrics
,
..
Default
::
default
()
};
...
...
sgl-router/src/protocols/spec.rs
View file @
0b9915c1
...
...
@@ -356,7 +356,7 @@ pub struct ChatCompletionRequest {
/// Path to LoRA adapter(s) for model customization
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
LoRAPath
>
,
pub
lora_path
:
Option
<
String
>
,
/// Session parameters for continual prompting
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
@@ -905,7 +905,7 @@ pub struct CompletionRequest {
/// Path to LoRA adapter(s) for model customization
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
LoRAPath
>
,
pub
lora_path
:
Option
<
String
>
,
/// Session parameters for continual prompting
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
@@ -2309,10 +2309,6 @@ fn validate_sampling_params(params: &SamplingParams) -> Result<(), validator::Va
#[derive(Clone,
Debug,
Serialize,
Deserialize,
Validate)]
#[validate(schema(function
=
"validate_generate_request"
))]
pub
struct
GenerateRequest
{
/// The prompt to generate from (OpenAI style)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
prompt
:
Option
<
StringOrArray
>
,
/// Text input - SGLang native format
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
text
:
Option
<
String
>
,
...
...
@@ -2321,31 +2317,144 @@ pub struct GenerateRequest {
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
input_ids
:
Option
<
InputIds
>
,
/// Input embeddings for direct embedding input
/// Can be a 2D array (single request) or 3D array (batch of requests)
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
input_embeds
:
Option
<
Value
>
,
/// Image input data
/// Can be an image instance, file name, URL, or base64 encoded string
/// Supports single images, lists of images, or nested lists for batch processing
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
image_data
:
Option
<
Value
>
,
/// Video input data
/// Can be a file name, URL, or base64 encoded string
/// Supports single videos, lists of videos, or nested lists for batch processing
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
video_data
:
Option
<
Value
>
,
/// Audio input data
/// Can be a file name, URL, or base64 encoded string
/// Supports single audio files, lists of audio, or nested lists for batch processing
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
audio_data
:
Option
<
Value
>
,
/// Sampling parameters (sglang style)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
sampling_params
:
Option
<
SamplingParams
>
,
/// Whether to return logprobs
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
return_logprob
:
Option
<
bool
>
,
/// If return logprobs, the start location in the prompt for returning logprobs.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
logprob_start_len
:
Option
<
i32
>
,
/// If return logprobs, the number of top logprobs to return at each position.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
top_logprobs_num
:
Option
<
i32
>
,
/// If return logprobs, the token ids to return logprob for.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
token_ids_logprob
:
Option
<
Vec
<
u32
>>
,
/// Whether to detokenize tokens in text in the returned logprobs.
#[serde(default)]
pub
return_text_in_logprobs
:
bool
,
/// Whether to stream the response
#[serde(default)]
pub
stream
:
bool
,
/// Whether to return logprobs
/// Whether to log metrics for this request (e.g. health_generate calls do not log metrics)
#[serde(default
=
"default_true"
)]
pub
log_metrics
:
bool
,
/// Return model hidden states
#[serde(default)]
pub
return_
logprob
:
bool
,
pub
return_
hidden_states
:
bool
,
///
Path to LoRA adapter(s) for model customization
///
The modalities of the image data [image, multi-images, video]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
LoRAPath
>
,
pub
modalities
:
Option
<
Vec
<
String
>
>
,
/// Session parameters for continual prompting
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
session_params
:
Option
<
HashMap
<
String
,
Value
>>
,
/// Return model hidden states
/// Path to LoRA adapter(s) for model customization
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
String
>
,
/// LoRA adapter ID (if pre-loaded)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_id
:
Option
<
String
>
,
/// Custom logit processor for advanced sampling control. Must be a serialized instance
/// of `CustomLogitProcessor` in python/sglang/srt/sampling/custom_logit_processor.py
/// Use the processor's `to_str()` method to generate the serialized string.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
custom_logit_processor
:
Option
<
String
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_host
:
Option
<
String
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_port
:
Option
<
i32
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_room
:
Option
<
i32
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_pair_key
:
Option
<
String
>
,
/// Data parallel rank routing
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
data_parallel_rank
:
Option
<
i32
>
,
/// Background response
#[serde(default)]
pub
return_hidden_states
:
bool
,
pub
background
:
bool
,
/// Request ID for tracking
/// Conversation ID for tracking
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
conversation_id
:
Option
<
String
>
,
/// Priority for the request
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
priority
:
Option
<
i32
>
,
/// Extra key for classifying the request (e.g. cache_salt)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
extra_key
:
Option
<
String
>
,
/// Whether to disallow logging for this request (e.g. due to ZDR)
#[serde(default)]
pub
no_logs
:
bool
,
/// Custom metric labels
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
custom_labels
:
Option
<
HashMap
<
String
,
String
>>
,
/// Whether to return bytes for image generation
#[serde(default)]
pub
return_bytes
:
bool
,
/// Whether to return entropy
#[serde(default)]
pub
return_entropy
:
bool
,
/// Request ID for tracking (inherited from BaseReq in Python)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
rid
:
Option
<
String
>
,
}
...
...
@@ -2358,7 +2467,7 @@ impl Normalizable for GenerateRequest {
fn
validate_generate_request
(
req
:
&
GenerateRequest
)
->
Result
<
(),
validator
::
ValidationError
>
{
// Exactly one of text or input_ids must be provided
// Note: input_embeds not yet supported in Rust implementation
let
has_text
=
req
.text
.is_some
()
||
req
.prompt
.is_some
()
;
let
has_text
=
req
.text
.is_some
();
let
has_input_ids
=
req
.input_ids
.is_some
();
let
count
=
[
has_text
,
has_input_ids
]
.iter
()
.filter
(|
&&
x
|
x
)
.count
();
...
...
@@ -2389,18 +2498,11 @@ impl GenerationRequest for GenerateRequest {
}
fn
extract_text_for_routing
(
&
self
)
->
String
{
// Check fields in priority order: text,
prompt,
inputs
// Check fields in priority order: text, input
_id
s
if
let
Some
(
ref
text
)
=
self
.text
{
return
text
.clone
();
}
if
let
Some
(
ref
prompt
)
=
self
.prompt
{
return
match
prompt
{
StringOrArray
::
String
(
s
)
=>
s
.clone
(),
StringOrArray
::
Array
(
v
)
=>
v
.join
(
" "
),
};
}
if
let
Some
(
ref
input_ids
)
=
self
.input_ids
{
return
match
input_ids
{
InputIds
::
Single
(
ids
)
=>
ids
...
...
sgl-router/src/routers/grpc/pipeline.rs
View file @
0b9915c1
...
...
@@ -877,7 +877,7 @@ impl ResponseProcessingStage {
}
// Non-streaming: Delegate to ResponseProcessor
let
request_logprobs
=
ctx
.generate_request
()
.return_logprob
;
let
request_logprobs
=
ctx
.generate_request
()
.return_logprob
.unwrap_or
(
false
)
;
let
generate_request
=
ctx
.generate_request_arc
();
let
stop_decoder
=
ctx
...
...
sgl-router/src/routers/grpc/streaming.rs
View file @
0b9915c1
...
...
@@ -616,7 +616,7 @@ impl StreamingProcessor {
generate_request
:
Arc
<
GenerateRequest
>
,
dispatch
:
context
::
DispatchMetadata
,
)
->
Response
{
let
return_logprob
=
generate_request
.return_logprob
;
let
return_logprob
=
generate_request
.return_logprob
.unwrap_or
(
false
)
;
// Create SSE channel
let
(
tx
,
rx
)
=
mpsc
::
unbounded_channel
::
<
Result
<
Bytes
,
io
::
Error
>>
();
...
...
sgl-router/src/routers/http/pd_router.rs
View file @
0b9915c1
...
...
@@ -150,11 +150,6 @@ impl PDRouter {
}
fn
get_generate_batch_size
(
req
:
&
GenerateRequest
)
->
Option
<
usize
>
{
if
let
Some
(
StringOrArray
::
Array
(
arr
))
=
&
req
.prompt
{
if
!
arr
.is_empty
()
{
return
Some
(
arr
.len
());
}
}
if
let
Some
(
text
)
=
&
req
.text
{
if
text
.contains
(
"["
)
&&
text
.contains
(
"]"
)
{
return
None
;
...
...
@@ -1061,18 +1056,10 @@ impl RouterTrait for PDRouter {
model_id
:
Option
<&
str
>
,
)
->
Response
{
let
is_stream
=
body
.stream
;
let
return_logprob
=
body
.return_logprob
;
let
return_logprob
=
body
.return_logprob
.unwrap_or
(
false
)
;
let
request_text
=
if
self
.policies_need_request_text
()
{
body
.text
.as_deref
()
.or_else
(||
{
body
.prompt
.as_ref
()
.and_then
(|
p
|
match
p
{
StringOrArray
::
String
(
s
)
=>
Some
(
s
.as_str
()),
StringOrArray
::
Array
(
v
)
=>
v
.first
()
.map
(|
s
|
s
.as_str
()),
})
})
.map
(|
s
|
s
.to_string
())
body
.text
.as_deref
()
.map
(|
s
|
s
.to_string
())
}
else
{
None
};
...
...
sgl-router/tests/test_openai_routing.rs
View file @
0b9915c1
...
...
@@ -598,15 +598,39 @@ async fn test_unsupported_endpoints() {
.unwrap
();
let
generate_request
=
GenerateRequest
{
prompt
:
None
,
text
:
Some
(
"Hello world"
.to_string
()),
input_ids
:
None
,
input_embeds
:
None
,
image_data
:
None
,
video_data
:
None
,
audio_data
:
None
,
sampling_params
:
None
,
stream
:
false
,
return_logprob
:
false
,
lora_path
:
None
,
session_params
:
None
,
return_logprob
:
Some
(
false
),
logprob_start_len
:
None
,
top_logprobs_num
:
None
,
token_ids_logprob
:
None
,
return_text_in_logprobs
:
false
,
log_metrics
:
true
,
return_hidden_states
:
false
,
modalities
:
None
,
session_params
:
None
,
lora_path
:
None
,
lora_id
:
None
,
custom_logit_processor
:
None
,
bootstrap_host
:
None
,
bootstrap_port
:
None
,
bootstrap_room
:
None
,
bootstrap_pair_key
:
None
,
data_parallel_rank
:
None
,
background
:
false
,
conversation_id
:
None
,
priority
:
None
,
extra_key
:
None
,
no_logs
:
false
,
custom_labels
:
None
,
return_bytes
:
false
,
return_entropy
:
false
,
rid
:
None
,
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment