Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0b9915c1
Unverified
Commit
0b9915c1
authored
Oct 14, 2025
by
Simo Lin
Committed by
GitHub
Oct 14, 2025
Browse files
[router] update generate spec to align with sgl io struct (#11591)
parent
27ef1459
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
191 additions
and
54 deletions
+191
-54
sgl-router/benches/request_processing.rs
sgl-router/benches/request_processing.rs
+30
-6
sgl-router/src/grpc_client/sglang_scheduler.rs
sgl-router/src/grpc_client/sglang_scheduler.rs
+5
-5
sgl-router/src/protocols/spec.rs
sgl-router/src/protocols/spec.rs
+124
-22
sgl-router/src/routers/grpc/pipeline.rs
sgl-router/src/routers/grpc/pipeline.rs
+1
-1
sgl-router/src/routers/grpc/streaming.rs
sgl-router/src/routers/grpc/streaming.rs
+1
-1
sgl-router/src/routers/http/pd_router.rs
sgl-router/src/routers/http/pd_router.rs
+2
-15
sgl-router/tests/test_openai_routing.rs
sgl-router/tests/test_openai_routing.rs
+28
-4
No files found.
sgl-router/benches/request_processing.rs
View file @
0b9915c1
...
...
@@ -28,15 +28,38 @@ fn get_bootstrap_info(worker: &BasicWorker) -> (String, Option<u16>) {
fn
default_generate_request
()
->
GenerateRequest
{
GenerateRequest
{
text
:
None
,
prompt
:
None
,
input_ids
:
None
,
stream
:
false
,
input_embeds
:
None
,
image_data
:
None
,
video_data
:
None
,
audio_data
:
None
,
sampling_params
:
None
,
return_logprob
:
false
,
// SGLang Extensions
lora_path
:
None
,
session_params
:
None
,
return_logprob
:
None
,
logprob_start_len
:
None
,
top_logprobs_num
:
None
,
token_ids_logprob
:
None
,
return_text_in_logprobs
:
false
,
stream
:
false
,
log_metrics
:
true
,
return_hidden_states
:
false
,
modalities
:
None
,
session_params
:
None
,
lora_path
:
None
,
lora_id
:
None
,
custom_logit_processor
:
None
,
bootstrap_host
:
None
,
bootstrap_port
:
None
,
bootstrap_room
:
None
,
bootstrap_pair_key
:
None
,
data_parallel_rank
:
None
,
background
:
false
,
conversation_id
:
None
,
priority
:
None
,
extra_key
:
None
,
no_logs
:
false
,
custom_labels
:
None
,
return_bytes
:
false
,
return_entropy
:
false
,
rid
:
None
,
}
}
...
...
@@ -101,6 +124,7 @@ fn create_sample_generate_request() -> GenerateRequest {
GenerateRequest
{
text
:
Some
(
"Write a story about artificial intelligence"
.to_string
()),
sampling_params
:
Some
(
SamplingParams
{
max_new_tokens
:
Some
(
100
),
temperature
:
Some
(
0.8
),
top_p
:
Some
(
0.9
),
top_k
:
Some
(
50
),
...
...
sgl-router/src/grpc_client/sglang_scheduler.rs
View file @
0b9915c1
...
...
@@ -280,13 +280,13 @@ impl SglangSchedulerClient {
input_ids
:
token_ids
,
}),
sampling_params
:
Some
(
sampling_params
),
return_logprob
:
body
.return_logprob
,
logprob_start_len
:
-
1
,
top_logprobs_num
:
0
,
token_ids_logprob
:
vec!
[]
,
return_logprob
:
body
.return_logprob
.unwrap_or
(
false
)
,
logprob_start_len
:
body
.logprob_start_len
.unwrap_or
(
-
1
)
,
top_logprobs_num
:
body
.top_logprobs_num
.unwrap_or
(
0
)
,
token_ids_logprob
:
body
.token_ids_logprob
.clone
()
.unwrap_or_default
()
,
return_hidden_states
:
body
.return_hidden_states
,
stream
:
body
.stream
,
log_metrics
:
true
,
log_metrics
:
body
.log_metrics
,
..
Default
::
default
()
};
...
...
sgl-router/src/protocols/spec.rs
View file @
0b9915c1
...
...
@@ -356,7 +356,7 @@ pub struct ChatCompletionRequest {
/// Path to LoRA adapter(s) for model customization
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
LoRAPath
>
,
pub
lora_path
:
Option
<
String
>
,
/// Session parameters for continual prompting
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
@@ -905,7 +905,7 @@ pub struct CompletionRequest {
/// Path to LoRA adapter(s) for model customization
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
LoRAPath
>
,
pub
lora_path
:
Option
<
String
>
,
/// Session parameters for continual prompting
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
@@ -2309,10 +2309,6 @@ fn validate_sampling_params(params: &SamplingParams) -> Result<(), validator::Va
#[derive(Clone,
Debug,
Serialize,
Deserialize,
Validate)]
#[validate(schema(function
=
"validate_generate_request"
))]
pub
struct
GenerateRequest
{
/// The prompt to generate from (OpenAI style)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
prompt
:
Option
<
StringOrArray
>
,
/// Text input - SGLang native format
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
text
:
Option
<
String
>
,
...
...
@@ -2321,31 +2317,144 @@ pub struct GenerateRequest {
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
input_ids
:
Option
<
InputIds
>
,
/// Input embeddings for direct embedding input
/// Can be a 2D array (single request) or 3D array (batch of requests)
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
input_embeds
:
Option
<
Value
>
,
/// Image input data
/// Can be an image instance, file name, URL, or base64 encoded string
/// Supports single images, lists of images, or nested lists for batch processing
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
image_data
:
Option
<
Value
>
,
/// Video input data
/// Can be a file name, URL, or base64 encoded string
/// Supports single videos, lists of videos, or nested lists for batch processing
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
video_data
:
Option
<
Value
>
,
/// Audio input data
/// Can be a file name, URL, or base64 encoded string
/// Supports single audio files, lists of audio, or nested lists for batch processing
/// Placeholder for future use
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
audio_data
:
Option
<
Value
>
,
/// Sampling parameters (sglang style)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
sampling_params
:
Option
<
SamplingParams
>
,
/// Whether to return logprobs
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
return_logprob
:
Option
<
bool
>
,
/// If return logprobs, the start location in the prompt for returning logprobs.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
logprob_start_len
:
Option
<
i32
>
,
/// If return logprobs, the number of top logprobs to return at each position.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
top_logprobs_num
:
Option
<
i32
>
,
/// If return logprobs, the token ids to return logprob for.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
token_ids_logprob
:
Option
<
Vec
<
u32
>>
,
/// Whether to detokenize tokens in text in the returned logprobs.
#[serde(default)]
pub
return_text_in_logprobs
:
bool
,
/// Whether to stream the response
#[serde(default)]
pub
stream
:
bool
,
/// Whether to return logprobs
/// Whether to log metrics for this request (e.g. health_generate calls do not log metrics)
#[serde(default
=
"default_true"
)]
pub
log_metrics
:
bool
,
/// Return model hidden states
#[serde(default)]
pub
return_
logprob
:
bool
,
pub
return_
hidden_states
:
bool
,
/// Path to LoRA adapter(s) for model customization
/// The modalities of the image data [image, multi-images, video]
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
LoRAPath
>
,
pub
modalities
:
Option
<
Vec
<
String
>
>
,
/// Session parameters for continual prompting
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
session_params
:
Option
<
HashMap
<
String
,
Value
>>
,
/// Return model hidden states
/// Path to LoRA adapter(s) for model customization
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_path
:
Option
<
String
>
,
/// LoRA adapter ID (if pre-loaded)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
lora_id
:
Option
<
String
>
,
/// Custom logit processor for advanced sampling control. Must be a serialized instance
/// of `CustomLogitProcessor` in python/sglang/srt/sampling/custom_logit_processor.py
/// Use the processor's `to_str()` method to generate the serialized string.
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
custom_logit_processor
:
Option
<
String
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_host
:
Option
<
String
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_port
:
Option
<
i32
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_room
:
Option
<
i32
>
,
/// For disaggregated inference
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
bootstrap_pair_key
:
Option
<
String
>
,
/// Data parallel rank routing
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
data_parallel_rank
:
Option
<
i32
>
,
/// Background response
#[serde(default)]
pub
return_hidden_states
:
bool
,
pub
background
:
bool
,
/// Request ID for tracking
/// Conversation ID for tracking
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
conversation_id
:
Option
<
String
>
,
/// Priority for the request
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
priority
:
Option
<
i32
>
,
/// Extra key for classifying the request (e.g. cache_salt)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
extra_key
:
Option
<
String
>
,
/// Whether to disallow logging for this request (e.g. due to ZDR)
#[serde(default)]
pub
no_logs
:
bool
,
/// Custom metric labels
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
custom_labels
:
Option
<
HashMap
<
String
,
String
>>
,
/// Whether to return bytes for image generation
#[serde(default)]
pub
return_bytes
:
bool
,
/// Whether to return entropy
#[serde(default)]
pub
return_entropy
:
bool
,
/// Request ID for tracking (inherited from BaseReq in Python)
#[serde(skip_serializing_if
=
"Option::is_none"
)]
pub
rid
:
Option
<
String
>
,
}
...
...
@@ -2358,7 +2467,7 @@ impl Normalizable for GenerateRequest {
fn
validate_generate_request
(
req
:
&
GenerateRequest
)
->
Result
<
(),
validator
::
ValidationError
>
{
// Exactly one of text or input_ids must be provided
// Note: input_embeds not yet supported in Rust implementation
let
has_text
=
req
.text
.is_some
()
||
req
.prompt
.is_some
()
;
let
has_text
=
req
.text
.is_some
();
let
has_input_ids
=
req
.input_ids
.is_some
();
let
count
=
[
has_text
,
has_input_ids
]
.iter
()
.filter
(|
&&
x
|
x
)
.count
();
...
...
@@ -2389,18 +2498,11 @@ impl GenerationRequest for GenerateRequest {
}
fn
extract_text_for_routing
(
&
self
)
->
String
{
// Check fields in priority order: text, prompt, inputs
// Check fields in priority order: text, input_ids
if
let
Some
(
ref
text
)
=
self
.text
{
return
text
.clone
();
}
if
let
Some
(
ref
prompt
)
=
self
.prompt
{
return
match
prompt
{
StringOrArray
::
String
(
s
)
=>
s
.clone
(),
StringOrArray
::
Array
(
v
)
=>
v
.join
(
" "
),
};
}
if
let
Some
(
ref
input_ids
)
=
self
.input_ids
{
return
match
input_ids
{
InputIds
::
Single
(
ids
)
=>
ids
...
...
sgl-router/src/routers/grpc/pipeline.rs
View file @
0b9915c1
...
...
@@ -877,7 +877,7 @@ impl ResponseProcessingStage {
}
// Non-streaming: Delegate to ResponseProcessor
let
request_logprobs
=
ctx
.generate_request
()
.return_logprob
;
let
request_logprobs
=
ctx
.generate_request
()
.return_logprob
.unwrap_or
(
false
)
;
let
generate_request
=
ctx
.generate_request_arc
();
let
stop_decoder
=
ctx
...
...
sgl-router/src/routers/grpc/streaming.rs
View file @
0b9915c1
...
...
@@ -616,7 +616,7 @@ impl StreamingProcessor {
generate_request
:
Arc
<
GenerateRequest
>
,
dispatch
:
context
::
DispatchMetadata
,
)
->
Response
{
let
return_logprob
=
generate_request
.return_logprob
;
let
return_logprob
=
generate_request
.return_logprob
.unwrap_or
(
false
)
;
// Create SSE channel
let
(
tx
,
rx
)
=
mpsc
::
unbounded_channel
::
<
Result
<
Bytes
,
io
::
Error
>>
();
...
...
sgl-router/src/routers/http/pd_router.rs
View file @
0b9915c1
...
...
@@ -150,11 +150,6 @@ impl PDRouter {
}
fn
get_generate_batch_size
(
req
:
&
GenerateRequest
)
->
Option
<
usize
>
{
if
let
Some
(
StringOrArray
::
Array
(
arr
))
=
&
req
.prompt
{
if
!
arr
.is_empty
()
{
return
Some
(
arr
.len
());
}
}
if
let
Some
(
text
)
=
&
req
.text
{
if
text
.contains
(
"["
)
&&
text
.contains
(
"]"
)
{
return
None
;
...
...
@@ -1061,18 +1056,10 @@ impl RouterTrait for PDRouter {
model_id
:
Option
<&
str
>
,
)
->
Response
{
let
is_stream
=
body
.stream
;
let
return_logprob
=
body
.return_logprob
;
let
return_logprob
=
body
.return_logprob
.unwrap_or
(
false
)
;
let
request_text
=
if
self
.policies_need_request_text
()
{
body
.text
.as_deref
()
.or_else
(||
{
body
.prompt
.as_ref
()
.and_then
(|
p
|
match
p
{
StringOrArray
::
String
(
s
)
=>
Some
(
s
.as_str
()),
StringOrArray
::
Array
(
v
)
=>
v
.first
()
.map
(|
s
|
s
.as_str
()),
})
})
.map
(|
s
|
s
.to_string
())
body
.text
.as_deref
()
.map
(|
s
|
s
.to_string
())
}
else
{
None
};
...
...
sgl-router/tests/test_openai_routing.rs
View file @
0b9915c1
...
...
@@ -598,15 +598,39 @@ async fn test_unsupported_endpoints() {
.unwrap
();
let
generate_request
=
GenerateRequest
{
prompt
:
None
,
text
:
Some
(
"Hello world"
.to_string
()),
input_ids
:
None
,
input_embeds
:
None
,
image_data
:
None
,
video_data
:
None
,
audio_data
:
None
,
sampling_params
:
None
,
stream
:
false
,
return_logprob
:
false
,
lora_path
:
None
,
session_params
:
None
,
return_logprob
:
Some
(
false
),
logprob_start_len
:
None
,
top_logprobs_num
:
None
,
token_ids_logprob
:
None
,
return_text_in_logprobs
:
false
,
log_metrics
:
true
,
return_hidden_states
:
false
,
modalities
:
None
,
session_params
:
None
,
lora_path
:
None
,
lora_id
:
None
,
custom_logit_processor
:
None
,
bootstrap_host
:
None
,
bootstrap_port
:
None
,
bootstrap_room
:
None
,
bootstrap_pair_key
:
None
,
data_parallel_rank
:
None
,
background
:
false
,
conversation_id
:
None
,
priority
:
None
,
extra_key
:
None
,
no_logs
:
false
,
custom_labels
:
None
,
return_bytes
:
false
,
return_entropy
:
false
,
rid
:
None
,
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment