Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
be059b83
Unverified
Commit
be059b83
authored
Sep 26, 2025
by
Simo Lin
Committed by
GitHub
Sep 26, 2025
Browse files
[router] grpc router regular mode import cleanup (#10963)
parent
5d4fe1ce
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
32 additions
and
26 deletions
+32
-26
sgl-router/src/routers/grpc/router.rs
sgl-router/src/routers/grpc/router.rs
+32
-26
No files found.
sgl-router/src/routers/grpc/router.rs
View file @
be059b83
...
@@ -12,19 +12,25 @@ use axum::{
...
@@ -12,19 +12,25 @@ use axum::{
use
tracing
::{
debug
,
error
,
info
,
warn
};
use
tracing
::{
debug
,
error
,
info
,
warn
};
use
crate
::
config
::
types
::
RetryConfig
;
use
crate
::
config
::
types
::
RetryConfig
;
use
crate
::
core
::{
WorkerRegistry
,
WorkerType
};
use
crate
::
core
::{
ConnectionMode
,
Worker
,
WorkerRegistry
,
WorkerType
};
use
crate
::
grpc_client
::{
proto
,
SglangSchedulerClient
};
use
crate
::
grpc_client
::{
proto
,
SglangSchedulerClient
};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
protocols
::
spec
::
ChatMessage
;
use
crate
::
protocols
::
spec
::{
ChatCompletionRequest
,
StringOrArray
};
use
crate
::
protocols
::
spec
::{
ChatCompletionRequest
,
StringOrArray
};
use
crate
::
protocols
::
spec
::{
CompletionRequest
,
EmbeddingRequest
,
GenerateRequest
,
RerankRequest
,
ResponsesGetParams
,
ResponsesRequest
,
Tool
,
ToolChoice
,
};
use
crate
::
reasoning_parser
::
ParserFactory
;
use
crate
::
reasoning_parser
::
ParserFactory
;
use
crate
::
routers
::
RouterTrait
;
use
crate
::
routers
::
RouterTrait
;
use
crate
::
server
::
AppContext
;
use
crate
::
tokenizer
::
chat_template
::{
ChatTemplateContentFormat
,
ChatTemplateParams
};
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tokenizer
::
HuggingFaceTokenizer
;
use
crate
::
tool_parser
::
ParserRegistry
;
use
crate
::
tool_parser
::
ParserRegistry
;
use
uuid
::
Uuid
;
use
crate
::
tokenizer
::
chat_template
::{
ChatTemplateContentFormat
,
ChatTemplateParams
};
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
uuid
::
Uuid
;
// Data structures for processing
// Data structures for processing
#[derive(Debug)]
#[derive(Debug)]
...
@@ -49,7 +55,7 @@ pub struct GrpcRouter {
...
@@ -49,7 +55,7 @@ pub struct GrpcRouter {
impl
GrpcRouter
{
impl
GrpcRouter
{
/// Create a new gRPC router
/// Create a new gRPC router
pub
async
fn
new
(
ctx
:
&
Arc
<
crate
::
server
::
AppContext
>
)
->
Result
<
Self
,
String
>
{
pub
async
fn
new
(
ctx
:
&
Arc
<
AppContext
>
)
->
Result
<
Self
,
String
>
{
// Extract necessary components from context
// Extract necessary components from context
let
tokenizer
=
ctx
let
tokenizer
=
ctx
.tokenizer
.tokenizer
...
@@ -71,7 +77,7 @@ impl GrpcRouter {
...
@@ -71,7 +77,7 @@ impl GrpcRouter {
let
workers
=
worker_registry
.get_workers_filtered
(
let
workers
=
worker_registry
.get_workers_filtered
(
None
,
None
,
Some
(
WorkerType
::
Regular
),
Some
(
WorkerType
::
Regular
),
Some
(
crate
::
core
::
ConnectionMode
::
Grpc
{
port
:
None
}),
Some
(
ConnectionMode
::
Grpc
{
port
:
None
}),
false
,
false
,
);
);
...
@@ -207,17 +213,17 @@ impl GrpcRouter {
...
@@ -207,17 +213,17 @@ impl GrpcRouter {
&
self
,
&
self
,
model_id
:
Option
<&
str
>
,
model_id
:
Option
<&
str
>
,
text
:
Option
<&
str
>
,
text
:
Option
<&
str
>
,
)
->
Option
<
Arc
<
dyn
crate
::
core
::
Worker
>>
{
)
->
Option
<
Arc
<
dyn
Worker
>>
{
// Get workers for the specified model, filtered by connection mode
// Get workers for the specified model, filtered by connection mode
let
workers
=
self
.worker_registry
.get_workers_filtered
(
let
workers
=
self
.worker_registry
.get_workers_filtered
(
model_id
,
model_id
,
Some
(
WorkerType
::
Regular
),
Some
(
WorkerType
::
Regular
),
Some
(
crate
::
core
::
ConnectionMode
::
Grpc
{
port
:
None
}),
Some
(
ConnectionMode
::
Grpc
{
port
:
None
}),
false
,
// get all workers, we'll filter by is_available() next
false
,
// get all workers, we'll filter by is_available() next
);
);
// Filter by availability (health + circuit breaker)
// Filter by availability (health + circuit breaker)
let
available
:
Vec
<
Arc
<
dyn
crate
::
core
::
Worker
>>
=
workers
let
available
:
Vec
<
Arc
<
dyn
Worker
>>
=
workers
.iter
()
.iter
()
.filter
(|
w
|
w
.is_available
())
.filter
(|
w
|
w
.is_available
())
.cloned
()
.cloned
()
...
@@ -244,10 +250,10 @@ impl GrpcRouter {
...
@@ -244,10 +250,10 @@ impl GrpcRouter {
request
:
&
ChatCompletionRequest
,
request
:
&
ChatCompletionRequest
,
)
->
Result
<
ProcessedMessages
,
String
>
{
)
->
Result
<
ProcessedMessages
,
String
>
{
// Use the tokenizer's chat template - we require HuggingFace tokenizer for gRPC
// Use the tokenizer's chat template - we require HuggingFace tokenizer for gRPC
let
formatted_text
=
if
let
Some
(
hf_tokenizer
)
=
let
formatted_text
=
if
let
Some
(
hf_tokenizer
)
=
self
self
.tokenizer
.tokenizer
.as_any
()
.as_any
()
.downcast_ref
::
<
crate
::
tokenizer
::
HuggingFaceTokenizer
>
()
.downcast_ref
::
<
HuggingFaceTokenizer
>
()
{
{
// Get content format and transform messages accordingly
// Get content format and transform messages accordingly
let
content_format
=
hf_tokenizer
.chat_template_content_format
();
let
content_format
=
hf_tokenizer
.chat_template_content_format
();
...
@@ -350,9 +356,9 @@ impl GrpcRouter {
...
@@ -350,9 +356,9 @@ impl GrpcRouter {
/// Process messages based on content format for ANY message type
/// Process messages based on content format for ANY message type
fn
process_content_format
(
fn
process_content_format
(
messages
:
&
[
crate
::
protocols
::
spec
::
ChatMessage
],
messages
:
&
[
ChatMessage
],
content_format
:
crate
::
tokenizer
::
chat_template
::
ChatTemplateContentFormat
,
content_format
:
ChatTemplateContentFormat
,
)
->
Result
<
Vec
<
serde_json
::
Value
>
,
String
>
{
)
->
Result
<
Vec
<
Value
>
,
String
>
{
messages
messages
.iter
()
.iter
()
.map
(|
message
|
{
.map
(|
message
|
{
...
@@ -422,7 +428,7 @@ impl GrpcRouter {
...
@@ -422,7 +428,7 @@ impl GrpcRouter {
/// Process tool call arguments in messages
/// Process tool call arguments in messages
/// Per Transformers docs, tool call arguments in assistant messages should be dicts
/// Per Transformers docs, tool call arguments in assistant messages should be dicts
fn
process_tool_call_arguments
(
messages
:
&
mut
[
serde_json
::
Value
])
->
Result
<
(),
String
>
{
fn
process_tool_call_arguments
(
messages
:
&
mut
[
Value
])
->
Result
<
(),
String
>
{
for
msg
in
messages
{
for
msg
in
messages
{
// Early return if not assistant message
// Early return if not assistant message
let
role
=
msg
.get
(
"role"
)
.and_then
(|
v
|
v
.as_str
());
let
role
=
msg
.get
(
"role"
)
.and_then
(|
v
|
v
.as_str
());
...
@@ -466,8 +472,8 @@ impl GrpcRouter {
...
@@ -466,8 +472,8 @@ impl GrpcRouter {
/// Generate tool constraints for structured generation
/// Generate tool constraints for structured generation
fn
generate_tool_constraints
(
fn
generate_tool_constraints
(
&
self
,
&
self
,
_
tools
:
&
[
crate
::
protocols
::
spec
::
Tool
],
_
tools
:
&
[
Tool
],
_
tool_choice
:
&
Option
<
crate
::
protocols
::
spec
::
ToolChoice
>
,
_
tool_choice
:
&
Option
<
ToolChoice
>
,
model
:
&
str
,
model
:
&
str
,
)
->
Option
<
(
String
,
String
)
>
{
)
->
Option
<
(
String
,
String
)
>
{
let
_
parser
=
self
.tool_parser_registry
.get_parser
(
model
)
?
;
let
_
parser
=
self
.tool_parser_registry
.get_parser
(
model
)
?
;
...
@@ -541,7 +547,7 @@ impl RouterTrait for GrpcRouter {
...
@@ -541,7 +547,7 @@ impl RouterTrait for GrpcRouter {
async
fn
route_generate
(
async
fn
route_generate
(
&
self
,
&
self
,
_
headers
:
Option
<&
HeaderMap
>
,
_
headers
:
Option
<&
HeaderMap
>
,
_
body
:
&
crate
::
protocols
::
spec
::
GenerateRequest
,
_
body
:
&
GenerateRequest
,
_
model_id
:
Option
<&
str
>
,
_
model_id
:
Option
<&
str
>
,
)
->
Response
{
)
->
Response
{
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
...
@@ -550,7 +556,7 @@ impl RouterTrait for GrpcRouter {
...
@@ -550,7 +556,7 @@ impl RouterTrait for GrpcRouter {
async
fn
route_chat
(
async
fn
route_chat
(
&
self
,
&
self
,
headers
:
Option
<&
HeaderMap
>
,
headers
:
Option
<&
HeaderMap
>
,
body
:
&
crate
::
protocols
::
spec
::
ChatCompletionRequest
,
body
:
&
ChatCompletionRequest
,
model_id
:
Option
<&
str
>
,
model_id
:
Option
<&
str
>
,
)
->
Response
{
)
->
Response
{
self
.route_chat_impl
(
headers
,
body
,
model_id
)
.await
self
.route_chat_impl
(
headers
,
body
,
model_id
)
.await
...
@@ -559,7 +565,7 @@ impl RouterTrait for GrpcRouter {
...
@@ -559,7 +565,7 @@ impl RouterTrait for GrpcRouter {
async
fn
route_completion
(
async
fn
route_completion
(
&
self
,
&
self
,
_
headers
:
Option
<&
HeaderMap
>
,
_
headers
:
Option
<&
HeaderMap
>
,
_
body
:
&
crate
::
protocols
::
spec
::
CompletionRequest
,
_
body
:
&
CompletionRequest
,
_
model_id
:
Option
<&
str
>
,
_
model_id
:
Option
<&
str
>
,
)
->
Response
{
)
->
Response
{
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
...
@@ -568,7 +574,7 @@ impl RouterTrait for GrpcRouter {
...
@@ -568,7 +574,7 @@ impl RouterTrait for GrpcRouter {
async
fn
route_responses
(
async
fn
route_responses
(
&
self
,
&
self
,
_
headers
:
Option
<&
HeaderMap
>
,
_
headers
:
Option
<&
HeaderMap
>
,
_
body
:
&
crate
::
protocols
::
spec
::
ResponsesRequest
,
_
body
:
&
ResponsesRequest
,
_
model_id
:
Option
<&
str
>
,
_
model_id
:
Option
<&
str
>
,
)
->
Response
{
)
->
Response
{
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
...
@@ -578,7 +584,7 @@ impl RouterTrait for GrpcRouter {
...
@@ -578,7 +584,7 @@ impl RouterTrait for GrpcRouter {
&
self
,
&
self
,
_
headers
:
Option
<&
HeaderMap
>
,
_
headers
:
Option
<&
HeaderMap
>
,
_
response_id
:
&
str
,
_
response_id
:
&
str
,
_
params
:
&
crate
::
protocols
::
spec
::
ResponsesGetParams
,
_
params
:
&
ResponsesGetParams
,
)
->
Response
{
)
->
Response
{
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
}
}
...
@@ -590,7 +596,7 @@ impl RouterTrait for GrpcRouter {
...
@@ -590,7 +596,7 @@ impl RouterTrait for GrpcRouter {
async
fn
route_embeddings
(
async
fn
route_embeddings
(
&
self
,
&
self
,
_
headers
:
Option
<&
HeaderMap
>
,
_
headers
:
Option
<&
HeaderMap
>
,
_
body
:
&
crate
::
protocols
::
spec
::
EmbeddingRequest
,
_
body
:
&
EmbeddingRequest
,
_
model_id
:
Option
<&
str
>
,
_
model_id
:
Option
<&
str
>
,
)
->
Response
{
)
->
Response
{
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
...
@@ -599,7 +605,7 @@ impl RouterTrait for GrpcRouter {
...
@@ -599,7 +605,7 @@ impl RouterTrait for GrpcRouter {
async
fn
route_rerank
(
async
fn
route_rerank
(
&
self
,
&
self
,
_
headers
:
Option
<&
HeaderMap
>
,
_
headers
:
Option
<&
HeaderMap
>
,
_
body
:
&
crate
::
protocols
::
spec
::
RerankRequest
,
_
body
:
&
RerankRequest
,
_
model_id
:
Option
<&
str
>
,
_
model_id
:
Option
<&
str
>
,
)
->
Response
{
)
->
Response
{
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
(
StatusCode
::
NOT_IMPLEMENTED
)
.into_response
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment