Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
b93acd70
Unverified
Commit
b93acd70
authored
Sep 15, 2025
by
Chang Su
Committed by
GitHub
Sep 15, 2025
Browse files
[router] minor code clean up in server startup (#10470)
parent
86a32bb5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
79 additions
and
114 deletions
+79
-114
sgl-router/src/server.rs
sgl-router/src/server.rs
+79
-114
No files found.
sgl-router/src/server.rs
View file @
b93acd70
use
crate
::
config
::
RouterConfig
;
use
crate
::
core
::
WorkerRegistry
;
use
crate
::
logging
::{
self
,
LoggingConfig
};
use
crate
::
metrics
::{
self
,
PrometheusConfig
};
use
crate
::
middleware
::
TokenBucket
;
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
protocols
::
spec
::{
ChatCompletionRequest
,
CompletionRequest
,
EmbeddingRequest
,
GenerateRequest
,
RerankRequest
,
ResponsesRequest
,
V1RerankReqInput
,
use
crate
::{
config
::{
ConnectionMode
,
RouterConfig
},
core
::{
WorkerRegistry
,
WorkerType
},
logging
::{
self
,
LoggingConfig
},
metrics
::{
self
,
PrometheusConfig
},
middleware
::{
self
,
QueuedRequest
,
TokenBucket
},
policies
::
PolicyRegistry
,
protocols
::{
spec
::{
ChatCompletionRequest
,
CompletionRequest
,
EmbeddingRequest
,
GenerateRequest
,
RerankRequest
,
ResponsesRequest
,
V1RerankReqInput
,
},
worker_spec
::{
WorkerApiResponse
,
WorkerConfigRequest
,
WorkerErrorResponse
},
},
reasoning_parser
::
ParserFactory
,
routers
::{
router_manager
::{
RouterId
,
RouterManager
},
RouterFactory
,
RouterTrait
,
},
service_discovery
::{
start_service_discovery
,
ServiceDiscoveryConfig
},
tokenizer
::{
factory
as
tokenizer_factory
,
traits
::
Tokenizer
},
tool_parser
::
ParserRegistry
,
};
use
crate
::
protocols
::
worker_spec
::{
WorkerApiResponse
,
WorkerConfigRequest
,
WorkerErrorResponse
};
use
crate
::
reasoning_parser
::
ParserFactory
;
use
crate
::
routers
::
router_manager
::{
RouterId
,
RouterManager
};
use
crate
::
routers
::{
RouterFactory
,
RouterTrait
};
use
crate
::
service_discovery
::{
start_service_discovery
,
ServiceDiscoveryConfig
};
use
crate
::
tokenizer
::{
factory
as
tokenizer_factory
,
traits
::
Tokenizer
};
use
crate
::
tool_parser
::
ParserRegistry
;
use
axum
::{
extract
::{
Path
,
Query
,
Request
,
State
},
http
::
StatusCode
,
response
::{
IntoResponse
,
Response
},
routing
::{
delete
,
get
,
post
},
Json
,
Router
,
serve
,
Json
,
Router
,
};
use
reqwest
::
Client
;
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
atomic
::{
AtomicBool
,
Ordering
};
use
std
::
sync
::
Arc
;
use
std
::
time
::
Duration
;
use
tokio
::
net
::
TcpListener
;
use
tokio
::
signal
;
use
tokio
::
spawn
;
use
serde
::
Deserialize
;
use
serde_json
::
json
;
use
std
::{
sync
::
atomic
::{
AtomicBool
,
Ordering
},
sync
::
Arc
,
time
::
Duration
,
};
use
tokio
::{
net
::
TcpListener
,
signal
,
spawn
};
use
tracing
::{
error
,
info
,
warn
,
Level
};
#[derive(Clone)]
...
...
@@ -40,9 +47,9 @@ pub struct AppContext {
pub
tokenizer
:
Option
<
Arc
<
dyn
Tokenizer
>>
,
pub
reasoning_parser_factory
:
Option
<
ParserFactory
>
,
pub
tool_parser_registry
:
Option
<&
'static
ParserRegistry
>
,
pub
worker_registry
:
Arc
<
WorkerRegistry
>
,
// Shared worker registry
pub
policy_registry
:
Arc
<
PolicyRegistry
>
,
// Shared policy registry
pub
router_manager
:
Option
<
Arc
<
RouterManager
>>
,
// Only present when enable_igw=true
pub
worker_registry
:
Arc
<
WorkerRegistry
>
,
pub
policy_registry
:
Arc
<
PolicyRegistry
>
,
pub
router_manager
:
Option
<
Arc
<
RouterManager
>>
,
}
impl
AppContext
{
...
...
@@ -57,7 +64,7 @@ impl AppContext {
// Initialize gRPC-specific components only when in gRPC mode
let
(
tokenizer
,
reasoning_parser_factory
,
tool_parser_registry
)
=
if
router_config
.connection_mode
==
crate
::
config
::
ConnectionMode
::
Grpc
{
if
router_config
.connection_mode
==
ConnectionMode
::
Grpc
{
// Get tokenizer path (required for gRPC mode)
let
tokenizer_path
=
router_config
.tokenizer_path
...
...
@@ -71,7 +78,7 @@ impl AppContext {
// Initialize all gRPC components
let
tokenizer
=
Some
(
tokenizer_factory
::
create_tokenizer
(
&
tokenizer_path
)
.map_err
(|
e
|
format!
(
"Failed to create tokenizer: {}"
,
e
))
?
,
.map_err
(|
e
|
format!
(
"Failed to create tokenizer: {
e
}"
))
?
,
);
let
reasoning_parser_factory
=
Some
(
ParserFactory
::
new
());
let
tool_parser_registry
=
Some
(
ParserRegistry
::
new
());
...
...
@@ -82,14 +89,10 @@ impl AppContext {
(
None
,
None
,
None
)
};
// Initialize shared registries
let
worker_registry
=
Arc
::
new
(
WorkerRegistry
::
new
());
let
policy_registry
=
Arc
::
new
(
PolicyRegistry
::
new
(
router_config
.policy
.clone
(),
// Use default policy from config
));
let
policy_registry
=
Arc
::
new
(
PolicyRegistry
::
new
(
router_config
.policy
.clone
()));
// Initialize RouterManager only when enable_igw is true
let
router_manager
=
None
;
// Will be initialized in startup() based on config
let
router_manager
=
None
;
Ok
(
Self
{
client
,
...
...
@@ -109,7 +112,7 @@ impl AppContext {
pub
struct
AppState
{
pub
router
:
Arc
<
dyn
RouterTrait
>
,
pub
context
:
Arc
<
AppContext
>
,
pub
concurrency_queue_tx
:
Option
<
tokio
::
sync
::
mpsc
::
Sender
<
crate
::
middleware
::
QueuedRequest
>>
,
pub
concurrency_queue_tx
:
Option
<
tokio
::
sync
::
mpsc
::
Sender
<
QueuedRequest
>>
,
}
// Fallback handler for unmatched routes
...
...
@@ -265,23 +268,18 @@ async fn v1_responses_list_input_items(
.await
}
// Worker management endpoints
// ---------- Worker management endpoints (Legacy) ----------
#[derive(Deserialize)]
struct
UrlQuery
{
url
:
String
,
}
async
fn
add_worker
(
State
(
state
):
State
<
Arc
<
AppState
>>
,
Query
(
params
):
Query
<
HashMap
<
String
,
String
>
>
,
Query
(
UrlQuery
{
url
}):
Query
<
UrlQuery
>
,
)
->
Response
{
let
worker_url
=
match
params
.get
(
"url"
)
{
Some
(
url
)
=>
url
.to_string
(),
None
=>
{
return
(
StatusCode
::
BAD_REQUEST
,
"Worker URL required. Provide 'url' query parameter"
,
)
.into_response
();
}
};
match
state
.router
.add_worker
(
&
worker_url
)
.await
{
match
state
.router
.add_worker
(
&
url
)
.await
{
Ok
(
message
)
=>
(
StatusCode
::
OK
,
message
)
.into_response
(),
Err
(
error
)
=>
(
StatusCode
::
BAD_REQUEST
,
error
)
.into_response
(),
}
...
...
@@ -294,17 +292,12 @@ async fn list_workers(State(state): State<Arc<AppState>>) -> Response {
async
fn
remove_worker
(
State
(
state
):
State
<
Arc
<
AppState
>>
,
Query
(
params
):
Query
<
HashMap
<
String
,
String
>
>
,
Query
(
UrlQuery
{
url
}):
Query
<
UrlQuery
>
,
)
->
Response
{
let
worker_url
=
match
params
.get
(
"url"
)
{
Some
(
url
)
=>
url
.to_string
(),
None
=>
return
StatusCode
::
BAD_REQUEST
.into_response
(),
};
state
.router
.remove_worker
(
&
worker_url
);
state
.router
.remove_worker
(
&
url
);
(
StatusCode
::
OK
,
format!
(
"Successfully removed worker: {
}"
,
worker_
url
),
format!
(
"Successfully removed worker: {url
}"
),
)
.into_response
()
}
...
...
@@ -317,7 +310,7 @@ async fn get_loads(State(state): State<Arc<AppState>>, _req: Request) -> Respons
state
.router
.get_worker_loads
()
.await
}
//
New RESTful w
orker management endpoints (
when enable_igw=true)
//
---------- W
orker management endpoints (
RESTful) ----------
/// POST /workers - Add a new worker with full configuration
async
fn
create_worker
(
...
...
@@ -374,7 +367,7 @@ async fn list_workers_rest(State(state): State<Arc<AppState>>) -> Response {
});
// Add bootstrap_port for Prefill workers
if
let
crate
::
core
::
WorkerType
::
Prefill
{
bootstrap_port
}
=
worker
.worker_type
()
{
if
let
WorkerType
::
Prefill
{
bootstrap_port
}
=
worker
.worker_type
()
{
worker_info
[
"bootstrap_port"
]
=
serde_json
::
json!
(
bootstrap_port
);
}
...
...
@@ -384,7 +377,7 @@ async fn list_workers_rest(State(state): State<Arc<AppState>>) -> Response {
"stats"
:
{
"prefill_count"
:
state
.context.worker_registry
.get_prefill_workers
()
.len
(),
"decode_count"
:
state
.context.worker_registry
.get_decode_workers
()
.len
(),
"regular_count"
:
state
.context.worker_registry
.get_by_type
(
&
crate
::
core
::
WorkerType
::
Regular
)
.len
(),
"regular_count"
:
state
.context.worker_registry
.get_by_type
(
&
WorkerType
::
Regular
)
.len
(),
}
});
Json
(
response
)
.into_response
()
...
...
@@ -392,33 +385,29 @@ async fn list_workers_rest(State(state): State<Arc<AppState>>) -> Response {
}
/// GET /workers/{url} - Get specific worker info
async
fn
get_worker
(
State
(
state
):
State
<
Arc
<
AppState
>>
,
axum
::
extract
::
Path
(
url
):
axum
::
extract
::
Path
<
String
>
,
)
->
Response
{
async
fn
get_worker
(
State
(
state
):
State
<
Arc
<
AppState
>>
,
Path
(
url
):
Path
<
String
>
)
->
Response
{
if
let
Some
(
router_manager
)
=
&
state
.context.router_manager
{
if
let
Some
(
worker
)
=
router_manager
.get_worker
(
&
url
)
{
Json
(
worker
)
.into_response
()
}
else
{
let
error
=
WorkerErrorResponse
{
error
:
format!
(
"Worker {} not found"
,
url
),
error
:
format!
(
"Worker {
url
} not found"
),
code
:
"WORKER_NOT_FOUND"
.to_string
(),
};
(
StatusCode
::
NOT_FOUND
,
Json
(
error
))
.into_response
()
}
}
else
{
// In single router mode, check if worker exists
let
workers
=
state
.router
.get_worker_urls
();
if
workers
.contains
(
&
url
)
{
let
worker_info
=
serde_j
son
::
json!
({
J
son
(
json!
({
"url"
:
url
,
"model_id"
:
"unknown"
,
"is_healthy"
:
true
})
;
Json
(
worker_info
)
.into_response
()
})
)
.into_response
()
}
else
{
let
error
=
WorkerErrorResponse
{
error
:
format!
(
"Worker {} not found"
,
url
),
error
:
format!
(
"Worker {
url
} not found"
),
code
:
"WORKER_NOT_FOUND"
.to_string
(),
};
(
StatusCode
::
NOT_FOUND
,
Json
(
error
))
.into_response
()
...
...
@@ -427,10 +416,7 @@ async fn get_worker(
}
/// DELETE /workers/{url} - Remove a worker
async
fn
delete_worker
(
State
(
state
):
State
<
Arc
<
AppState
>>
,
axum
::
extract
::
Path
(
url
):
axum
::
extract
::
Path
<
String
>
,
)
->
Response
{
async
fn
delete_worker
(
State
(
state
):
State
<
Arc
<
AppState
>>
,
Path
(
url
):
Path
<
String
>
)
->
Response
{
if
let
Some
(
router_manager
)
=
&
state
.context.router_manager
{
match
router_manager
.remove_worker_from_registry
(
&
url
)
{
Ok
(
response
)
=>
(
StatusCode
::
OK
,
Json
(
response
))
.into_response
(),
...
...
@@ -441,7 +427,7 @@ async fn delete_worker(
state
.router
.remove_worker
(
&
url
);
let
response
=
WorkerApiResponse
{
success
:
true
,
message
:
format!
(
"Worker {} removed successfully"
,
url
),
message
:
format!
(
"Worker {
url
} removed successfully"
),
worker
:
None
,
};
(
StatusCode
::
OK
,
Json
(
response
))
.into_response
()
...
...
@@ -489,7 +475,7 @@ pub fn build_app(
)
.route_layer
(
axum
::
middleware
::
from_fn_with_state
(
app_state
.clone
(),
crate
::
middleware
::
concurrency_limit_middleware
,
middleware
::
concurrency_limit_middleware
,
));
let
public_routes
=
Router
::
new
()
...
...
@@ -513,7 +499,7 @@ pub fn build_app(
.route
(
"/workers"
,
post
(
create_worker
))
.route
(
"/workers"
,
get
(
list_workers_rest
))
.route
(
"/workers/{url}"
,
get
(
get_worker
))
.route
(
"/workers/{url}"
,
axum
::
routing
::
delete
(
delete_worker
));
.route
(
"/workers/{url}"
,
delete
(
delete_worker
));
// Build app with all routes and middleware
Router
::
new
()
...
...
@@ -525,17 +511,10 @@ pub fn build_app(
.layer
(
tower_http
::
limit
::
RequestBodyLimitLayer
::
new
(
max_payload_size
,
))
// Logging layer - must be added BEFORE request ID layer in the code
// so it executes AFTER request ID layer at runtime (layers execute bottom-up)
// This way the TraceLayer can see the request ID that was added to extensions
.layer
(
crate
::
middleware
::
create_logging_layer
())
// Request ID layer - adds request ID to extensions first
.layer
(
crate
::
middleware
::
RequestIdLayer
::
new
(
request_id_headers
))
// CORS (should be outermost)
.layer
(
middleware
::
create_logging_layer
())
.layer
(
middleware
::
RequestIdLayer
::
new
(
request_id_headers
))
.layer
(
create_cors_layer
(
cors_allowed_origins
))
// Fallback
.fallback
(
sink_handler
)
// State - apply last to get Router<Arc<AppState>>
.with_state
(
app_state
)
}
...
...
@@ -551,7 +530,7 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
.and_then
(|
s
|
match
s
.to_uppercase
()
.parse
::
<
Level
>
()
{
Ok
(
l
)
=>
Some
(
l
),
Err
(
_
)
=>
{
warn!
(
"Invalid log level string: '{}'. Defaulting to INFO."
,
s
);
warn!
(
"Invalid log level string: '{
s
}'. Defaulting to INFO."
);
None
}
})
...
...
@@ -582,11 +561,11 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
let
client
=
Client
::
builder
()
.pool_idle_timeout
(
Some
(
Duration
::
from_secs
(
50
)))
.pool_max_idle_per_host
(
500
)
// Increase to 500 connections per host
.pool_max_idle_per_host
(
500
)
.timeout
(
Duration
::
from_secs
(
config
.request_timeout_secs
))
.connect_timeout
(
Duration
::
from_secs
(
10
))
// Separate connection timeout
.connect_timeout
(
Duration
::
from_secs
(
10
))
.tcp_nodelay
(
true
)
.tcp_keepalive
(
Some
(
Duration
::
from_secs
(
30
)))
// Keep connections alive
.tcp_keepalive
(
Some
(
Duration
::
from_secs
(
30
)))
.build
()
.expect
(
"Failed to create HTTP client"
);
...
...
@@ -612,9 +591,6 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
app_context
.policy_registry
.clone
(),
);
// Create HTTP routers at startup (with empty worker lists)
// Workers will be added to these routers dynamically via RouterManager's worker registry
// 1. HTTP Regular Router
match
RouterFactory
::
create_regular_router
(
&
[],
// Empty worker list - workers added later
...
...
@@ -631,16 +607,16 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
);
}
Err
(
e
)
=>
{
warn!
(
"Failed to create HTTP Regular router: {}"
,
e
);
warn!
(
"Failed to create HTTP Regular router: {
e
}"
);
}
}
// 2. HTTP PD Router
match
RouterFactory
::
create_pd_router
(
&
[],
// Empty prefill URLs
&
[],
// Empty decode URLs
None
,
// Use default prefill policy
None
,
// Use default decode policy
&
[],
&
[],
None
,
None
,
&
config
.router_config.policy
,
&
app_context
,
)
...
...
@@ -655,16 +631,11 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
);
}
Err
(
e
)
=>
{
warn!
(
"Failed to create HTTP PD router: {}"
,
e
);
warn!
(
"Failed to create HTTP PD router: {
e
}"
);
}
}
// TODO: Add gRPC routers once we have dynamic tokenizer loading
// Currently gRPC routers require tokenizer to be initialized first,
// but each model needs its own tokenizer. Once we implement dynamic
// tokenizer loading per model, we can enable gRPC routers here:
// - RouterType::GrpcRegular (RouterId: "grpc-regular")
// - RouterType::GrpcPd (RouterId: "grpc-pd")
info!
(
"RouterManager initialized with {} routers"
,
...
...
@@ -687,7 +658,7 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
);
// Set up concurrency limiter with queue if configured
let
(
limiter
,
processor
)
=
crate
::
middleware
::
ConcurrencyLimiter
::
new
(
let
(
limiter
,
processor
)
=
middleware
::
ConcurrencyLimiter
::
new
(
app_context
.rate_limiter
.clone
(),
config
.router_config.queue_size
,
Duration
::
from_secs
(
config
.router_config.queue_timeout_secs
),
...
...
@@ -724,7 +695,7 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
});
}
Err
(
e
)
=>
{
error!
(
"Failed to start service discovery: {}"
,
e
);
error!
(
"Failed to start service discovery: {
e
}"
);
warn!
(
"Continuing without service discovery"
);
}
}
...
...
@@ -736,7 +707,6 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
app_state
.router
.get_worker_urls
()
);
// Configure request ID headers
let
request_id_headers
=
config
.request_id_headers
.clone
()
.unwrap_or_else
(||
{
vec!
[
"x-request-id"
.to_string
(),
...
...
@@ -754,15 +724,10 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
config
.router_config.cors_allowed_origins
.clone
(),
);
// Create TCP listener - use the configured host
let
addr
=
format!
(
"{}:{}"
,
config
.host
,
config
.port
);
let
listener
=
TcpListener
::
bind
(
&
addr
)
.await
?
;
// Start server with graceful shutdown
info!
(
"Starting server on {}"
,
addr
);
// Serve the application with graceful shutdown
axum
::
serve
(
listener
,
app
)
serve
(
listener
,
app
)
.with_graceful_shutdown
(
shutdown_signal
())
.await
.map_err
(|
e
|
Box
::
new
(
e
)
as
Box
<
dyn
std
::
error
::
Error
>
)
?
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment