Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
21b88460
Unverified
Commit
21b88460
authored
Aug 15, 2025
by
Simo Lin
Committed by
GitHub
Aug 15, 2025
Browse files
[router] allow more health check configuration (#9198)
parent
0c8594e6
Changes
15
Show whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
397 additions
and
165 deletions
+397
-165
sgl-router/py_src/sglang_router/launch_router.py
sgl-router/py_src/sglang_router/launch_router.py
+84
-19
sgl-router/py_src/sglang_router/router.py
sgl-router/py_src/sglang_router/router.py
+34
-19
sgl-router/src/config/types.rs
sgl-router/src/config/types.rs
+55
-21
sgl-router/src/core/mod.rs
sgl-router/src/core/mod.rs
+2
-2
sgl-router/src/core/worker.rs
sgl-router/src/core/worker.rs
+54
-50
sgl-router/src/lib.rs
sgl-router/src/lib.rs
+48
-19
sgl-router/src/main.rs
sgl-router/src/main.rs
+49
-21
sgl-router/src/routers/factory.rs
sgl-router/src/routers/factory.rs
+2
-0
sgl-router/src/routers/pd_router.rs
sgl-router/src/routers/pd_router.rs
+43
-10
sgl-router/src/routers/router.rs
sgl-router/src/routers/router.rs
+18
-4
sgl-router/src/service_discovery.rs
sgl-router/src/service_discovery.rs
+1
-0
sgl-router/tests/api_endpoints_test.rs
sgl-router/tests/api_endpoints_test.rs
+4
-0
sgl-router/tests/request_formats_test.rs
sgl-router/tests/request_formats_test.rs
+1
-0
sgl-router/tests/streaming_tests.rs
sgl-router/tests/streaming_tests.rs
+1
-0
sgl-router/tests/test_pd_routing.rs
sgl-router/tests/test_pd_routing.rs
+1
-0
No files found.
sgl-router/py_src/sglang_router/launch_router.py
View file @
21b88460
...
@@ -42,14 +42,14 @@ class RouterArgs:
...
@@ -42,14 +42,14 @@ class RouterArgs:
policy
:
str
=
"cache_aware"
policy
:
str
=
"cache_aware"
prefill_policy
:
Optional
[
str
]
=
None
# Specific policy for prefill nodes in PD mode
prefill_policy
:
Optional
[
str
]
=
None
# Specific policy for prefill nodes in PD mode
decode_policy
:
Optional
[
str
]
=
None
# Specific policy for decode nodes in PD mode
decode_policy
:
Optional
[
str
]
=
None
# Specific policy for decode nodes in PD mode
worker_startup_timeout_secs
:
int
=
3
00
worker_startup_timeout_secs
:
int
=
6
00
worker_startup_check_interval
:
int
=
1
0
worker_startup_check_interval
:
int
=
3
0
cache_threshold
:
float
=
0.
5
cache_threshold
:
float
=
0.
3
balance_abs_threshold
:
int
=
32
balance_abs_threshold
:
int
=
64
balance_rel_threshold
:
float
=
1.
0001
balance_rel_threshold
:
float
=
1.
5
eviction_interval
:
int
=
6
0
eviction_interval
:
int
=
12
0
max_tree_size
:
int
=
2
**
2
4
max_tree_size
:
int
=
2
**
2
6
max_payload_size
:
int
=
256
*
1024
*
1024
#
256
MB default for large batches
max_payload_size
:
int
=
512
*
1024
*
1024
#
512
MB default for large batches
dp_aware
:
bool
=
False
dp_aware
:
bool
=
False
api_key
:
Optional
[
str
]
=
None
api_key
:
Optional
[
str
]
=
None
log_dir
:
Optional
[
str
]
=
None
log_dir
:
Optional
[
str
]
=
None
...
@@ -69,23 +69,29 @@ class RouterArgs:
...
@@ -69,23 +69,29 @@ class RouterArgs:
# Request ID headers configuration
# Request ID headers configuration
request_id_headers
:
Optional
[
List
[
str
]]
=
None
request_id_headers
:
Optional
[
List
[
str
]]
=
None
# Request timeout in seconds
# Request timeout in seconds
request_timeout_secs
:
int
=
6
00
request_timeout_secs
:
int
=
18
00
# Max concurrent requests for rate limiting
# Max concurrent requests for rate limiting
max_concurrent_requests
:
int
=
6
4
max_concurrent_requests
:
int
=
25
6
# CORS allowed origins
# CORS allowed origins
cors_allowed_origins
:
List
[
str
]
=
dataclasses
.
field
(
default_factory
=
list
)
cors_allowed_origins
:
List
[
str
]
=
dataclasses
.
field
(
default_factory
=
list
)
# Retry configuration
# Retry configuration
retry_max_retries
:
int
=
3
retry_max_retries
:
int
=
5
retry_initial_backoff_ms
:
int
=
10
0
retry_initial_backoff_ms
:
int
=
5
0
retry_max_backoff_ms
:
int
=
1
0_000
retry_max_backoff_ms
:
int
=
3
0_000
retry_backoff_multiplier
:
float
=
2.0
retry_backoff_multiplier
:
float
=
1.5
retry_jitter_factor
:
float
=
0.
1
retry_jitter_factor
:
float
=
0.
2
disable_retries
:
bool
=
False
disable_retries
:
bool
=
False
# Health check configuration
health_failure_threshold
:
int
=
3
health_success_threshold
:
int
=
2
health_check_timeout_secs
:
int
=
5
health_check_interval_secs
:
int
=
60
health_check_endpoint
:
str
=
"/health"
# Circuit breaker configuration
# Circuit breaker configuration
cb_failure_threshold
:
int
=
5
cb_failure_threshold
:
int
=
10
cb_success_threshold
:
int
=
2
cb_success_threshold
:
int
=
3
cb_timeout_duration_secs
:
int
=
3
0
cb_timeout_duration_secs
:
int
=
6
0
cb_window_duration_secs
:
int
=
6
0
cb_window_duration_secs
:
int
=
12
0
disable_circuit_breaker
:
bool
=
False
disable_circuit_breaker
:
bool
=
False
@
staticmethod
@
staticmethod
...
@@ -359,6 +365,37 @@ class RouterArgs:
...
@@ -359,6 +365,37 @@ class RouterArgs:
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Disable circuit breaker (equivalent to setting cb_failure_threshold to u32::MAX)"
,
help
=
"Disable circuit breaker (equivalent to setting cb_failure_threshold to u32::MAX)"
,
)
)
# Health check configuration
parser
.
add_argument
(
f
"--
{
prefix
}
health-failure-threshold"
,
type
=
int
,
default
=
RouterArgs
.
health_failure_threshold
,
help
=
"Number of consecutive health check failures before marking worker unhealthy"
,
)
parser
.
add_argument
(
f
"--
{
prefix
}
health-success-threshold"
,
type
=
int
,
default
=
RouterArgs
.
health_success_threshold
,
help
=
"Number of consecutive health check successes before marking worker healthy"
,
)
parser
.
add_argument
(
f
"--
{
prefix
}
health-check-timeout-secs"
,
type
=
int
,
default
=
RouterArgs
.
health_check_timeout_secs
,
help
=
"Timeout in seconds for health check requests"
,
)
parser
.
add_argument
(
f
"--
{
prefix
}
health-check-interval-secs"
,
type
=
int
,
default
=
RouterArgs
.
health_check_interval_secs
,
help
=
"Interval in seconds between runtime health checks"
,
)
parser
.
add_argument
(
f
"--
{
prefix
}
health-check-endpoint"
,
type
=
str
,
default
=
RouterArgs
.
health_check_endpoint
,
help
=
"Health check endpoint path"
,
)
parser
.
add_argument
(
parser
.
add_argument
(
f
"--
{
prefix
}
max-concurrent-requests"
,
f
"--
{
prefix
}
max-concurrent-requests"
,
type
=
int
,
type
=
int
,
...
@@ -455,6 +492,29 @@ class RouterArgs:
...
@@ -455,6 +492,29 @@ class RouterArgs:
disable_circuit_breaker
=
getattr
(
disable_circuit_breaker
=
getattr
(
args
,
f
"
{
prefix
}
disable_circuit_breaker"
,
False
args
,
f
"
{
prefix
}
disable_circuit_breaker"
,
False
),
),
health_failure_threshold
=
getattr
(
args
,
f
"
{
prefix
}
health_failure_threshold"
,
RouterArgs
.
health_failure_threshold
,
),
health_success_threshold
=
getattr
(
args
,
f
"
{
prefix
}
health_success_threshold"
,
RouterArgs
.
health_success_threshold
,
),
health_check_timeout_secs
=
getattr
(
args
,
f
"
{
prefix
}
health_check_timeout_secs"
,
RouterArgs
.
health_check_timeout_secs
,
),
health_check_interval_secs
=
getattr
(
args
,
f
"
{
prefix
}
health_check_interval_secs"
,
RouterArgs
.
health_check_interval_secs
,
),
health_check_endpoint
=
getattr
(
args
,
f
"
{
prefix
}
health_check_endpoint"
,
RouterArgs
.
health_check_endpoint
),
)
)
@
staticmethod
@
staticmethod
...
@@ -652,6 +712,11 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
...
@@ -652,6 +712,11 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
cb_window_duration_secs
=
router_args
.
cb_window_duration_secs
,
cb_window_duration_secs
=
router_args
.
cb_window_duration_secs
,
disable_retries
=
router_args
.
disable_retries
,
disable_retries
=
router_args
.
disable_retries
,
disable_circuit_breaker
=
router_args
.
disable_circuit_breaker
,
disable_circuit_breaker
=
router_args
.
disable_circuit_breaker
,
health_failure_threshold
=
router_args
.
health_failure_threshold
,
health_success_threshold
=
router_args
.
health_success_threshold
,
health_check_timeout_secs
=
router_args
.
health_check_timeout_secs
,
health_check_interval_secs
=
router_args
.
health_check_interval_secs
,
health_check_endpoint
=
router_args
.
health_check_endpoint
,
)
)
router
.
start
()
router
.
start
()
...
...
sgl-router/py_src/sglang_router/router.py
View file @
21b88460
...
@@ -66,6 +66,11 @@ class Router:
...
@@ -66,6 +66,11 @@ class Router:
request_timeout_secs: Request timeout in seconds. Default: 600
request_timeout_secs: Request timeout in seconds. Default: 600
max_concurrent_requests: Maximum number of concurrent requests allowed for rate limiting. Default: 64
max_concurrent_requests: Maximum number of concurrent requests allowed for rate limiting. Default: 64
cors_allowed_origins: List of allowed origins for CORS. Empty list allows all origins. Default: []
cors_allowed_origins: List of allowed origins for CORS. Empty list allows all origins. Default: []
health_failure_threshold: Number of consecutive health check failures before marking worker unhealthy. Default: 3
health_success_threshold: Number of consecutive health check successes before marking worker healthy. Default: 2
health_check_timeout_secs: Timeout in seconds for health check requests. Default: 5
health_check_interval_secs: Interval in seconds between runtime health checks. Default: 60
health_check_endpoint: Health check endpoint path. Default: '/health'
"""
"""
def
__init__
(
def
__init__
(
...
@@ -74,14 +79,14 @@ class Router:
...
@@ -74,14 +79,14 @@ class Router:
policy
:
PolicyType
=
PolicyType
.
RoundRobin
,
policy
:
PolicyType
=
PolicyType
.
RoundRobin
,
host
:
str
=
"127.0.0.1"
,
host
:
str
=
"127.0.0.1"
,
port
:
int
=
3001
,
port
:
int
=
3001
,
worker_startup_timeout_secs
:
int
=
3
00
,
worker_startup_timeout_secs
:
int
=
6
00
,
worker_startup_check_interval
:
int
=
1
0
,
worker_startup_check_interval
:
int
=
3
0
,
cache_threshold
:
float
=
0.
50
,
cache_threshold
:
float
=
0.
3
,
balance_abs_threshold
:
int
=
32
,
balance_abs_threshold
:
int
=
64
,
balance_rel_threshold
:
float
=
1.
0001
,
balance_rel_threshold
:
float
=
1.
5
,
eviction_interval_secs
:
int
=
6
0
,
eviction_interval_secs
:
int
=
12
0
,
max_tree_size
:
int
=
2
**
2
4
,
max_tree_size
:
int
=
2
**
2
6
,
max_payload_size
:
int
=
256
*
1024
*
1024
,
#
256
MB
max_payload_size
:
int
=
512
*
1024
*
1024
,
#
512
MB
dp_aware
:
bool
=
False
,
dp_aware
:
bool
=
False
,
api_key
:
Optional
[
str
]
=
None
,
api_key
:
Optional
[
str
]
=
None
,
log_dir
:
Optional
[
str
]
=
None
,
log_dir
:
Optional
[
str
]
=
None
,
...
@@ -95,26 +100,31 @@ class Router:
...
@@ -95,26 +100,31 @@ class Router:
bootstrap_port_annotation
:
str
=
"sglang.ai/bootstrap-port"
,
bootstrap_port_annotation
:
str
=
"sglang.ai/bootstrap-port"
,
prometheus_port
:
Optional
[
int
]
=
None
,
prometheus_port
:
Optional
[
int
]
=
None
,
prometheus_host
:
Optional
[
str
]
=
None
,
prometheus_host
:
Optional
[
str
]
=
None
,
request_timeout_secs
:
int
=
6
00
,
request_timeout_secs
:
int
=
18
00
,
request_id_headers
:
Optional
[
List
[
str
]]
=
None
,
request_id_headers
:
Optional
[
List
[
str
]]
=
None
,
pd_disaggregation
:
bool
=
False
,
pd_disaggregation
:
bool
=
False
,
prefill_urls
:
Optional
[
List
[
tuple
]]
=
None
,
prefill_urls
:
Optional
[
List
[
tuple
]]
=
None
,
decode_urls
:
Optional
[
List
[
str
]]
=
None
,
decode_urls
:
Optional
[
List
[
str
]]
=
None
,
prefill_policy
:
Optional
[
PolicyType
]
=
None
,
prefill_policy
:
Optional
[
PolicyType
]
=
None
,
decode_policy
:
Optional
[
PolicyType
]
=
None
,
decode_policy
:
Optional
[
PolicyType
]
=
None
,
max_concurrent_requests
:
int
=
6
4
,
max_concurrent_requests
:
int
=
25
6
,
cors_allowed_origins
:
List
[
str
]
=
None
,
cors_allowed_origins
:
List
[
str
]
=
None
,
retry_max_retries
:
int
=
3
,
retry_max_retries
:
int
=
5
,
retry_initial_backoff_ms
:
int
=
10
0
,
retry_initial_backoff_ms
:
int
=
5
0
,
retry_max_backoff_ms
:
int
=
1
0_000
,
retry_max_backoff_ms
:
int
=
3
0_000
,
retry_backoff_multiplier
:
float
=
2.0
,
retry_backoff_multiplier
:
float
=
1.5
,
retry_jitter_factor
:
float
=
0.
1
,
retry_jitter_factor
:
float
=
0.
2
,
cb_failure_threshold
:
int
=
5
,
cb_failure_threshold
:
int
=
10
,
cb_success_threshold
:
int
=
2
,
cb_success_threshold
:
int
=
3
,
cb_timeout_duration_secs
:
int
=
3
0
,
cb_timeout_duration_secs
:
int
=
6
0
,
cb_window_duration_secs
:
int
=
6
0
,
cb_window_duration_secs
:
int
=
12
0
,
disable_retries
:
bool
=
False
,
disable_retries
:
bool
=
False
,
disable_circuit_breaker
:
bool
=
False
,
disable_circuit_breaker
:
bool
=
False
,
health_failure_threshold
:
int
=
3
,
health_success_threshold
:
int
=
2
,
health_check_timeout_secs
:
int
=
5
,
health_check_interval_secs
:
int
=
60
,
health_check_endpoint
:
str
=
"/health"
,
):
):
if
selector
is
None
:
if
selector
is
None
:
selector
=
{}
selector
=
{}
...
@@ -171,6 +181,11 @@ class Router:
...
@@ -171,6 +181,11 @@ class Router:
cb_window_duration_secs
=
cb_window_duration_secs
,
cb_window_duration_secs
=
cb_window_duration_secs
,
disable_retries
=
disable_retries
,
disable_retries
=
disable_retries
,
disable_circuit_breaker
=
disable_circuit_breaker
,
disable_circuit_breaker
=
disable_circuit_breaker
,
health_failure_threshold
=
health_failure_threshold
,
health_success_threshold
=
health_success_threshold
,
health_check_timeout_secs
=
health_check_timeout_secs
,
health_check_interval_secs
=
health_check_interval_secs
,
health_check_endpoint
=
health_check_endpoint
,
)
)
def
start
(
self
)
->
None
:
def
start
(
self
)
->
None
:
...
...
sgl-router/src/config/types.rs
View file @
21b88460
...
@@ -49,6 +49,8 @@ pub struct RouterConfig {
...
@@ -49,6 +49,8 @@ pub struct RouterConfig {
/// Disable circuit breaker (overrides circuit_breaker.failure_threshold to u32::MAX when true)
/// Disable circuit breaker (overrides circuit_breaker.failure_threshold to u32::MAX when true)
#[serde(default)]
#[serde(default)]
pub
disable_circuit_breaker
:
bool
,
pub
disable_circuit_breaker
:
bool
,
/// Health check configuration
pub
health_check
:
HealthCheckConfig
,
}
}
/// Routing mode configuration
/// Routing mode configuration
...
@@ -183,7 +185,7 @@ impl Default for DiscoveryConfig {
...
@@ -183,7 +185,7 @@ impl Default for DiscoveryConfig {
enabled
:
false
,
enabled
:
false
,
namespace
:
None
,
namespace
:
None
,
port
:
8000
,
port
:
8000
,
check_interval_secs
:
6
0
,
check_interval_secs
:
12
0
,
selector
:
HashMap
::
new
(),
selector
:
HashMap
::
new
(),
prefill_selector
:
HashMap
::
new
(),
prefill_selector
:
HashMap
::
new
(),
decode_selector
:
HashMap
::
new
(),
decode_selector
:
HashMap
::
new
(),
...
@@ -212,17 +214,44 @@ pub struct RetryConfig {
...
@@ -212,17 +214,44 @@ pub struct RetryConfig {
impl
Default
for
RetryConfig
{
impl
Default
for
RetryConfig
{
fn
default
()
->
Self
{
fn
default
()
->
Self
{
Self
{
Self
{
max_retries
:
3
,
max_retries
:
5
,
initial_backoff_ms
:
10
0
,
initial_backoff_ms
:
5
0
,
max_backoff_ms
:
1
0000
,
max_backoff_ms
:
3
0000
,
backoff_multiplier
:
2.0
,
backoff_multiplier
:
1.5
,
jitter_factor
:
0.
1
,
jitter_factor
:
0.
2
,
}
}
}
}
}
}
fn
default_retry_jitter_factor
()
->
f32
{
fn
default_retry_jitter_factor
()
->
f32
{
0.1
0.2
}
/// Health check configuration for worker monitoring
#[derive(Debug,
Clone,
Serialize,
Deserialize)]
pub
struct
HealthCheckConfig
{
/// Number of consecutive failures before marking unhealthy
pub
failure_threshold
:
u32
,
/// Number of consecutive successes before marking healthy
pub
success_threshold
:
u32
,
/// Timeout for health check requests in seconds
pub
timeout_secs
:
u64
,
/// Interval between health checks in seconds
pub
check_interval_secs
:
u64
,
/// Health check endpoint path
pub
endpoint
:
String
,
}
impl
Default
for
HealthCheckConfig
{
fn
default
()
->
Self
{
Self
{
failure_threshold
:
3
,
success_threshold
:
2
,
timeout_secs
:
5
,
check_interval_secs
:
60
,
endpoint
:
"/health"
.to_string
(),
}
}
}
}
/// Circuit breaker configuration for worker reliability
/// Circuit breaker configuration for worker reliability
...
@@ -241,10 +270,10 @@ pub struct CircuitBreakerConfig {
...
@@ -241,10 +270,10 @@ pub struct CircuitBreakerConfig {
impl
Default
for
CircuitBreakerConfig
{
impl
Default
for
CircuitBreakerConfig
{
fn
default
()
->
Self
{
fn
default
()
->
Self
{
Self
{
Self
{
failure_threshold
:
5
,
failure_threshold
:
10
,
success_threshold
:
2
,
success_threshold
:
3
,
timeout_duration_secs
:
3
0
,
timeout_duration_secs
:
6
0
,
window_duration_secs
:
6
0
,
window_duration_secs
:
12
0
,
}
}
}
}
}
}
...
@@ -276,10 +305,10 @@ impl Default for RouterConfig {
...
@@ -276,10 +305,10 @@ impl Default for RouterConfig {
policy
:
PolicyConfig
::
Random
,
policy
:
PolicyConfig
::
Random
,
host
:
"127.0.0.1"
.to_string
(),
host
:
"127.0.0.1"
.to_string
(),
port
:
3001
,
port
:
3001
,
max_payload_size
:
268_435_456
,
//
256
MB
max_payload_size
:
536_870_912
,
//
512
MB
request_timeout_secs
:
36
00
,
//
1 hour to match Python mini LB
request_timeout_secs
:
18
00
,
//
30 minutes
worker_startup_timeout_secs
:
3
00
,
worker_startup_timeout_secs
:
6
00
,
worker_startup_check_interval_secs
:
1
0
,
worker_startup_check_interval_secs
:
3
0
,
dp_aware
:
false
,
dp_aware
:
false
,
api_key
:
None
,
api_key
:
None
,
discovery
:
None
,
discovery
:
None
,
...
@@ -287,12 +316,13 @@ impl Default for RouterConfig {
...
@@ -287,12 +316,13 @@ impl Default for RouterConfig {
log_dir
:
None
,
log_dir
:
None
,
log_level
:
None
,
log_level
:
None
,
request_id_headers
:
None
,
request_id_headers
:
None
,
max_concurrent_requests
:
6
4
,
max_concurrent_requests
:
25
6
,
cors_allowed_origins
:
vec!
[],
cors_allowed_origins
:
vec!
[],
retry
:
RetryConfig
::
default
(),
retry
:
RetryConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
}
}
}
}
}
}
...
@@ -365,10 +395,10 @@ mod tests {
...
@@ -365,10 +395,10 @@ mod tests {
assert
!
(
matches!
(
config
.policy
,
PolicyConfig
::
Random
));
assert
!
(
matches!
(
config
.policy
,
PolicyConfig
::
Random
));
assert_eq!
(
config
.host
,
"127.0.0.1"
);
assert_eq!
(
config
.host
,
"127.0.0.1"
);
assert_eq!
(
config
.port
,
3001
);
assert_eq!
(
config
.port
,
3001
);
assert_eq!
(
config
.max_payload_size
,
268_435_456
);
assert_eq!
(
config
.max_payload_size
,
536_870_912
);
assert_eq!
(
config
.request_timeout_secs
,
36
00
);
assert_eq!
(
config
.request_timeout_secs
,
18
00
);
assert_eq!
(
config
.worker_startup_timeout_secs
,
3
00
);
assert_eq!
(
config
.worker_startup_timeout_secs
,
6
00
);
assert_eq!
(
config
.worker_startup_check_interval_secs
,
1
0
);
assert_eq!
(
config
.worker_startup_check_interval_secs
,
3
0
);
assert
!
(
config
.discovery
.is_none
());
assert
!
(
config
.discovery
.is_none
());
assert
!
(
config
.metrics
.is_none
());
assert
!
(
config
.metrics
.is_none
());
assert
!
(
config
.log_dir
.is_none
());
assert
!
(
config
.log_dir
.is_none
());
...
@@ -425,6 +455,7 @@ mod tests {
...
@@ -425,6 +455,7 @@ mod tests {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
};
};
let
json
=
serde_json
::
to_string
(
&
config
)
.unwrap
();
let
json
=
serde_json
::
to_string
(
&
config
)
.unwrap
();
...
@@ -614,7 +645,7 @@ mod tests {
...
@@ -614,7 +645,7 @@ mod tests {
assert
!
(
!
config
.enabled
);
assert
!
(
!
config
.enabled
);
assert
!
(
config
.namespace
.is_none
());
assert
!
(
config
.namespace
.is_none
());
assert_eq!
(
config
.port
,
8000
);
assert_eq!
(
config
.port
,
8000
);
assert_eq!
(
config
.check_interval_secs
,
6
0
);
assert_eq!
(
config
.check_interval_secs
,
12
0
);
assert
!
(
config
.selector
.is_empty
());
assert
!
(
config
.selector
.is_empty
());
assert
!
(
config
.prefill_selector
.is_empty
());
assert
!
(
config
.prefill_selector
.is_empty
());
assert
!
(
config
.decode_selector
.is_empty
());
assert
!
(
config
.decode_selector
.is_empty
());
...
@@ -856,6 +887,7 @@ mod tests {
...
@@ -856,6 +887,7 @@ mod tests {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
};
};
assert
!
(
config
.mode
.is_pd_mode
());
assert
!
(
config
.mode
.is_pd_mode
());
...
@@ -911,6 +943,7 @@ mod tests {
...
@@ -911,6 +943,7 @@ mod tests {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
};
};
assert
!
(
!
config
.mode
.is_pd_mode
());
assert
!
(
!
config
.mode
.is_pd_mode
());
...
@@ -962,6 +995,7 @@ mod tests {
...
@@ -962,6 +995,7 @@ mod tests {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
};
};
assert
!
(
config
.has_service_discovery
());
assert
!
(
config
.has_service_discovery
());
...
...
sgl-router/src/core/mod.rs
View file @
21b88460
...
@@ -18,6 +18,6 @@ pub use circuit_breaker::{
...
@@ -18,6 +18,6 @@ pub use circuit_breaker::{
pub
use
error
::{
WorkerError
,
WorkerResult
};
pub
use
error
::{
WorkerError
,
WorkerResult
};
pub
use
retry
::{
is_retryable_status
,
BackoffCalculator
,
RetryError
,
RetryExecutor
};
pub
use
retry
::{
is_retryable_status
,
BackoffCalculator
,
RetryError
,
RetryExecutor
};
pub
use
worker
::{
pub
use
worker
::{
start_health_checker
,
BasicWorker
,
DPAwareWorker
,
HealthChecker
,
Worker
,
WorkerCollection
,
start_health_checker
,
BasicWorker
,
DPAwareWorker
,
HealthChecker
,
HealthConfig
,
Worker
,
WorkerFactory
,
WorkerLoadGuard
,
WorkerType
,
WorkerCollection
,
WorkerFactory
,
WorkerLoadGuard
,
WorkerType
,
};
};
sgl-router/src/core/worker.rs
View file @
21b88460
...
@@ -182,6 +182,10 @@ pub struct HealthConfig {
...
@@ -182,6 +182,10 @@ pub struct HealthConfig {
pub
check_interval_secs
:
u64
,
pub
check_interval_secs
:
u64
,
/// Health check endpoint path
/// Health check endpoint path
pub
endpoint
:
String
,
pub
endpoint
:
String
,
/// Number of consecutive failures before marking unhealthy
pub
failure_threshold
:
u32
,
/// Number of consecutive successes before marking healthy
pub
success_threshold
:
u32
,
}
}
impl
Default
for
HealthConfig
{
impl
Default
for
HealthConfig
{
...
@@ -190,6 +194,8 @@ impl Default for HealthConfig {
...
@@ -190,6 +194,8 @@ impl Default for HealthConfig {
timeout_secs
:
5
,
timeout_secs
:
5
,
check_interval_secs
:
30
,
check_interval_secs
:
30
,
endpoint
:
"/health"
.to_string
(),
endpoint
:
"/health"
.to_string
(),
failure_threshold
:
3
,
success_threshold
:
2
,
}
}
}
}
}
}
...
@@ -214,6 +220,8 @@ pub struct BasicWorker {
...
@@ -214,6 +220,8 @@ pub struct BasicWorker {
load_counter
:
Arc
<
AtomicUsize
>
,
load_counter
:
Arc
<
AtomicUsize
>
,
processed_counter
:
Arc
<
AtomicUsize
>
,
processed_counter
:
Arc
<
AtomicUsize
>
,
healthy
:
Arc
<
AtomicBool
>
,
healthy
:
Arc
<
AtomicBool
>
,
consecutive_failures
:
Arc
<
AtomicUsize
>
,
consecutive_successes
:
Arc
<
AtomicUsize
>
,
circuit_breaker
:
CircuitBreaker
,
circuit_breaker
:
CircuitBreaker
,
}
}
...
@@ -231,6 +239,8 @@ impl BasicWorker {
...
@@ -231,6 +239,8 @@ impl BasicWorker {
load_counter
:
Arc
::
new
(
AtomicUsize
::
new
(
0
)),
load_counter
:
Arc
::
new
(
AtomicUsize
::
new
(
0
)),
processed_counter
:
Arc
::
new
(
AtomicUsize
::
new
(
0
)),
processed_counter
:
Arc
::
new
(
AtomicUsize
::
new
(
0
)),
healthy
:
Arc
::
new
(
AtomicBool
::
new
(
true
)),
healthy
:
Arc
::
new
(
AtomicBool
::
new
(
true
)),
consecutive_failures
:
Arc
::
new
(
AtomicUsize
::
new
(
0
)),
consecutive_successes
:
Arc
::
new
(
AtomicUsize
::
new
(
0
)),
circuit_breaker
:
CircuitBreaker
::
new
(),
circuit_breaker
:
CircuitBreaker
::
new
(),
}
}
}
}
...
@@ -300,28 +310,49 @@ impl Worker for BasicWorker {
...
@@ -300,28 +310,49 @@ impl Worker for BasicWorker {
let
timeout
=
Duration
::
from_secs
(
self
.metadata.health_config.timeout_secs
);
let
timeout
=
Duration
::
from_secs
(
self
.metadata.health_config.timeout_secs
);
// Use the shared client with a custom timeout for this request
// Use the shared client with a custom timeout for this request
match
WORKER_CLIENT
.get
(
&
health_url
)
.timeout
(
timeout
)
.send
()
.await
{
let
health_result
=
match
WORKER_CLIENT
.get
(
&
health_url
)
.timeout
(
timeout
)
.send
()
.await
{
Ok
(
response
)
=>
{
Ok
(
response
)
=>
{
if
response
.status
()
.is_success
()
{
if
response
.status
()
.is_success
()
{
true
}
else
{
false
}
}
Err
(
_
)
=>
false
,
};
if
health_result
{
// Health check succeeded
self
.consecutive_failures
.store
(
0
,
Ordering
::
Release
);
let
successes
=
self
.consecutive_successes
.fetch_add
(
1
,
Ordering
::
AcqRel
)
+
1
;
// Mark healthy if we've reached the success threshold
if
!
self
.is_healthy
()
&&
successes
>=
self
.metadata.health_config.success_threshold
as
usize
{
self
.set_healthy
(
true
);
self
.set_healthy
(
true
);
self
.consecutive_successes
.store
(
0
,
Ordering
::
Release
);
}
Ok
(())
Ok
(())
}
else
{
}
else
{
// Health check failed
self
.consecutive_successes
.store
(
0
,
Ordering
::
Release
);
let
failures
=
self
.consecutive_failures
.fetch_add
(
1
,
Ordering
::
AcqRel
)
+
1
;
// Mark unhealthy if we've reached the failure threshold
if
self
.is_healthy
()
&&
failures
>=
self
.metadata.health_config.failure_threshold
as
usize
{
self
.set_healthy
(
false
);
self
.set_healthy
(
false
);
Err
(
WorkerError
::
HealthCheckFailed
{
self
.consecutive_failures
.store
(
0
,
Ordering
::
Release
);
url
:
url
.to_string
(),
reason
:
format!
(
"Health check returned status: {}"
,
response
.status
()),
})
}
}
}
Err
(
e
)
=>
{
self
.set_healthy
(
false
);
Err
(
WorkerError
::
HealthCheckFailed
{
Err
(
WorkerError
::
HealthCheckFailed
{
url
:
url
.to_string
(),
url
:
url
.to_string
(),
reason
:
format!
(
"Health check
request
fail
ed
: {}"
,
e
),
reason
:
format!
(
"Health check
failed (consecutive
fail
ures
: {}
)
"
,
failures
),
})
})
}
}
}
}
}
fn
load
(
&
self
)
->
usize
{
fn
load
(
&
self
)
->
usize
{
self
.load_counter
.load
(
Ordering
::
Relaxed
)
self
.load_counter
.load
(
Ordering
::
Relaxed
)
...
@@ -408,43 +439,8 @@ impl Worker for DPAwareWorker {
...
@@ -408,43 +439,8 @@ impl Worker for DPAwareWorker {
}
}
async
fn
check_health_async
(
&
self
)
->
WorkerResult
<
()
>
{
async
fn
check_health_async
(
&
self
)
->
WorkerResult
<
()
>
{
// Use base URL for health checks
// Delegate to the base worker's health check logic
let
health_url
=
format!
(
"{}/health"
,
self
.base_url
);
self
.base_worker
.check_health_async
()
.await
let
timeout
=
std
::
time
::
Duration
::
from_secs
(
self
.base_worker.metadata.health_config.timeout_secs
);
let
health_result
=
async
{
let
response
=
WORKER_CLIENT
.get
(
&
health_url
)
.timeout
(
timeout
)
.send
()
.await
.map_err
(|
e
|
format!
(
"Health check request failed: {}"
,
e
))
?
;
if
response
.status
()
.is_success
()
{
Ok
(())
}
else
{
Err
(
format!
(
"Health check returned status: {}"
,
response
.status
()
))
}
}
.await
;
match
health_result
{
Ok
(())
=>
{
self
.set_healthy
(
true
);
Ok
(())
}
Err
(
reason
)
=>
{
self
.set_healthy
(
false
);
Err
(
WorkerError
::
HealthCheckFailed
{
url
:
self
.base_url
.clone
(),
reason
,
})
}
}
}
}
fn
load
(
&
self
)
->
usize
{
fn
load
(
&
self
)
->
usize
{
...
@@ -951,6 +947,8 @@ mod tests {
...
@@ -951,6 +947,8 @@ mod tests {
assert_eq!
(
config
.timeout_secs
,
5
);
assert_eq!
(
config
.timeout_secs
,
5
);
assert_eq!
(
config
.check_interval_secs
,
30
);
assert_eq!
(
config
.check_interval_secs
,
30
);
assert_eq!
(
config
.endpoint
,
"/health"
);
assert_eq!
(
config
.endpoint
,
"/health"
);
assert_eq!
(
config
.failure_threshold
,
3
);
assert_eq!
(
config
.success_threshold
,
2
);
}
}
#[test]
#[test]
...
@@ -959,10 +957,14 @@ mod tests {
...
@@ -959,10 +957,14 @@ mod tests {
timeout_secs
:
10
,
timeout_secs
:
10
,
check_interval_secs
:
60
,
check_interval_secs
:
60
,
endpoint
:
"/healthz"
.to_string
(),
endpoint
:
"/healthz"
.to_string
(),
failure_threshold
:
5
,
success_threshold
:
3
,
};
};
assert_eq!
(
config
.timeout_secs
,
10
);
assert_eq!
(
config
.timeout_secs
,
10
);
assert_eq!
(
config
.check_interval_secs
,
60
);
assert_eq!
(
config
.check_interval_secs
,
60
);
assert_eq!
(
config
.endpoint
,
"/healthz"
);
assert_eq!
(
config
.endpoint
,
"/healthz"
);
assert_eq!
(
config
.failure_threshold
,
5
);
assert_eq!
(
config
.success_threshold
,
3
);
}
}
// Test BasicWorker
// Test BasicWorker
...
@@ -994,6 +996,8 @@ mod tests {
...
@@ -994,6 +996,8 @@ mod tests {
timeout_secs
:
15
,
timeout_secs
:
15
,
check_interval_secs
:
45
,
check_interval_secs
:
45
,
endpoint
:
"/custom-health"
.to_string
(),
endpoint
:
"/custom-health"
.to_string
(),
failure_threshold
:
4
,
success_threshold
:
2
,
};
};
let
worker
=
BasicWorker
::
new
(
"http://test:8080"
.to_string
(),
WorkerType
::
Regular
)
let
worker
=
BasicWorker
::
new
(
"http://test:8080"
.to_string
(),
WorkerType
::
Regular
)
...
...
sgl-router/src/lib.rs
View file @
21b88460
...
@@ -72,6 +72,12 @@ struct Router {
...
@@ -72,6 +72,12 @@ struct Router {
cb_timeout_duration_secs
:
u64
,
cb_timeout_duration_secs
:
u64
,
cb_window_duration_secs
:
u64
,
cb_window_duration_secs
:
u64
,
disable_circuit_breaker
:
bool
,
disable_circuit_breaker
:
bool
,
// Health check configuration
health_failure_threshold
:
u32
,
health_success_threshold
:
u32
,
health_check_timeout_secs
:
u64
,
health_check_interval_secs
:
u64
,
health_check_endpoint
:
String
,
}
}
impl
Router
{
impl
Router
{
...
@@ -174,6 +180,13 @@ impl Router {
...
@@ -174,6 +180,13 @@ impl Router {
},
},
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
config
::
HealthCheckConfig
{
failure_threshold
:
self
.health_failure_threshold
,
success_threshold
:
self
.health_success_threshold
,
timeout_secs
:
self
.health_check_timeout_secs
,
check_interval_secs
:
self
.health_check_interval_secs
,
endpoint
:
self
.health_check_endpoint
.clone
(),
},
})
})
}
}
}
}
...
@@ -186,14 +199,14 @@ impl Router {
...
@@ -186,14 +199,14 @@ impl Router {
policy
=
PolicyType::RoundRobin,
policy
=
PolicyType::RoundRobin,
host
=
String::from(
"127.0.0.1"
),
host
=
String::from(
"127.0.0.1"
),
port
=
3001
,
port
=
3001
,
worker_startup_timeout_secs
=
3
00
,
worker_startup_timeout_secs
=
6
00
,
worker_startup_check_interval
=
1
0
,
worker_startup_check_interval
=
3
0
,
cache_threshold
=
0.
50
,
cache_threshold
=
0.
3
,
balance_abs_threshold
=
32
,
balance_abs_threshold
=
64
,
balance_rel_threshold
=
1.
0001
,
balance_rel_threshold
=
1.
5
,
eviction_interval_secs
=
6
0
,
eviction_interval_secs
=
12
0
,
max_tree_size
=
2u
size
.
pow(
2
4
),
max_tree_size
=
2u
size
.
pow(
2
6
),
max_payload_size
=
256
*
1024
*
1024
,
//
256
MB default for large batches
max_payload_size
=
512
*
1024
*
1024
,
//
512
MB default for large batches
dp_aware
=
false
,
dp_aware
=
false
,
api_key
=
None,
api_key
=
None,
log_dir
=
None,
log_dir
=
None,
...
@@ -207,28 +220,34 @@ impl Router {
...
@@ -207,28 +220,34 @@ impl Router {
bootstrap_port_annotation
=
String::from(
"sglang.ai/bootstrap-port"
),
bootstrap_port_annotation
=
String::from(
"sglang.ai/bootstrap-port"
),
prometheus_port
=
None,
prometheus_port
=
None,
prometheus_host
=
None,
prometheus_host
=
None,
request_timeout_secs
=
6
00
,
// Add configurable request timeout
request_timeout_secs
=
18
00
,
// Add configurable request timeout
request_id_headers
=
None,
// Custom request ID headers
request_id_headers
=
None,
// Custom request ID headers
pd_disaggregation
=
false
,
// New flag for PD mode
pd_disaggregation
=
false
,
// New flag for PD mode
prefill_urls
=
None,
prefill_urls
=
None,
decode_urls
=
None,
decode_urls
=
None,
prefill_policy
=
None,
prefill_policy
=
None,
decode_policy
=
None,
decode_policy
=
None,
max_concurrent_requests
=
6
4
,
max_concurrent_requests
=
25
6
,
cors_allowed_origins
=
vec
![
]
,
cors_allowed_origins
=
vec
![
]
,
// Retry defaults
// Retry defaults
retry_max_retries
=
3
,
retry_max_retries
=
5
,
retry_initial_backoff_ms
=
10
0
,
retry_initial_backoff_ms
=
5
0
,
retry_max_backoff_ms
=
1
0_000
,
retry_max_backoff_ms
=
3
0_000
,
retry_backoff_multiplier
=
2.0
,
retry_backoff_multiplier
=
1.5
,
retry_jitter_factor
=
0.
1
,
retry_jitter_factor
=
0.
2
,
disable_retries
=
false
,
disable_retries
=
false
,
// Circuit breaker defaults
// Circuit breaker defaults
cb_failure_threshold
=
5
,
cb_failure_threshold
=
10
,
cb_success_threshold
=
2
,
cb_success_threshold
=
3
,
cb_timeout_duration_secs
=
3
0
,
cb_timeout_duration_secs
=
6
0
,
cb_window_duration_secs
=
6
0
,
cb_window_duration_secs
=
12
0
,
disable_circuit_breaker
=
false
,
disable_circuit_breaker
=
false
,
// Health check defaults
health_failure_threshold
=
3
,
health_success_threshold
=
2
,
health_check_timeout_secs
=
5
,
health_check_interval_secs
=
60
,
health_check_endpoint
=
String
::
from
(
"/health"
),
))]
))]
fn
new
(
fn
new
(
worker_urls
:
Vec
<
String
>
,
worker_urls
:
Vec
<
String
>
,
...
@@ -276,6 +295,11 @@ impl Router {
...
@@ -276,6 +295,11 @@ impl Router {
cb_timeout_duration_secs
:
u64
,
cb_timeout_duration_secs
:
u64
,
cb_window_duration_secs
:
u64
,
cb_window_duration_secs
:
u64
,
disable_circuit_breaker
:
bool
,
disable_circuit_breaker
:
bool
,
health_failure_threshold
:
u32
,
health_success_threshold
:
u32
,
health_check_timeout_secs
:
u64
,
health_check_interval_secs
:
u64
,
health_check_endpoint
:
String
,
)
->
PyResult
<
Self
>
{
)
->
PyResult
<
Self
>
{
Ok
(
Router
{
Ok
(
Router
{
host
,
host
,
...
@@ -323,6 +347,11 @@ impl Router {
...
@@ -323,6 +347,11 @@ impl Router {
cb_timeout_duration_secs
,
cb_timeout_duration_secs
,
cb_window_duration_secs
,
cb_window_duration_secs
,
disable_circuit_breaker
,
disable_circuit_breaker
,
health_failure_threshold
,
health_success_threshold
,
health_check_timeout_secs
,
health_check_interval_secs
,
health_check_endpoint
,
})
})
}
}
...
...
sgl-router/src/main.rs
View file @
21b88460
use
clap
::{
ArgAction
,
Parser
};
use
clap
::{
ArgAction
,
Parser
};
use
sglang_router_rs
::
config
::{
use
sglang_router_rs
::
config
::{
CircuitBreakerConfig
,
ConfigError
,
ConfigResult
,
DiscoveryConfig
,
MetricsConfig
,
Policy
Config
,
CircuitBreakerConfig
,
ConfigError
,
ConfigResult
,
DiscoveryConfig
,
HealthCheck
Config
,
RetryConfig
,
RouterConfig
,
RoutingMode
,
MetricsConfig
,
PolicyConfig
,
RetryConfig
,
RouterConfig
,
RoutingMode
,
};
};
use
sglang_router_rs
::
metrics
::
PrometheusConfig
;
use
sglang_router_rs
::
metrics
::
PrometheusConfig
;
use
sglang_router_rs
::
server
::{
self
,
ServerConfig
};
use
sglang_router_rs
::
server
::{
self
,
ServerConfig
};
...
@@ -105,35 +105,35 @@ struct CliArgs {
...
@@ -105,35 +105,35 @@ struct CliArgs {
decode_policy
:
Option
<
String
>
,
decode_policy
:
Option
<
String
>
,
/// Timeout in seconds for worker startup
/// Timeout in seconds for worker startup
#[arg(long,
default_value_t
=
3
00
)]
#[arg(long,
default_value_t
=
6
00
)]
worker_startup_timeout_secs
:
u64
,
worker_startup_timeout_secs
:
u64
,
/// Interval in seconds between checks for worker startup
/// Interval in seconds between checks for worker startup
#[arg(long,
default_value_t
=
1
0
)]
#[arg(long,
default_value_t
=
3
0
)]
worker_startup_check_interval
:
u64
,
worker_startup_check_interval
:
u64
,
/// Cache threshold (0.0-1.0) for cache-aware routing
/// Cache threshold (0.0-1.0) for cache-aware routing
#[arg(long,
default_value_t
=
0.
5
)]
#[arg(long,
default_value_t
=
0.
3
)]
cache_threshold
:
f32
,
cache_threshold
:
f32
,
/// Absolute threshold for load balancing
/// Absolute threshold for load balancing
#[arg(long,
default_value_t
=
32
)]
#[arg(long,
default_value_t
=
64
)]
balance_abs_threshold
:
usize
,
balance_abs_threshold
:
usize
,
/// Relative threshold for load balancing
/// Relative threshold for load balancing
#[arg(long,
default_value_t
=
1.
0001
)]
#[arg(long,
default_value_t
=
1.
5
)]
balance_rel_threshold
:
f32
,
balance_rel_threshold
:
f32
,
/// Interval in seconds between cache eviction operations
/// Interval in seconds between cache eviction operations
#[arg(long,
default_value_t
=
6
0
)]
#[arg(long,
default_value_t
=
12
0
)]
eviction_interval
:
u64
,
eviction_interval
:
u64
,
/// Maximum size of the approximation tree for cache-aware routing
/// Maximum size of the approximation tree for cache-aware routing
#[arg(long,
default_value_t
=
1
67
77216
)]
// 2^2
4
#[arg(long,
default_value_t
=
67
108864
)]
// 2^2
6
max_tree_size
:
usize
,
max_tree_size
:
usize
,
/// Maximum payload size in bytes
/// Maximum payload size in bytes
#[arg(long,
default_value_t
=
268435456
)]
//
256
MB
#[arg(long,
default_value_t
=
536870912
)]
//
512
MB
max_payload_size
:
usize
,
max_payload_size
:
usize
,
/// Enable data parallelism aware schedule
/// Enable data parallelism aware schedule
...
@@ -189,11 +189,11 @@ struct CliArgs {
...
@@ -189,11 +189,11 @@ struct CliArgs {
request_id_headers
:
Vec
<
String
>
,
request_id_headers
:
Vec
<
String
>
,
/// Request timeout in seconds
/// Request timeout in seconds
#[arg(long,
default_value_t
=
6
00
)]
#[arg(long,
default_value_t
=
18
00
)]
request_timeout_secs
:
u64
,
request_timeout_secs
:
u64
,
/// Maximum number of concurrent requests allowed
/// Maximum number of concurrent requests allowed
#[arg(long,
default_value_t
=
6
4
)]
#[arg(long,
default_value_t
=
25
6
)]
max_concurrent_requests
:
usize
,
max_concurrent_requests
:
usize
,
/// CORS allowed origins
/// CORS allowed origins
...
@@ -202,23 +202,23 @@ struct CliArgs {
...
@@ -202,23 +202,23 @@ struct CliArgs {
// Retry configuration
// Retry configuration
/// Maximum number of retries
/// Maximum number of retries
#[arg(long,
default_value_t
=
3
)]
#[arg(long,
default_value_t
=
5
)]
retry_max_retries
:
u32
,
retry_max_retries
:
u32
,
/// Initial backoff in milliseconds for retries
/// Initial backoff in milliseconds for retries
#[arg(long,
default_value_t
=
10
0
)]
#[arg(long,
default_value_t
=
5
0
)]
retry_initial_backoff_ms
:
u64
,
retry_initial_backoff_ms
:
u64
,
/// Maximum backoff in milliseconds for retries
/// Maximum backoff in milliseconds for retries
#[arg(long,
default_value_t
=
1
0000
)]
#[arg(long,
default_value_t
=
3
0000
)]
retry_max_backoff_ms
:
u64
,
retry_max_backoff_ms
:
u64
,
/// Backoff multiplier for exponential backoff
/// Backoff multiplier for exponential backoff
#[arg(long,
default_value_t
=
2.0
)]
#[arg(long,
default_value_t
=
1.5
)]
retry_backoff_multiplier
:
f32
,
retry_backoff_multiplier
:
f32
,
/// Jitter factor for retry backoff
/// Jitter factor for retry backoff
#[arg(long,
default_value_t
=
0.
1
)]
#[arg(long,
default_value_t
=
0.
2
)]
retry_jitter_factor
:
f32
,
retry_jitter_factor
:
f32
,
/// Disable retries
/// Disable retries
...
@@ -227,24 +227,45 @@ struct CliArgs {
...
@@ -227,24 +227,45 @@ struct CliArgs {
// Circuit breaker configuration
// Circuit breaker configuration
/// Number of failures before circuit breaker opens
/// Number of failures before circuit breaker opens
#[arg(long,
default_value_t
=
5
)]
#[arg(long,
default_value_t
=
10
)]
cb_failure_threshold
:
u32
,
cb_failure_threshold
:
u32
,
/// Number of successes before circuit breaker closes
/// Number of successes before circuit breaker closes
#[arg(long,
default_value_t
=
2
)]
#[arg(long,
default_value_t
=
3
)]
cb_success_threshold
:
u32
,
cb_success_threshold
:
u32
,
/// Timeout duration in seconds for circuit breaker
/// Timeout duration in seconds for circuit breaker
#[arg(long,
default_value_t
=
3
0
)]
#[arg(long,
default_value_t
=
6
0
)]
cb_timeout_duration_secs
:
u64
,
cb_timeout_duration_secs
:
u64
,
/// Window duration in seconds for circuit breaker
/// Window duration in seconds for circuit breaker
#[arg(long,
default_value_t
=
6
0
)]
#[arg(long,
default_value_t
=
12
0
)]
cb_window_duration_secs
:
u64
,
cb_window_duration_secs
:
u64
,
/// Disable circuit breaker
/// Disable circuit breaker
#[arg(long,
default_value_t
=
false
)]
#[arg(long,
default_value_t
=
false
)]
disable_circuit_breaker
:
bool
,
disable_circuit_breaker
:
bool
,
// Health check configuration
/// Number of consecutive health check failures before marking worker unhealthy
#[arg(long,
default_value_t
=
3
)]
health_failure_threshold
:
u32
,
/// Number of consecutive health check successes before marking worker healthy
#[arg(long,
default_value_t
=
2
)]
health_success_threshold
:
u32
,
/// Timeout in seconds for health check requests
#[arg(long,
default_value_t
=
5
)]
health_check_timeout_secs
:
u64
,
/// Interval in seconds between runtime health checks
#[arg(long,
default_value_t
=
60
)]
health_check_interval_secs
:
u64
,
/// Health check endpoint path
#[arg(long,
default_value
=
"/health"
)]
health_check_endpoint
:
String
,
}
}
impl
CliArgs
{
impl
CliArgs
{
...
@@ -378,6 +399,13 @@ impl CliArgs {
...
@@ -378,6 +399,13 @@ impl CliArgs {
},
},
disable_retries
:
self
.disable_retries
,
disable_retries
:
self
.disable_retries
,
disable_circuit_breaker
:
self
.disable_circuit_breaker
,
disable_circuit_breaker
:
self
.disable_circuit_breaker
,
health_check
:
HealthCheckConfig
{
failure_threshold
:
self
.health_failure_threshold
,
success_threshold
:
self
.health_success_threshold
,
timeout_secs
:
self
.health_check_timeout_secs
,
check_interval_secs
:
self
.health_check_interval_secs
,
endpoint
:
self
.health_check_endpoint
.clone
(),
},
})
})
}
}
...
...
sgl-router/src/routers/factory.rs
View file @
21b88460
...
@@ -55,6 +55,7 @@ impl RouterFactory {
...
@@ -55,6 +55,7 @@ impl RouterFactory {
ctx
.router_config.api_key
.clone
(),
ctx
.router_config.api_key
.clone
(),
ctx
.router_config.retry
.clone
(),
ctx
.router_config.retry
.clone
(),
ctx
.router_config.circuit_breaker
.clone
(),
ctx
.router_config.circuit_breaker
.clone
(),
ctx
.router_config.health_check
.clone
(),
)
)
.await
?
;
.await
?
;
...
@@ -87,6 +88,7 @@ impl RouterFactory {
...
@@ -87,6 +88,7 @@ impl RouterFactory {
ctx
.router_config.worker_startup_check_interval_secs
,
ctx
.router_config.worker_startup_check_interval_secs
,
ctx
.router_config.retry
.clone
(),
ctx
.router_config.retry
.clone
(),
ctx
.router_config.circuit_breaker
.clone
(),
ctx
.router_config.circuit_breaker
.clone
(),
ctx
.router_config.health_check
.clone
(),
)
)
.await
?
;
.await
?
;
...
...
sgl-router/src/routers/pd_router.rs
View file @
21b88460
// PD (Prefill-Decode) Router Implementation
// PD (Prefill-Decode) Router Implementation
// This module handles routing for disaggregated prefill-decode systems
// This module handles routing for disaggregated prefill-decode systems
use
super
::
pd_types
::{
api_path
,
PDRouterError
};
use
super
::
pd_types
::{
api_path
,
PDRouterError
};
use
crate
::
config
::
types
::{
CircuitBreakerConfig
as
ConfigCircuitBreakerConfig
,
RetryConfig
};
use
crate
::
config
::
types
::{
CircuitBreakerConfig
as
ConfigCircuitBreakerConfig
,
HealthCheckConfig
as
ConfigHealthCheckConfig
,
RetryConfig
,
};
use
crate
::
core
::{
use
crate
::
core
::{
is_retryable_status
,
CircuitBreakerConfig
,
HealthChecker
,
RetryExecutor
,
Worker
,
WorkerFactory
,
is_retryable_status
,
BasicWorker
,
CircuitBreakerConfig
,
HealthChecker
,
HealthConfig
,
WorkerLoadGuard
,
RetryExecutor
,
Worker
,
WorkerFactory
,
WorkerLoadGuard
,
WorkerType
,
};
};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
openai_api_types
::{
ChatCompletionRequest
,
CompletionRequest
,
GenerateRequest
};
use
crate
::
openai_api_types
::{
ChatCompletionRequest
,
CompletionRequest
,
GenerateRequest
};
...
@@ -360,6 +363,7 @@ impl PDRouter {
...
@@ -360,6 +363,7 @@ impl PDRouter {
interval_secs
:
u64
,
interval_secs
:
u64
,
retry_config
:
RetryConfig
,
retry_config
:
RetryConfig
,
circuit_breaker_config
:
ConfigCircuitBreakerConfig
,
circuit_breaker_config
:
ConfigCircuitBreakerConfig
,
health_check_config
:
ConfigHealthCheckConfig
,
)
->
Result
<
Self
,
String
>
{
)
->
Result
<
Self
,
String
>
{
// Convert config CircuitBreakerConfig to core CircuitBreakerConfig
// Convert config CircuitBreakerConfig to core CircuitBreakerConfig
let
core_cb_config
=
CircuitBreakerConfig
{
let
core_cb_config
=
CircuitBreakerConfig
{
...
@@ -369,17 +373,42 @@ impl PDRouter {
...
@@ -369,17 +373,42 @@ impl PDRouter {
window_duration
:
Duration
::
from_secs
(
circuit_breaker_config
.window_duration_secs
),
window_duration
:
Duration
::
from_secs
(
circuit_breaker_config
.window_duration_secs
),
};
};
// Convert URLs to Worker trait objects
// Convert URLs to Worker trait objects
with health check config
let
prefill_workers
:
Vec
<
Box
<
dyn
Worker
>>
=
prefill_urls
let
prefill_workers
:
Vec
<
Box
<
dyn
Worker
>>
=
prefill_urls
.into_iter
()
.into_iter
()
.map
(|(
url
,
port
)|
{
.map
(|(
url
,
port
)|
{
WorkerFactory
::
create_prefill_with_config
(
url
,
port
,
core_cb_config
.clone
())
let
worker
=
BasicWorker
::
new
(
url
,
WorkerType
::
Prefill
{
bootstrap_port
:
port
,
},
)
.with_circuit_breaker_config
(
core_cb_config
.clone
())
.with_health_config
(
HealthConfig
{
timeout_secs
:
health_check_config
.timeout_secs
,
check_interval_secs
:
health_check_config
.check_interval_secs
,
endpoint
:
health_check_config
.endpoint
.clone
(),
failure_threshold
:
health_check_config
.failure_threshold
,
success_threshold
:
health_check_config
.success_threshold
,
});
Box
::
new
(
worker
)
as
Box
<
dyn
Worker
>
})
})
.collect
();
.collect
();
let
decode_workers
:
Vec
<
Box
<
dyn
Worker
>>
=
decode_urls
let
decode_workers
:
Vec
<
Box
<
dyn
Worker
>>
=
decode_urls
.into_iter
()
.into_iter
()
.map
(|
url
|
WorkerFactory
::
create_decode_with_config
(
url
,
core_cb_config
.clone
()))
.map
(|
url
|
{
let
worker
=
BasicWorker
::
new
(
url
,
WorkerType
::
Decode
)
.with_circuit_breaker_config
(
core_cb_config
.clone
())
.with_health_config
(
HealthConfig
{
timeout_secs
:
health_check_config
.timeout_secs
,
check_interval_secs
:
health_check_config
.check_interval_secs
,
endpoint
:
health_check_config
.endpoint
.clone
(),
failure_threshold
:
health_check_config
.failure_threshold
,
success_threshold
:
health_check_config
.success_threshold
,
});
Box
::
new
(
worker
)
as
Box
<
dyn
Worker
>
})
.collect
();
.collect
();
// Wait for PD workers to be healthy (skip if empty - for service discovery mode)
// Wait for PD workers to be healthy (skip if empty - for service discovery mode)
...
@@ -443,10 +472,14 @@ impl PDRouter {
...
@@ -443,10 +472,14 @@ impl PDRouter {
let
decode_workers
=
Arc
::
new
(
RwLock
::
new
(
decode_workers
));
let
decode_workers
=
Arc
::
new
(
RwLock
::
new
(
decode_workers
));
// Start health checkers for both worker pools
// Start health checkers for both worker pools
let
prefill_health_checker
=
let
prefill_health_checker
=
crate
::
core
::
start_health_checker
(
crate
::
core
::
start_health_checker
(
Arc
::
clone
(
&
prefill_workers
),
interval_secs
);
Arc
::
clone
(
&
prefill_workers
),
let
decode_health_checker
=
health_check_config
.check_interval_secs
,
crate
::
core
::
start_health_checker
(
Arc
::
clone
(
&
decode_workers
),
interval_secs
);
);
let
decode_health_checker
=
crate
::
core
::
start_health_checker
(
Arc
::
clone
(
&
decode_workers
),
health_check_config
.check_interval_secs
,
);
// Build a dedicated prefill client for fire-and-forget semantics
// Build a dedicated prefill client for fire-and-forget semantics
let
prefill_client
=
reqwest
::
Client
::
builder
()
let
prefill_client
=
reqwest
::
Client
::
builder
()
...
...
sgl-router/src/routers/router.rs
View file @
21b88460
use
crate
::
config
::
types
::{
CircuitBreakerConfig
as
ConfigCircuitBreakerConfig
,
RetryConfig
};
use
crate
::
config
::
types
::{
CircuitBreakerConfig
as
ConfigCircuitBreakerConfig
,
HealthCheckConfig
as
ConfigHealthCheckConfig
,
RetryConfig
,
};
use
crate
::
core
::{
use
crate
::
core
::{
is_retryable_status
,
CircuitBreakerConfig
,
HealthChecker
,
RetryExecutor
,
Worker
,
WorkerFactory
,
is_retryable_status
,
BasicWorker
,
CircuitBreakerConfig
,
HealthChecker
,
HealthConfig
,
RetryExecutor
,
Worker
,
WorkerFactory
,
WorkerType
,
};
};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
openai_api_types
::{
ChatCompletionRequest
,
CompletionRequest
,
GenerateRequest
};
use
crate
::
openai_api_types
::{
ChatCompletionRequest
,
CompletionRequest
,
GenerateRequest
};
...
@@ -61,6 +65,7 @@ impl Router {
...
@@ -61,6 +65,7 @@ impl Router {
api_key
:
Option
<
String
>
,
api_key
:
Option
<
String
>
,
retry_config
:
RetryConfig
,
retry_config
:
RetryConfig
,
circuit_breaker_config
:
ConfigCircuitBreakerConfig
,
circuit_breaker_config
:
ConfigCircuitBreakerConfig
,
health_check_config
:
ConfigHealthCheckConfig
,
)
->
Result
<
Self
,
String
>
{
)
->
Result
<
Self
,
String
>
{
// Update active workers gauge
// Update active workers gauge
RouterMetrics
::
set_active_workers
(
worker_urls
.len
());
RouterMetrics
::
set_active_workers
(
worker_urls
.len
());
...
@@ -86,11 +91,20 @@ impl Router {
...
@@ -86,11 +91,20 @@ impl Router {
window_duration
:
Duration
::
from_secs
(
circuit_breaker_config
.window_duration_secs
),
window_duration
:
Duration
::
from_secs
(
circuit_breaker_config
.window_duration_secs
),
};
};
// Create Worker trait objects from URLs
// Create Worker trait objects from URLs
with health check config
let
workers
:
Vec
<
Box
<
dyn
Worker
>>
=
worker_urls
let
workers
:
Vec
<
Box
<
dyn
Worker
>>
=
worker_urls
.iter
()
.iter
()
.map
(|
url
|
{
.map
(|
url
|
{
WorkerFactory
::
create_regular_with_config
(
url
.clone
(),
core_cb_config
.clone
())
let
worker
=
BasicWorker
::
new
(
url
.clone
(),
WorkerType
::
Regular
)
.with_circuit_breaker_config
(
core_cb_config
.clone
())
.with_health_config
(
HealthConfig
{
timeout_secs
:
health_check_config
.timeout_secs
,
check_interval_secs
:
health_check_config
.check_interval_secs
,
endpoint
:
health_check_config
.endpoint
.clone
(),
failure_threshold
:
health_check_config
.failure_threshold
,
success_threshold
:
health_check_config
.success_threshold
,
});
Box
::
new
(
worker
)
as
Box
<
dyn
Worker
>
})
})
.collect
();
.collect
();
...
...
sgl-router/src/service_discovery.rs
View file @
21b88460
...
@@ -592,6 +592,7 @@ mod tests {
...
@@ -592,6 +592,7 @@ mod tests {
None
,
None
,
crate
::
config
::
types
::
RetryConfig
::
default
(),
crate
::
config
::
types
::
RetryConfig
::
default
(),
crate
::
config
::
types
::
CircuitBreakerConfig
::
default
(),
crate
::
config
::
types
::
CircuitBreakerConfig
::
default
(),
crate
::
config
::
types
::
HealthCheckConfig
::
default
(),
)
)
.await
.await
.unwrap
();
.unwrap
();
...
...
sgl-router/tests/api_endpoints_test.rs
View file @
21b88460
...
@@ -50,6 +50,7 @@ impl TestContext {
...
@@ -50,6 +50,7 @@ impl TestContext {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
};
};
Self
::
new_with_config
(
config
,
worker_configs
)
.await
Self
::
new_with_config
(
config
,
worker_configs
)
.await
...
@@ -1091,6 +1092,7 @@ mod error_tests {
...
@@ -1091,6 +1092,7 @@ mod error_tests {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
};
};
let
ctx
=
TestContext
::
new_with_config
(
let
ctx
=
TestContext
::
new_with_config
(
...
@@ -1441,6 +1443,7 @@ mod pd_mode_tests {
...
@@ -1441,6 +1443,7 @@ mod pd_mode_tests {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
};
};
// Create app context
// Create app context
...
@@ -1595,6 +1598,7 @@ mod request_id_tests {
...
@@ -1595,6 +1598,7 @@ mod request_id_tests {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
};
};
let
ctx
=
TestContext
::
new_with_config
(
let
ctx
=
TestContext
::
new_with_config
(
...
...
sgl-router/tests/request_formats_test.rs
View file @
21b88460
...
@@ -41,6 +41,7 @@ impl TestContext {
...
@@ -41,6 +41,7 @@ impl TestContext {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
};
};
let
mut
workers
=
Vec
::
new
();
let
mut
workers
=
Vec
::
new
();
...
...
sgl-router/tests/streaming_tests.rs
View file @
21b88460
...
@@ -42,6 +42,7 @@ impl TestContext {
...
@@ -42,6 +42,7 @@ impl TestContext {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
};
};
let
mut
workers
=
Vec
::
new
();
let
mut
workers
=
Vec
::
new
();
...
...
sgl-router/tests/test_pd_routing.rs
View file @
21b88460
...
@@ -184,6 +184,7 @@ mod test_pd_routing {
...
@@ -184,6 +184,7 @@ mod test_pd_routing {
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
circuit_breaker
:
CircuitBreakerConfig
::
default
(),
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
};
};
// Router creation will fail due to health checks, but config should be valid
// Router creation will fail due to health checks, but config should be valid
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment