Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
3828db43
Unverified
Commit
3828db43
authored
Aug 20, 2025
by
Keyang Ru
Committed by
GitHub
Aug 20, 2025
Browse files
[router] Add IGW (Inference Gateway) Feature Flag (#9371)
Co-authored-by:
Yineng Zhang
<
me@zhyncs.com
>
parent
88fbc31b
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
74 additions
and
8 deletions
+74
-8
.github/workflows/pr-test-rust.yml
.github/workflows/pr-test-rust.yml
+1
-1
sgl-router/src/config/types.rs
sgl-router/src/config/types.rs
+13
-0
sgl-router/src/config/validation.rs
sgl-router/src/config/validation.rs
+5
-0
sgl-router/src/lib.rs
sgl-router/src/lib.rs
+13
-1
sgl-router/src/main.rs
sgl-router/src/main.rs
+23
-6
sgl-router/src/routers/factory.rs
sgl-router/src/routers/factory.rs
+12
-0
sgl-router/tests/api_endpoints_test.rs
sgl-router/tests/api_endpoints_test.rs
+4
-0
sgl-router/tests/request_formats_test.rs
sgl-router/tests/request_formats_test.rs
+1
-0
sgl-router/tests/streaming_tests.rs
sgl-router/tests/streaming_tests.rs
+1
-0
sgl-router/tests/test_pd_routing.rs
sgl-router/tests/test_pd_routing.rs
+1
-0
No files found.
.github/workflows/pr-test-rust.yml
View file @
3828db43
...
@@ -53,7 +53,7 @@ jobs:
...
@@ -53,7 +53,7 @@ jobs:
cargo check --benches
cargo check --benches
-
name
:
Quick benchmark sanity check
-
name
:
Quick benchmark sanity check
timeout-minutes
:
1
0
timeout-minutes
:
1
5
run
:
|
run
:
|
source "$HOME/.cargo/env"
source "$HOME/.cargo/env"
cd sgl-router/
cd sgl-router/
...
...
sgl-router/src/config/types.rs
View file @
3828db43
...
@@ -51,6 +51,9 @@ pub struct RouterConfig {
...
@@ -51,6 +51,9 @@ pub struct RouterConfig {
pub
disable_circuit_breaker
:
bool
,
pub
disable_circuit_breaker
:
bool
,
/// Health check configuration
/// Health check configuration
pub
health_check
:
HealthCheckConfig
,
pub
health_check
:
HealthCheckConfig
,
/// Enable Inference Gateway mode (false = proxy mode, true = IGW mode)
#[serde(default)]
pub
enable_igw
:
bool
,
}
}
/// Routing mode configuration
/// Routing mode configuration
...
@@ -323,6 +326,7 @@ impl Default for RouterConfig {
...
@@ -323,6 +326,7 @@ impl Default for RouterConfig {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
health_check
:
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
}
}
}
}
}
}
...
@@ -377,6 +381,11 @@ impl RouterConfig {
...
@@ -377,6 +381,11 @@ impl RouterConfig {
}
}
cfg
cfg
}
}
/// Check if running in IGW (Inference Gateway) mode
pub
fn
is_igw_mode
(
&
self
)
->
bool
{
self
.enable_igw
}
}
}
#[cfg(test)]
#[cfg(test)]
...
@@ -456,6 +465,7 @@ mod tests {
...
@@ -456,6 +465,7 @@ mod tests {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
health_check
:
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
let
json
=
serde_json
::
to_string
(
&
config
)
.unwrap
();
let
json
=
serde_json
::
to_string
(
&
config
)
.unwrap
();
...
@@ -888,6 +898,7 @@ mod tests {
...
@@ -888,6 +898,7 @@ mod tests {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
health_check
:
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
assert
!
(
config
.mode
.is_pd_mode
());
assert
!
(
config
.mode
.is_pd_mode
());
...
@@ -944,6 +955,7 @@ mod tests {
...
@@ -944,6 +955,7 @@ mod tests {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
health_check
:
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
assert
!
(
!
config
.mode
.is_pd_mode
());
assert
!
(
!
config
.mode
.is_pd_mode
());
...
@@ -996,6 +1008,7 @@ mod tests {
...
@@ -996,6 +1008,7 @@ mod tests {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
HealthCheckConfig
::
default
(),
health_check
:
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
assert
!
(
config
.has_service_discovery
());
assert
!
(
config
.has_service_discovery
());
...
...
sgl-router/src/config/validation.rs
View file @
3828db43
...
@@ -344,6 +344,11 @@ impl ConfigValidator {
...
@@ -344,6 +344,11 @@ impl ConfigValidator {
/// Validate compatibility between different configuration sections
/// Validate compatibility between different configuration sections
fn
validate_compatibility
(
config
:
&
RouterConfig
)
->
ConfigResult
<
()
>
{
fn
validate_compatibility
(
config
:
&
RouterConfig
)
->
ConfigResult
<
()
>
{
// IGW mode is independent - skip other compatibility checks when enabled
if
config
.enable_igw
{
return
Ok
(());
}
// All policies are now supported for both router types thanks to the unified trait design
// All policies are now supported for both router types thanks to the unified trait design
// No mode/policy restrictions needed anymore
// No mode/policy restrictions needed anymore
...
...
sgl-router/src/lib.rs
View file @
3828db43
...
@@ -82,6 +82,8 @@ struct Router {
...
@@ -82,6 +82,8 @@ struct Router {
health_check_timeout_secs
:
u64
,
health_check_timeout_secs
:
u64
,
health_check_interval_secs
:
u64
,
health_check_interval_secs
:
u64
,
health_check_endpoint
:
String
,
health_check_endpoint
:
String
,
// IGW (Inference Gateway) configuration
enable_igw
:
bool
,
}
}
impl
Router
{
impl
Router
{
...
@@ -110,7 +112,12 @@ impl Router {
...
@@ -110,7 +112,12 @@ impl Router {
};
};
// Determine routing mode
// Determine routing mode
let
mode
=
if
self
.pd_disaggregation
{
let
mode
=
if
self
.enable_igw
{
// IGW mode - routing mode is not used in IGW, but we need to provide a placeholder
RoutingMode
::
Regular
{
worker_urls
:
vec!
[],
}
}
else
if
self
.pd_disaggregation
{
RoutingMode
::
PrefillDecode
{
RoutingMode
::
PrefillDecode
{
prefill_urls
:
self
.prefill_urls
.clone
()
.unwrap_or_default
(),
prefill_urls
:
self
.prefill_urls
.clone
()
.unwrap_or_default
(),
decode_urls
:
self
.decode_urls
.clone
()
.unwrap_or_default
(),
decode_urls
:
self
.decode_urls
.clone
()
.unwrap_or_default
(),
...
@@ -191,6 +198,7 @@ impl Router {
...
@@ -191,6 +198,7 @@ impl Router {
check_interval_secs
:
self
.health_check_interval_secs
,
check_interval_secs
:
self
.health_check_interval_secs
,
endpoint
:
self
.health_check_endpoint
.clone
(),
endpoint
:
self
.health_check_endpoint
.clone
(),
},
},
enable_igw
:
self
.enable_igw
,
})
})
}
}
}
}
...
@@ -252,6 +260,8 @@ impl Router {
...
@@ -252,6 +260,8 @@ impl Router {
health_check_timeout_secs
=
5
,
health_check_timeout_secs
=
5
,
health_check_interval_secs
=
60
,
health_check_interval_secs
=
60
,
health_check_endpoint
=
String
::
from
(
"/health"
),
health_check_endpoint
=
String
::
from
(
"/health"
),
// IGW defaults
enable_igw
=
false
,
))]
))]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::too_many_arguments)]
fn
new
(
fn
new
(
...
@@ -305,6 +315,7 @@ impl Router {
...
@@ -305,6 +315,7 @@ impl Router {
health_check_timeout_secs
:
u64
,
health_check_timeout_secs
:
u64
,
health_check_interval_secs
:
u64
,
health_check_interval_secs
:
u64
,
health_check_endpoint
:
String
,
health_check_endpoint
:
String
,
enable_igw
:
bool
,
)
->
PyResult
<
Self
>
{
)
->
PyResult
<
Self
>
{
Ok
(
Router
{
Ok
(
Router
{
host
,
host
,
...
@@ -357,6 +368,7 @@ impl Router {
...
@@ -357,6 +368,7 @@ impl Router {
health_check_timeout_secs
,
health_check_timeout_secs
,
health_check_interval_secs
,
health_check_interval_secs
,
health_check_endpoint
,
health_check_endpoint
,
enable_igw
,
})
})
}
}
...
...
sgl-router/src/main.rs
View file @
3828db43
...
@@ -70,6 +70,7 @@ Examples:
...
@@ -70,6 +70,7 @@ Examples:
--decode http://127.0.0.3:30003 \
--decode http://127.0.0.3:30003 \
--decode http://127.0.0.4:30004 \
--decode http://127.0.0.4:30004 \
--prefill-policy cache_aware --decode-policy power_of_two
--prefill-policy cache_aware --decode-policy power_of_two
"#
)]
"#
)]
struct
CliArgs
{
struct
CliArgs
{
/// Host address to bind the router server
/// Host address to bind the router server
...
@@ -266,6 +267,11 @@ struct CliArgs {
...
@@ -266,6 +267,11 @@ struct CliArgs {
/// Health check endpoint path
/// Health check endpoint path
#[arg(long,
default_value
=
"/health"
)]
#[arg(long,
default_value
=
"/health"
)]
health_check_endpoint
:
String
,
health_check_endpoint
:
String
,
// IGW (Inference Gateway) configuration
/// Enable Inference Gateway mode
#[arg(long,
default_value_t
=
false
)]
enable_igw
:
bool
,
}
}
impl
CliArgs
{
impl
CliArgs
{
...
@@ -307,7 +313,12 @@ impl CliArgs {
...
@@ -307,7 +313,12 @@ impl CliArgs {
prefill_urls
:
Vec
<
(
String
,
Option
<
u16
>
)
>
,
prefill_urls
:
Vec
<
(
String
,
Option
<
u16
>
)
>
,
)
->
ConfigResult
<
RouterConfig
>
{
)
->
ConfigResult
<
RouterConfig
>
{
// Determine routing mode
// Determine routing mode
let
mode
=
if
self
.pd_disaggregation
{
let
mode
=
if
self
.enable_igw
{
// IGW mode - routing mode is not used in IGW, but we need to provide a placeholder
RoutingMode
::
Regular
{
worker_urls
:
vec!
[],
}
}
else
if
self
.pd_disaggregation
{
let
decode_urls
=
self
.decode
.clone
();
let
decode_urls
=
self
.decode
.clone
();
// Validate PD configuration if not using service discovery
// Validate PD configuration if not using service discovery
...
@@ -406,6 +417,7 @@ impl CliArgs {
...
@@ -406,6 +417,7 @@ impl CliArgs {
check_interval_secs
:
self
.health_check_interval_secs
,
check_interval_secs
:
self
.health_check_interval_secs
,
endpoint
:
self
.health_check_endpoint
.clone
(),
endpoint
:
self
.health_check_endpoint
.clone
(),
},
},
enable_igw
:
self
.enable_igw
,
})
})
}
}
...
@@ -487,17 +499,22 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
...
@@ -487,17 +499,22 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
println!
(
"Host: {}:{}"
,
cli_args
.host
,
cli_args
.port
);
println!
(
"Host: {}:{}"
,
cli_args
.host
,
cli_args
.port
);
println!
(
println!
(
"Mode: {}"
,
"Mode: {}"
,
if
cli_args
.pd_disaggregation
{
if
cli_args
.enable_igw
{
"IGW (Inference Gateway)"
}
else
if
cli_args
.pd_disaggregation
{
"PD Disaggregated"
"PD Disaggregated"
}
else
{
}
else
{
"Regular"
"Regular"
}
}
);
);
println!
(
"Policy: {}"
,
cli_args
.policy
);
if
cli_args
.pd_disaggregation
&&
!
prefill_urls
.is_empty
()
{
if
!
cli_args
.enable_igw
{
println!
(
"Prefill nodes: {:?}"
,
prefill_urls
);
println!
(
"Policy: {}"
,
cli_args
.policy
);
println!
(
"Decode nodes: {:?}"
,
cli_args
.decode
);
if
cli_args
.pd_disaggregation
&&
!
prefill_urls
.is_empty
()
{
println!
(
"Prefill nodes: {:?}"
,
prefill_urls
);
println!
(
"Decode nodes: {:?}"
,
cli_args
.decode
);
}
}
}
// Convert to RouterConfig
// Convert to RouterConfig
...
...
sgl-router/src/routers/factory.rs
View file @
3828db43
...
@@ -12,6 +12,12 @@ pub struct RouterFactory;
...
@@ -12,6 +12,12 @@ pub struct RouterFactory;
impl
RouterFactory
{
impl
RouterFactory
{
/// Create a router instance from application context
/// Create a router instance from application context
pub
async
fn
create_router
(
ctx
:
&
Arc
<
AppContext
>
)
->
Result
<
Box
<
dyn
RouterTrait
>
,
String
>
{
pub
async
fn
create_router
(
ctx
:
&
Arc
<
AppContext
>
)
->
Result
<
Box
<
dyn
RouterTrait
>
,
String
>
{
// Check if IGW mode is enabled
if
ctx
.router_config.enable_igw
{
return
Self
::
create_igw_router
(
ctx
)
.await
;
}
// Default to proxy mode
match
&
ctx
.router_config.mode
{
match
&
ctx
.router_config.mode
{
RoutingMode
::
Regular
{
worker_urls
}
=>
{
RoutingMode
::
Regular
{
worker_urls
}
=>
{
Self
::
create_regular_router
(
worker_urls
,
&
ctx
.router_config.policy
,
ctx
)
.await
Self
::
create_regular_router
(
worker_urls
,
&
ctx
.router_config.policy
,
ctx
)
.await
...
@@ -94,4 +100,10 @@ impl RouterFactory {
...
@@ -94,4 +100,10 @@ impl RouterFactory {
Ok
(
Box
::
new
(
router
))
Ok
(
Box
::
new
(
router
))
}
}
/// Create an IGW router (placeholder for future implementation)
async
fn
create_igw_router
(
_
ctx
:
&
Arc
<
AppContext
>
)
->
Result
<
Box
<
dyn
RouterTrait
>
,
String
>
{
// For now, return an error indicating IGW is not yet implemented
Err
(
"IGW mode is not yet implemented"
.to_string
())
}
}
}
sgl-router/tests/api_endpoints_test.rs
View file @
3828db43
...
@@ -51,6 +51,7 @@ impl TestContext {
...
@@ -51,6 +51,7 @@ impl TestContext {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
Self
::
new_with_config
(
config
,
worker_configs
)
.await
Self
::
new_with_config
(
config
,
worker_configs
)
.await
...
@@ -1093,6 +1094,7 @@ mod error_tests {
...
@@ -1093,6 +1094,7 @@ mod error_tests {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
let
ctx
=
TestContext
::
new_with_config
(
let
ctx
=
TestContext
::
new_with_config
(
...
@@ -1444,6 +1446,7 @@ mod pd_mode_tests {
...
@@ -1444,6 +1446,7 @@ mod pd_mode_tests {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
// Create app context
// Create app context
...
@@ -1599,6 +1602,7 @@ mod request_id_tests {
...
@@ -1599,6 +1602,7 @@ mod request_id_tests {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
let
ctx
=
TestContext
::
new_with_config
(
let
ctx
=
TestContext
::
new_with_config
(
...
...
sgl-router/tests/request_formats_test.rs
View file @
3828db43
...
@@ -42,6 +42,7 @@ impl TestContext {
...
@@ -42,6 +42,7 @@ impl TestContext {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
let
mut
workers
=
Vec
::
new
();
let
mut
workers
=
Vec
::
new
();
...
...
sgl-router/tests/streaming_tests.rs
View file @
3828db43
...
@@ -43,6 +43,7 @@ impl TestContext {
...
@@ -43,6 +43,7 @@ impl TestContext {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
let
mut
workers
=
Vec
::
new
();
let
mut
workers
=
Vec
::
new
();
...
...
sgl-router/tests/test_pd_routing.rs
View file @
3828db43
...
@@ -184,6 +184,7 @@ mod test_pd_routing {
...
@@ -184,6 +184,7 @@ mod test_pd_routing {
disable_retries
:
false
,
disable_retries
:
false
,
disable_circuit_breaker
:
false
,
disable_circuit_breaker
:
false
,
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
health_check
:
sglang_router_rs
::
config
::
HealthCheckConfig
::
default
(),
enable_igw
:
false
,
};
};
// Router creation will fail due to health checks, but config should be valid
// Router creation will fail due to health checks, but config should be valid
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment