Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
212f5e48
Unverified
Commit
212f5e48
authored
Oct 25, 2025
by
Simo Lin
Committed by
GitHub
Oct 25, 2025
Browse files
[router] MCP Manager Refactoring - Flat Architecture with Connection Pooling (#12097)
parent
fe527812
Changes
36
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
490 additions
and
277 deletions
+490
-277
sgl-router/src/routers/grpc/responses/tool_loop.rs
sgl-router/src/routers/grpc/responses/tool_loop.rs
+6
-7
sgl-router/src/routers/grpc/router.rs
sgl-router/src/routers/grpc/router.rs
+9
-21
sgl-router/src/routers/openai/mcp.rs
sgl-router/src/routers/openai/mcp.rs
+37
-21
sgl-router/src/routers/openai/router.rs
sgl-router/src/routers/openai/router.rs
+34
-48
sgl-router/src/routers/openai/streaming.rs
sgl-router/src/routers/openai/streaming.rs
+18
-11
sgl-router/src/server.rs
sgl-router/src/server.rs
+25
-3
sgl-router/src/service_discovery.rs
sgl-router/src/service_discovery.rs
+1
-0
sgl-router/tests/api_endpoints_test.rs
sgl-router/tests/api_endpoints_test.rs
+2
-2
sgl-router/tests/common/mod.rs
sgl-router/tests/common/mod.rs
+128
-2
sgl-router/tests/common/test_app.rs
sgl-router/tests/common/test_app.rs
+56
-0
sgl-router/tests/mcp_test.rs
sgl-router/tests/mcp_test.rs
+115
-32
sgl-router/tests/request_formats_test.rs
sgl-router/tests/request_formats_test.rs
+1
-1
sgl-router/tests/responses_api_test.rs
sgl-router/tests/responses_api_test.rs
+13
-11
sgl-router/tests/streaming_tests.rs
sgl-router/tests/streaming_tests.rs
+1
-1
sgl-router/tests/test_openai_routing.rs
sgl-router/tests/test_openai_routing.rs
+41
-116
sgl-router/tests/test_pd_routing.rs
sgl-router/tests/test_pd_routing.rs
+3
-1
No files found.
sgl-router/src/routers/grpc/responses/tool_loop.rs
View file @
212f5e48
...
...
@@ -24,12 +24,11 @@ use super::{
types
::
BackgroundTaskInfo
,
};
/// This is a re-export of the shared implementation from openai::mcp
pub
(
super
)
use
crate
::
routers
::
openai
::
mcp
::
mcp_manager_from_request_tools
as
create_mcp_manager_from_request
;
pub
(
super
)
use
crate
::
routers
::
openai
::
mcp
::
ensure_request_mcp_client
as
create_mcp_manager_from_request
;
use
crate
::{
data_connector
::{
SharedConversationItemStorage
,
SharedConversationStorage
,
SharedResponseStorage
,
},
mcp
::
McpClientManager
,
protocols
::{
chat
::
ChatCompletionResponse
,
common
::{
Tool
,
ToolChoice
,
ToolChoiceValue
},
...
...
@@ -102,7 +101,7 @@ fn extract_all_tool_calls_from_chat(
/// Execute an MCP tool call
async
fn
execute_mcp_call
(
mcp_mgr
:
&
Arc
<
McpClient
Manager
>
,
mcp_mgr
:
&
Arc
<
crate
::
mcp
::
Mcp
Manager
>
,
tool_name
:
&
str
,
args_json_str
:
&
str
,
)
->
Result
<
String
,
String
>
{
...
...
@@ -222,7 +221,7 @@ fn generate_mcp_id(prefix: &str) -> String {
/// Build mcp_list_tools output item
fn
build_mcp_list_tools_item
(
mcp
:
&
Arc
<
McpClient
Manager
>
,
mcp
:
&
Arc
<
crate
::
mcp
::
Mcp
Manager
>
,
server_label
:
&
str
,
)
->
ResponseOutputItem
{
let
tools
=
mcp
.list_tools
();
...
...
@@ -287,7 +286,7 @@ pub(super) async fn execute_tool_loop(
headers
:
Option
<
http
::
HeaderMap
>
,
model_id
:
Option
<
String
>
,
components
:
Arc
<
SharedComponents
>
,
mcp_manager
:
Arc
<
McpClient
Manager
>
,
mcp_manager
:
Arc
<
crate
::
mcp
::
Mcp
Manager
>
,
response_id
:
Option
<
String
>
,
background_tasks
:
Option
<
Arc
<
RwLock
<
HashMap
<
String
,
BackgroundTaskInfo
>>>>
,
)
->
Result
<
ResponsesResponse
,
String
>
{
...
...
@@ -507,7 +506,7 @@ pub(super) async fn execute_tool_loop_streaming(
headers
:
Option
<
http
::
HeaderMap
>
,
model_id
:
Option
<
String
>
,
components
:
Arc
<
SharedComponents
>
,
mcp_manager
:
Arc
<
McpClient
Manager
>
,
mcp_manager
:
Arc
<
crate
::
mcp
::
Mcp
Manager
>
,
response_storage
:
SharedResponseStorage
,
conversation_storage
:
SharedConversationStorage
,
conversation_item_storage
:
SharedConversationItemStorage
,
...
...
@@ -598,7 +597,7 @@ async fn execute_tool_loop_streaming_internal(
headers
:
Option
<
http
::
HeaderMap
>
,
model_id
:
Option
<
String
>
,
components
:
Arc
<
SharedComponents
>
,
mcp_manager
:
Arc
<
McpClient
Manager
>
,
mcp_manager
:
Arc
<
crate
::
mcp
::
Mcp
Manager
>
,
server_label
:
String
,
_
response_storage
:
SharedResponseStorage
,
_
conversation_storage
:
SharedConversationStorage
,
...
...
sgl-router/src/routers/grpc/router.rs
View file @
212f5e48
...
...
@@ -24,6 +24,7 @@ use crate::{
data_connector
::{
SharedConversationItemStorage
,
SharedConversationStorage
,
SharedResponseStorage
,
},
mcp
::
McpManager
,
policies
::
PolicyRegistry
,
protocols
::{
chat
::
ChatCompletionRequest
,
...
...
@@ -60,8 +61,7 @@ pub struct GrpcRouter {
response_storage
:
SharedResponseStorage
,
conversation_storage
:
SharedConversationStorage
,
conversation_item_storage
:
SharedConversationItemStorage
,
// Optional MCP manager for tool execution (enabled via SGLANG_MCP_CONFIG env var)
mcp_manager
:
Option
<
Arc
<
crate
::
mcp
::
McpClientManager
>>
,
mcp_manager
:
Arc
<
McpManager
>
,
// Background task handles for cancellation support (includes gRPC client for Python abort)
background_tasks
:
Arc
<
RwLock
<
HashMap
<
String
,
BackgroundTaskInfo
>>>
,
}
...
...
@@ -94,25 +94,12 @@ impl GrpcRouter {
let
conversation_storage
=
ctx
.conversation_storage
.clone
();
let
conversation_item_storage
=
ctx
.conversation_item_storage
.clone
();
// Optional MCP manager activation via env var path (config-driven gate)
let
mcp_manager
=
match
std
::
env
::
var
(
"SGLANG_MCP_CONFIG"
)
.ok
()
{
Some
(
path
)
if
!
path
.trim
()
.is_empty
()
=>
{
match
crate
::
mcp
::
McpConfig
::
from_file
(
&
path
)
.await
{
Ok
(
cfg
)
=>
match
crate
::
mcp
::
McpClientManager
::
new
(
cfg
)
.await
{
Ok
(
mgr
)
=>
Some
(
Arc
::
new
(
mgr
)),
Err
(
err
)
=>
{
tracing
::
warn!
(
"Failed to initialize MCP manager: {}"
,
err
);
None
}
},
Err
(
err
)
=>
{
tracing
::
warn!
(
"Failed to load MCP config from '{}': {}"
,
path
,
err
);
None
}
}
}
_
=>
None
,
};
// Get MCP manager from app context
let
mcp_manager
=
ctx
.mcp_manager
.get
()
.ok_or_else
(||
"gRPC router requires MCP manager"
.to_string
())
?
.clone
();
// Create shared components for pipeline
let
shared_components
=
Arc
::
new
(
SharedComponents
{
...
...
@@ -285,6 +272,7 @@ impl RouterTrait for GrpcRouter {
self
.response_storage
.clone
(),
self
.conversation_storage
.clone
(),
self
.conversation_item_storage
.clone
(),
self
.mcp_manager
.clone
(),
self
.background_tasks
.clone
(),
)
.await
...
...
sgl-router/src/routers/openai/mcp.rs
View file @
212f5e48
...
...
@@ -18,7 +18,7 @@ use tracing::{info, warn};
use
super
::
utils
::{
event_types
,
generate_id
};
use
crate
::{
mcp
::
McpClientManager
,
mcp
,
protocols
::
responses
::{
ResponseInput
,
ResponseTool
,
ResponseToolType
,
ResponsesRequest
},
routers
::
header_utils
::
apply_request_headers
,
};
...
...
@@ -128,10 +128,19 @@ impl FunctionCallInProgress {
// MCP Manager Integration
// ============================================================================
/// Build a request-scoped MCP manager from request tools, if present.
pub
async
fn
mcp_manager_from_request_tools
(
/// Ensure a dynamic MCP client exists for request-scoped tools.
///
/// This function parses request tools to extract MCP server configuration,
/// then ensures a dynamic client exists in the McpManager via `get_or_create_client()`.
/// The McpManager itself is returned (cloned Arc) for convenience, though the main
/// purpose is the side effect of registering the dynamic client.
///
/// Returns Some(manager) if a dynamic MCP tool was found and client was created/retrieved,
/// None if no MCP tools were found or connection failed.
pub
async
fn
ensure_request_mcp_client
(
mcp_manager
:
&
Arc
<
mcp
::
McpManager
>
,
tools
:
&
[
ResponseTool
],
)
->
Option
<
Arc
<
McpClient
Manager
>>
{
)
->
Option
<
Arc
<
mcp
::
Mcp
Manager
>>
{
let
tool
=
tools
.iter
()
.find
(|
t
|
matches!
(
t
.r
#
type
,
ResponseToolType
::
Mcp
)
&&
t
.server_url
.is_some
())
?
;
...
...
@@ -149,23 +158,30 @@ pub async fn mcp_manager_from_request_tools(
.unwrap_or_else
(||
"request-mcp"
.to_string
());
let
token
=
tool
.authorization
.clone
();
let
transport
=
if
server_url
.contains
(
"/sse"
)
{
crate
::
mcp
::
McpTransport
::
Sse
{
url
:
server_url
,
mcp
::
McpTransport
::
Sse
{
url
:
server_url
.clone
()
,
token
,
}
}
else
{
crate
::
mcp
::
McpTransport
::
Streamable
{
url
:
server_url
,
mcp
::
McpTransport
::
Streamable
{
url
:
server_url
.clone
()
,
token
,
}
};
let
cfg
=
crate
::
mcp
::
McpConfig
{
servers
:
vec!
[
crate
::
mcp
::
McpServerConfig
{
name
,
transport
}],
// Create server config
let
server_config
=
mcp
::
McpServerConfig
{
name
,
transport
,
proxy
:
None
,
required
:
false
,
};
match
McpClientManager
::
new
(
cfg
)
.await
{
Ok
(
mgr
)
=>
Some
(
Arc
::
new
(
mgr
)),
// Use McpManager to get or create dynamic client
match
mcp_manager
.get_or_create_client
(
server_config
)
.await
{
Ok
(
_
client
)
=>
Some
(
mcp_manager
.clone
()),
Err
(
err
)
=>
{
warn!
(
"Failed to
initialize request-scoped MCP manager
: {}"
,
err
);
warn!
(
"Failed to
get/create MCP connection
: {}"
,
err
);
None
}
}
...
...
@@ -177,7 +193,7 @@ pub async fn mcp_manager_from_request_tools(
/// Execute an MCP tool call
pub
(
super
)
async
fn
execute_mcp_call
(
mcp_mgr
:
&
Arc
<
McpClient
Manager
>
,
mcp_mgr
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
tool_name
:
&
str
,
args_json_str
:
&
str
,
)
->
Result
<
(
String
,
String
),
String
>
{
...
...
@@ -204,7 +220,7 @@ pub(super) async fn execute_mcp_call(
/// Returns false if client disconnected during execution
pub
(
super
)
async
fn
execute_streaming_tool_calls
(
pending_calls
:
Vec
<
FunctionCallInProgress
>
,
active_mcp
:
&
Arc
<
McpClient
Manager
>
,
active_mcp
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
tx
:
&
mpsc
::
UnboundedSender
<
Result
<
Bytes
,
io
::
Error
>>
,
state
:
&
mut
ToolLoopState
,
server_label
:
&
str
,
...
...
@@ -269,7 +285,7 @@ pub(super) async fn execute_streaming_tool_calls(
/// Transform payload to replace MCP tools with function tools for streaming
pub
(
super
)
fn
prepare_mcp_payload_for_streaming
(
payload
:
&
mut
Value
,
active_mcp
:
&
Arc
<
McpClient
Manager
>
,
active_mcp
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
)
{
if
let
Some
(
obj
)
=
payload
.as_object_mut
()
{
// Remove any non-function tools from outgoing payload
...
...
@@ -377,7 +393,7 @@ pub(super) fn build_resume_payload(
/// Returns false if client disconnected
pub
(
super
)
fn
send_mcp_list_tools_events
(
tx
:
&
mpsc
::
UnboundedSender
<
Result
<
Bytes
,
io
::
Error
>>
,
mcp
:
&
Arc
<
McpClient
Manager
>
,
mcp
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
server_label
:
&
str
,
output_index
:
usize
,
sequence_number
:
&
mut
u64
,
...
...
@@ -533,7 +549,7 @@ pub(super) fn send_mcp_call_completion_events_with_error(
pub
(
super
)
fn
inject_mcp_metadata_streaming
(
response
:
&
mut
Value
,
state
:
&
ToolLoopState
,
mcp
:
&
Arc
<
McpClient
Manager
>
,
mcp
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
server_label
:
&
str
,
)
{
if
let
Some
(
output_array
)
=
response
.get_mut
(
"output"
)
.and_then
(|
v
|
v
.as_array_mut
())
{
...
...
@@ -573,7 +589,7 @@ pub(super) async fn execute_tool_loop(
headers
:
Option
<&
HeaderMap
>
,
initial_payload
:
Value
,
original_body
:
&
ResponsesRequest
,
active_mcp
:
&
Arc
<
McpClient
Manager
>
,
active_mcp
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
config
:
&
McpLoopConfig
,
)
->
Result
<
Value
,
String
>
{
let
mut
state
=
ToolLoopState
::
new
(
original_body
.input
.clone
());
...
...
@@ -734,7 +750,7 @@ pub(super) fn build_incomplete_response(
mut
response
:
Value
,
state
:
ToolLoopState
,
reason
:
&
str
,
active_mcp
:
&
Arc
<
McpClient
Manager
>
,
active_mcp
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
original_body
:
&
ResponsesRequest
,
)
->
Result
<
Value
,
String
>
{
let
obj
=
response
...
...
@@ -837,7 +853,7 @@ pub(super) fn build_incomplete_response(
// ============================================================================
/// Build an mcp_list_tools output item
pub
(
super
)
fn
build_mcp_list_tools_item
(
mcp
:
&
Arc
<
McpClient
Manager
>
,
server_label
:
&
str
)
->
Value
{
pub
(
super
)
fn
build_mcp_list_tools_item
(
mcp
:
&
Arc
<
mcp
::
Mcp
Manager
>
,
server_label
:
&
str
)
->
Value
{
let
tools
=
mcp
.list_tools
();
let
tools_json
:
Vec
<
Value
>
=
tools
.iter
()
...
...
sgl-router/src/routers/openai/router.rs
View file @
212f5e48
...
...
@@ -28,7 +28,7 @@ use super::conversations::{
};
use
super
::{
mcp
::{
e
xecute_tool_loop
,
mcp_manager_from_request_tools
,
prepare_mcp_payload_for_streaming
,
e
nsure_request_mcp_client
,
execute_tool_loop
,
prepare_mcp_payload_for_streaming
,
McpLoopConfig
,
},
responses
::{
mask_tools_as_mcp
,
patch_streaming_response_json
},
...
...
@@ -36,12 +36,12 @@ use super::{
utils
::{
apply_provider_headers
,
extract_auth_header
,
probe_endpoint_for_model
},
};
use
crate
::{
config
::
CircuitBreakerConfig
,
core
::{
CircuitBreaker
,
CircuitBreakerConfig
as
CoreCircuitBreakerConfig
},
data_connector
::{
ConversationId
,
ListParams
,
ResponseId
,
SharedConversationItemStorage
,
SharedConversationStorage
,
SharedResponseStorage
,
SortOrder
,
},
mcp
::
McpManager
,
protocols
::{
chat
::
ChatCompletionRequest
,
classify
::
ClassifyRequest
,
...
...
@@ -86,8 +86,8 @@ pub struct OpenAIRouter {
conversation_storage
:
SharedConversationStorage
,
/// Conversation item storage backend
conversation_item_storage
:
SharedConversationItemStorage
,
///
Optional
MCP manager (
enabled via config presence
)
mcp_manager
:
Option
<
Arc
<
crate
::
mcp
::
McpClient
Manager
>
>
,
/// MCP manager (
handles both static and dynamic servers
)
mcp_manager
:
Arc
<
Mcp
Manager
>
,
}
impl
std
::
fmt
::
Debug
for
OpenAIRouter
{
...
...
@@ -109,15 +109,10 @@ impl OpenAIRouter {
/// Create a new OpenAI router
pub
async
fn
new
(
worker_urls
:
Vec
<
String
>
,
circuit_breaker_config
:
Option
<
CircuitBreakerConfig
>
,
response_storage
:
SharedResponseStorage
,
conversation_storage
:
SharedConversationStorage
,
conversation_item_storage
:
SharedConversationItemStorage
,
ctx
:
&
Arc
<
crate
::
app_context
::
AppContext
>
,
)
->
Result
<
Self
,
String
>
{
let
client
=
reqwest
::
Client
::
builder
()
.timeout
(
Duration
::
from_secs
(
300
))
.build
()
.map_err
(|
e
|
format!
(
"Failed to create HTTP client: {}"
,
e
))
?
;
// Use HTTP client from AppContext
let
client
=
ctx
.client
.clone
();
// Normalize URLs (remove trailing slashes)
let
worker_urls
:
Vec
<
String
>
=
worker_urls
...
...
@@ -125,37 +120,23 @@ impl OpenAIRouter {
.map
(|
url
|
url
.trim_end_matches
(
'/'
)
.to_string
())
.collect
();
// Convert circuit breaker config
let
core_cb_config
=
circuit_breaker_config
.map
(|
cb
|
CoreCircuitBreakerConfig
{
failure_threshold
:
cb
.failure_threshold
,
success_threshold
:
cb
.success_threshold
,
timeout_duration
:
Duration
::
from_secs
(
cb
.timeout_duration_secs
),
window_duration
:
Duration
::
from_secs
(
cb
.window_duration_secs
),
})
.unwrap_or_default
();
// Convert circuit breaker config from AppContext
let
cb
=
&
ctx
.router_config.circuit_breaker
;
let
core_cb_config
=
CoreCircuitBreakerConfig
{
failure_threshold
:
cb
.failure_threshold
,
success_threshold
:
cb
.success_threshold
,
timeout_duration
:
Duration
::
from_secs
(
cb
.timeout_duration_secs
),
window_duration
:
Duration
::
from_secs
(
cb
.window_duration_secs
),
};
let
circuit_breaker
=
CircuitBreaker
::
with_config
(
core_cb_config
);
// Optional MCP manager activation via env var path (config-driven gate)
let
mcp_manager
=
match
std
::
env
::
var
(
"SGLANG_MCP_CONFIG"
)
.ok
()
{
Some
(
path
)
if
!
path
.trim
()
.is_empty
()
=>
{
match
crate
::
mcp
::
McpConfig
::
from_file
(
&
path
)
.await
{
Ok
(
cfg
)
=>
match
crate
::
mcp
::
McpClientManager
::
new
(
cfg
)
.await
{
Ok
(
mgr
)
=>
Some
(
Arc
::
new
(
mgr
)),
Err
(
err
)
=>
{
warn!
(
"Failed to initialize MCP manager: {}"
,
err
);
None
}
},
Err
(
err
)
=>
{
warn!
(
"Failed to load MCP config from '{}': {}"
,
path
,
err
);
None
}
}
}
_
=>
None
,
};
// Get MCP manager from AppContext (must be initialized)
let
mcp_manager
=
ctx
.mcp_manager
.get
()
.ok_or_else
(||
"MCP manager not initialized in AppContext"
.to_string
())
?
.clone
();
Ok
(
Self
{
client
,
...
...
@@ -163,9 +144,9 @@ impl OpenAIRouter {
model_cache
:
Arc
::
new
(
DashMap
::
new
()),
circuit_breaker
,
healthy
:
AtomicBool
::
new
(
true
),
response_storage
,
conversation_storage
,
conversation_item_storage
,
response_storage
:
ctx
.response_storage
.clone
()
,
conversation_storage
:
ctx
.conversation_storage
.clone
()
,
conversation_item_storage
:
ctx
.conversation_item_storage
.clone
()
,
mcp_manager
,
})
}
...
...
@@ -241,12 +222,17 @@ impl OpenAIRouter {
original_previous_response_id
:
Option
<
String
>
,
)
->
Response
{
// Check if MCP is active for this request
let
req_mcp_manager
=
if
let
Some
(
ref
tools
)
=
original_body
.tools
{
mcp_manager_from_request_tools
(
tools
.as_slice
())
.await
}
else
{
// Ensure dynamic client is created if needed
if
let
Some
(
ref
tools
)
=
original_body
.tools
{
ensure_request_mcp_client
(
&
self
.mcp_manager
,
tools
.as_slice
())
.await
;
}
// Use the tool loop if the manager has any tools available (static or dynamic).
let
active_mcp
=
if
self
.mcp_manager
.list_tools
()
.is_empty
()
{
None
}
else
{
Some
(
&
self
.mcp_manager
)
};
let
active_mcp
=
req_mcp_manager
.as_ref
()
.or
(
self
.mcp_manager
.as_ref
());
let
mut
response_json
:
Value
;
...
...
@@ -984,7 +970,7 @@ impl crate::routers::RouterTrait for OpenAIRouter {
handle_streaming_response
(
&
self
.client
,
&
self
.circuit_breaker
,
self
.mcp_manager
.as_ref
(
),
Some
(
&
self
.mcp_manager
),
self
.response_storage
.clone
(),
self
.conversation_storage
.clone
(),
self
.conversation_item_storage
.clone
(),
...
...
sgl-router/src/routers/openai/streaming.rs
View file @
212f5e48
...
...
@@ -25,8 +25,8 @@ use tracing::warn;
use
super
::
conversations
::
persist_conversation_items
;
use
super
::{
mcp
::{
build_resume_payload
,
execute_streaming_tool_calls
,
inject_mcp_metadata_streaming
,
mcp_manager_from_request_tools
,
prepare_mcp_payload_for_streaming
,
build_resume_payload
,
ensure_request_mcp_client
,
execute_streaming_tool_calls
,
inject_mcp_metadata_streaming
,
prepare_mcp_payload_for_streaming
,
send_mcp_list_tools_events
,
McpLoopConfig
,
ToolLoopState
,
},
responses
::{
mask_tools_as_mcp
,
patch_streaming_response_json
,
rewrite_streaming_block
},
...
...
@@ -907,7 +907,7 @@ pub(super) fn send_final_response_event(
tx
:
&
mpsc
::
UnboundedSender
<
Result
<
Bytes
,
io
::
Error
>>
,
sequence_number
:
&
mut
u64
,
state
:
&
ToolLoopState
,
active_mcp
:
Option
<&
Arc
<
crate
::
mcp
::
Mcp
Client
Manager
>>
,
active_mcp
:
Option
<&
Arc
<
crate
::
mcp
::
McpManager
>>
,
original_request
:
&
ResponsesRequest
,
previous_response_id
:
Option
<&
str
>
,
server_label
:
&
str
,
...
...
@@ -1138,7 +1138,7 @@ pub(super) async fn handle_streaming_with_tool_interception(
mut
payload
:
Value
,
original_body
:
&
ResponsesRequest
,
original_previous_response_id
:
Option
<
String
>
,
active_mcp
:
&
Arc
<
crate
::
mcp
::
Mcp
Client
Manager
>
,
active_mcp
:
&
Arc
<
crate
::
mcp
::
McpManager
>
,
)
->
Response
{
// Transform MCP tools to function tools in payload
prepare_mcp_payload_for_streaming
(
&
mut
payload
,
active_mcp
);
...
...
@@ -1491,7 +1491,7 @@ pub(super) async fn handle_streaming_with_tool_interception(
pub
(
super
)
async
fn
handle_streaming_response
(
client
:
&
reqwest
::
Client
,
circuit_breaker
:
&
crate
::
core
::
CircuitBreaker
,
mcp_manager
:
Option
<&
Arc
<
crate
::
mcp
::
Mcp
Client
Manager
>>
,
mcp_manager
:
Option
<&
Arc
<
crate
::
mcp
::
McpManager
>>
,
response_storage
:
SharedResponseStorage
,
conversation_storage
:
SharedConversationStorage
,
conversation_item_storage
:
SharedConversationItemStorage
,
...
...
@@ -1502,12 +1502,19 @@ pub(super) async fn handle_streaming_response(
original_previous_response_id
:
Option
<
String
>
,
)
->
Response
{
// Check if MCP is active for this request
let
req_mcp_manager
=
if
let
Some
(
ref
tools
)
=
original_body
.tools
{
mcp_manager_from_request_tools
(
tools
.as_slice
())
.await
}
else
{
None
};
let
active_mcp
=
req_mcp_manager
.as_ref
()
.or
(
mcp_manager
);
// Ensure dynamic client is created if needed
if
let
(
Some
(
manager
),
Some
(
ref
tools
))
=
(
mcp_manager
,
&
original_body
.tools
)
{
ensure_request_mcp_client
(
manager
,
tools
.as_slice
())
.await
;
}
// Use the tool loop if the manager has any tools available (static or dynamic).
let
active_mcp
=
mcp_manager
.and_then
(|
mgr
|
{
if
mgr
.list_tools
()
.is_empty
()
{
None
}
else
{
Some
(
mgr
)
}
});
// If no MCP is active, use simple pass-through streaming
if
active_mcp
.is_none
()
{
...
...
sgl-router/src/server.rs
View file @
212f5e48
...
...
@@ -24,8 +24,8 @@ use crate::{
core
::{
worker_to_info
,
workflow
::{
create_
worker
_registration_workflow
,
create_worker_re
moval_workflow
,
LoggingSubscriber
,
WorkflowEngine
,
create_
mcp
_registration_workflow
,
create_worker_re
gistration_workflow
,
create_worker_removal_workflow
,
LoggingSubscriber
,
WorkflowEngine
,
},
Job
,
JobQueue
,
JobQueueConfig
,
WorkerManager
,
WorkerType
,
},
...
...
@@ -739,11 +739,12 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
engine
.register_workflow
(
create_worker_registration_workflow
());
engine
.register_workflow
(
create_worker_removal_workflow
());
engine
.register_workflow
(
create_mcp_registration_workflow
());
app_context
.workflow_engine
.set
(
engine
)
.expect
(
"WorkflowEngine should only be initialized once"
);
info!
(
"Workflow engine initialized with worker registration
and removal
workflows"
);
info!
(
"Workflow engine initialized with worker
and MCP
registration workflows"
);
info!
(
"Initializing workers for routing mode: {:?}"
,
...
...
@@ -763,6 +764,27 @@ pub async fn startup(config: ServerConfig) -> Result<(), Box<dyn std::error::Err
.await
.map_err
(|
e
|
format!
(
"Failed to submit worker initialization job: {}"
,
e
))
?
;
if
let
Some
(
mcp_config
)
=
&
config
.router_config.mcp_config
{
info!
(
"Found {} MCP server(s) in config"
,
mcp_config
.servers
.len
());
let
mcp_job
=
Job
::
InitializeMcpServers
{
mcp_config
:
Box
::
new
(
mcp_config
.clone
()),
};
job_queue
.submit
(
mcp_job
)
.await
.map_err
(|
e
|
format!
(
"Failed to submit MCP initialization job: {}"
,
e
))
?
;
}
else
{
info!
(
"No MCP config provided, skipping MCP server initialization"
);
}
// Start background refresh for all registered static MCP servers
if
let
Some
(
mcp_manager
)
=
app_context
.mcp_manager
.get
()
{
let
refresh_interval
=
Duration
::
from_secs
(
300
);
// 5 minutes, matches default TTL
let
_
refresh_handle
=
Arc
::
clone
(
mcp_manager
)
.spawn_background_refresh_all
(
refresh_interval
);
info!
(
"Started background refresh for all static MCP servers"
);
}
let
worker_stats
=
app_context
.worker_registry
.stats
();
info!
(
"Workers initialized: {} total, {} healthy"
,
...
...
sgl-router/src/service_discovery.rs
View file @
212f5e48
...
...
@@ -593,6 +593,7 @@ mod tests {
configured_tool_parser
:
None
,
worker_job_queue
:
Arc
::
new
(
std
::
sync
::
OnceLock
::
new
()),
workflow_engine
:
Arc
::
new
(
std
::
sync
::
OnceLock
::
new
()),
mcp_manager
:
Arc
::
new
(
std
::
sync
::
OnceLock
::
new
()),
})
}
...
...
sgl-router/tests/api_endpoints_test.rs
View file @
212f5e48
...
...
@@ -90,7 +90,7 @@ impl TestContext {
.unwrap
();
// Create app context
let
app_context
=
common
::
create_test_context
(
config
.clone
());
let
app_context
=
common
::
create_test_context
(
config
.clone
())
.await
;
// Submit worker initialization job (same as real server does)
if
!
worker_urls
.is_empty
()
{
...
...
@@ -1538,7 +1538,7 @@ mod pd_mode_tests {
.build_unchecked
();
// Create app context
let
app_context
=
common
::
create_test_context
(
config
);
let
app_context
=
common
::
create_test_context
(
config
)
.await
;
// Create router - this might fail due to health check issues
let
router_result
=
RouterFactory
::
create_router
(
&
app_context
)
.await
;
...
...
sgl-router/tests/common/mod.rs
View file @
212f5e48
...
...
@@ -27,7 +27,7 @@ use sglang_router_rs::{
};
/// Helper function to create AppContext for tests
pub
fn
create_test_context
(
config
:
RouterConfig
)
->
Arc
<
AppContext
>
{
pub
async
fn
create_test_context
(
config
:
RouterConfig
)
->
Arc
<
AppContext
>
{
let
client
=
reqwest
::
Client
::
new
();
// Initialize rate limiter
...
...
@@ -62,9 +62,10 @@ pub fn create_test_context(config: RouterConfig) -> Arc<AppContext> {
config
.worker_startup_check_interval_secs
,
)));
// Create empty OnceLock for worker job queue
and
workflow engine
// Create empty OnceLock for worker job queue
,
workflow engine
, and mcp manager
let
worker_job_queue
=
Arc
::
new
(
OnceLock
::
new
());
let
workflow_engine
=
Arc
::
new
(
OnceLock
::
new
());
let
mcp_manager_lock
=
Arc
::
new
(
OnceLock
::
new
());
let
app_context
=
Arc
::
new
(
AppContext
::
builder
()
...
...
@@ -82,6 +83,7 @@ pub fn create_test_context(config: RouterConfig) -> Arc<AppContext> {
.load_monitor
(
load_monitor
)
.worker_job_queue
(
worker_job_queue
)
.workflow_engine
(
workflow_engine
)
.mcp_manager
(
mcp_manager_lock
)
.build
()
.unwrap
(),
);
...
...
@@ -109,6 +111,130 @@ pub fn create_test_context(config: RouterConfig) -> Arc<AppContext> {
.set
(
engine
)
.expect
(
"WorkflowEngine should only be initialized once"
);
// Initialize MCP manager with empty config
use
sglang_router_rs
::
mcp
::{
McpConfig
,
McpManager
};
let
empty_config
=
McpConfig
{
servers
:
vec!
[],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
vec!
[],
inventory
:
Default
::
default
(),
};
let
mcp_manager
=
McpManager
::
with_defaults
(
empty_config
)
.await
.expect
(
"Failed to create MCP manager"
);
app_context
.mcp_manager
.set
(
Arc
::
new
(
mcp_manager
))
.ok
()
.expect
(
"McpManager should only be initialized once"
);
app_context
}
/// Helper function to create AppContext for tests with MCP config from file
pub
async
fn
create_test_context_with_mcp_config
(
config
:
RouterConfig
,
mcp_config_path
:
&
str
,
)
->
Arc
<
AppContext
>
{
use
sglang_router_rs
::
mcp
::{
McpConfig
,
McpManager
};
let
client
=
reqwest
::
Client
::
new
();
// Initialize rate limiter
let
rate_limiter
=
match
config
.max_concurrent_requests
{
n
if
n
<=
0
=>
None
,
n
=>
{
let
rate_limit_tokens
=
config
.rate_limit_tokens_per_second
.filter
(|
&
t
|
t
>
0
)
.unwrap_or
(
n
);
Some
(
Arc
::
new
(
TokenBucket
::
new
(
n
as
usize
,
rate_limit_tokens
as
usize
,
)))
}
};
// Initialize registries
let
worker_registry
=
Arc
::
new
(
WorkerRegistry
::
new
());
let
policy_registry
=
Arc
::
new
(
PolicyRegistry
::
new
(
config
.policy
.clone
()));
// Initialize storage backends (Memory for tests)
let
response_storage
=
Arc
::
new
(
MemoryResponseStorage
::
new
());
let
conversation_storage
=
Arc
::
new
(
MemoryConversationStorage
::
new
());
let
conversation_item_storage
=
Arc
::
new
(
MemoryConversationItemStorage
::
new
());
// Initialize load monitor
let
load_monitor
=
Some
(
Arc
::
new
(
LoadMonitor
::
new
(
worker_registry
.clone
(),
policy_registry
.clone
(),
client
.clone
(),
config
.worker_startup_check_interval_secs
,
)));
// Create empty OnceLock for worker job queue, workflow engine, and mcp manager
let
worker_job_queue
=
Arc
::
new
(
OnceLock
::
new
());
let
workflow_engine
=
Arc
::
new
(
OnceLock
::
new
());
let
mcp_manager_lock
=
Arc
::
new
(
OnceLock
::
new
());
let
app_context
=
Arc
::
new
(
AppContext
::
builder
()
.router_config
(
config
)
.client
(
client
)
.rate_limiter
(
rate_limiter
)
.tokenizer
(
None
)
// tokenizer
.reasoning_parser_factory
(
None
)
// reasoning_parser_factory
.tool_parser_factory
(
None
)
// tool_parser_factory
.worker_registry
(
worker_registry
)
.policy_registry
(
policy_registry
)
.response_storage
(
response_storage
)
.conversation_storage
(
conversation_storage
)
.conversation_item_storage
(
conversation_item_storage
)
.load_monitor
(
load_monitor
)
.worker_job_queue
(
worker_job_queue
)
.workflow_engine
(
workflow_engine
)
.mcp_manager
(
mcp_manager_lock
)
.build
()
.unwrap
(),
);
// Initialize JobQueue after AppContext is created
let
weak_context
=
Arc
::
downgrade
(
&
app_context
);
let
job_queue
=
sglang_router_rs
::
core
::
JobQueue
::
new
(
sglang_router_rs
::
core
::
JobQueueConfig
::
default
(),
weak_context
,
);
app_context
.worker_job_queue
.set
(
job_queue
)
.expect
(
"JobQueue should only be initialized once"
);
// Initialize WorkflowEngine and register workflows
use
sglang_router_rs
::
core
::
workflow
::{
create_worker_registration_workflow
,
create_worker_removal_workflow
,
WorkflowEngine
,
};
let
engine
=
Arc
::
new
(
WorkflowEngine
::
new
());
engine
.register_workflow
(
create_worker_registration_workflow
());
engine
.register_workflow
(
create_worker_removal_workflow
());
app_context
.workflow_engine
.set
(
engine
)
.expect
(
"WorkflowEngine should only be initialized once"
);
// Initialize MCP manager from config file
let
mcp_config
=
McpConfig
::
from_file
(
mcp_config_path
)
.await
.expect
(
"Failed to load MCP config from file"
);
let
mcp_manager
=
McpManager
::
with_defaults
(
mcp_config
)
.await
.expect
(
"Failed to create MCP manager"
);
app_context
.mcp_manager
.set
(
Arc
::
new
(
mcp_manager
))
.ok
()
.expect
(
"McpManager should only be initialized once"
);
app_context
}
...
...
sgl-router/tests/common/test_app.rs
View file @
212f5e48
...
...
@@ -9,6 +9,7 @@ use sglang_router_rs::{
data_connector
::{
MemoryConversationItemStorage
,
MemoryConversationStorage
,
MemoryResponseStorage
,
},
mcp
::{
McpConfig
,
McpManager
},
middleware
::{
AuthConfig
,
TokenBucket
},
policies
::
PolicyRegistry
,
routers
::
RouterTrait
,
...
...
@@ -153,3 +154,58 @@ pub fn create_test_app_with_context(
router_config
.cors_allowed_origins
.clone
(),
)
}
/// Create a minimal test AppContext for unit tests
#[allow(dead_code)]
pub
async
fn
create_test_app_context
()
->
Arc
<
AppContext
>
{
let
router_config
=
RouterConfig
::
default
();
let
client
=
Client
::
new
();
// Initialize empty OnceLocks
let
worker_job_queue
=
Arc
::
new
(
OnceLock
::
new
());
let
workflow_engine
=
Arc
::
new
(
OnceLock
::
new
());
// Initialize MCP manager with empty config
let
mcp_manager_lock
=
Arc
::
new
(
OnceLock
::
new
());
let
empty_config
=
McpConfig
{
servers
:
vec!
[],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
vec!
[],
inventory
:
Default
::
default
(),
};
let
mcp_manager
=
McpManager
::
with_defaults
(
empty_config
)
.await
.expect
(
"Failed to create MCP manager"
);
mcp_manager_lock
.set
(
Arc
::
new
(
mcp_manager
))
.ok
();
// Initialize registries
let
worker_registry
=
Arc
::
new
(
WorkerRegistry
::
new
());
let
policy_registry
=
Arc
::
new
(
PolicyRegistry
::
new
(
router_config
.policy
.clone
()));
// Initialize storage backends
let
response_storage
=
Arc
::
new
(
MemoryResponseStorage
::
new
());
let
conversation_storage
=
Arc
::
new
(
MemoryConversationStorage
::
new
());
let
conversation_item_storage
=
Arc
::
new
(
MemoryConversationItemStorage
::
new
());
Arc
::
new
(
AppContext
::
builder
()
.router_config
(
router_config
)
.client
(
client
)
.rate_limiter
(
None
)
.tokenizer
(
None
)
.reasoning_parser_factory
(
None
)
.tool_parser_factory
(
None
)
.worker_registry
(
worker_registry
)
.policy_registry
(
policy_registry
)
.response_storage
(
response_storage
)
.conversation_storage
(
conversation_storage
)
.conversation_item_storage
(
conversation_item_storage
)
.load_monitor
(
None
)
.worker_job_queue
(
worker_job_queue
)
.workflow_engine
(
workflow_engine
)
.mcp_manager
(
mcp_manager_lock
)
.build
()
.unwrap
(),
)
}
sgl-router/tests/mcp_test.rs
View file @
212f5e48
...
...
@@ -13,7 +13,7 @@ use std::collections::HashMap;
use
common
::
mock_mcp_server
::
MockMCPServer
;
use
serde_json
::
json
;
use
sglang_router_rs
::
mcp
::{
McpClientManager
,
McpConfig
,
McpError
,
McpServerConfig
,
McpTransport
};
use
sglang_router_rs
::
mcp
::{
McpConfig
,
McpError
,
McpManager
,
McpServerConfig
,
McpTransport
};
/// Create a new mock server for testing (each test gets its own)
async
fn
create_mock_server
()
->
MockMCPServer
{
...
...
@@ -26,11 +26,23 @@ async fn create_mock_server() -> MockMCPServer {
#[tokio::test]
async
fn
test_mcp_server_initialization
()
{
let
config
=
McpConfig
{
servers
:
vec!
[]
};
let
config
=
McpConfig
{
servers
:
vec!
[],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
// Should fail with no servers
let
result
=
McpClientManager
::
new
(
config
)
.await
;
assert
!
(
result
.is_err
(),
"Should fail with no servers configured"
);
// Should succeed but with no connected servers (empty config is allowed)
let
result
=
McpManager
::
with_defaults
(
config
)
.await
;
assert
!
(
result
.is_ok
(),
"Should succeed with empty config"
);
let
manager
=
result
.unwrap
();
let
servers
=
manager
.list_servers
();
assert_eq!
(
servers
.len
(),
0
,
"Should have no servers"
);
let
tools
=
manager
.list_tools
();
assert_eq!
(
tools
.len
(),
0
,
"Should have no tools"
);
}
#[tokio::test]
...
...
@@ -44,13 +56,19 @@ async fn test_server_connection_with_mock() {
url
:
mock_server
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
let
result
=
Mcp
Client
Manager
::
new
(
config
)
.await
;
let
result
=
McpManager
::
with_defaults
(
config
)
.await
;
assert
!
(
result
.is_ok
(),
"Should connect to mock server"
);
let
mut
manager
=
result
.unwrap
();
let
manager
=
result
.unwrap
();
let
servers
=
manager
.list_servers
();
assert_eq!
(
servers
.len
(),
1
);
...
...
@@ -76,10 +94,16 @@ async fn test_tool_availability_checking() {
url
:
mock_server
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
let
mut
manager
=
Mcp
Client
Manager
::
new
(
config
)
.await
.unwrap
();
let
manager
=
McpManager
::
with_defaults
(
config
)
.await
.unwrap
();
let
test_tools
=
vec!
[
"brave_web_search"
,
"brave_local_search"
,
"calculator"
];
for
tool
in
test_tools
{
...
...
@@ -119,6 +143,8 @@ async fn test_multi_server_connection() {
url
:
mock_server1
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
},
McpServerConfig
{
name
:
"mock_server_2"
.to_string
(),
...
...
@@ -126,15 +152,21 @@ async fn test_multi_server_connection() {
url
:
mock_server2
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
},
],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
// Note: In the current implementation the manager will not end up connected to both servers,
// since they return the same tools. The manager will connect to the first one.
let
result
=
Mcp
Client
Manager
::
new
(
config
)
.await
;
let
result
=
McpManager
::
with_defaults
(
config
)
.await
;
if
let
Ok
(
mut
manager
)
=
result
{
if
let
Ok
(
manager
)
=
result
{
let
servers
=
manager
.list_servers
();
assert
!
(
!
servers
.is_empty
(),
"Should have at least one server"
);
...
...
@@ -156,10 +188,16 @@ async fn test_tool_execution_with_mock() {
url
:
mock_server
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
let
mut
manager
=
Mcp
Client
Manager
::
new
(
config
)
.await
.unwrap
();
let
manager
=
McpManager
::
with_defaults
(
config
)
.await
.unwrap
();
let
result
=
manager
.call_tool
(
...
...
@@ -207,10 +245,16 @@ async fn test_concurrent_tool_execution() {
url
:
mock_server
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
let
mut
manager
=
Mcp
Client
Manager
::
new
(
config
)
.await
.unwrap
();
let
manager
=
McpManager
::
with_defaults
(
config
)
.await
.unwrap
();
// Execute tools sequentially (true concurrent execution would require Arc<Mutex>)
let
tool_calls
=
vec!
[
...
...
@@ -244,10 +288,16 @@ async fn test_tool_execution_errors() {
url
:
mock_server
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
let
mut
manager
=
Mcp
Client
Manager
::
new
(
config
)
.await
.unwrap
();
let
manager
=
McpManager
::
with_defaults
(
config
)
.await
.unwrap
();
// Try to call unknown tool
let
result
=
manager
...
...
@@ -275,23 +325,25 @@ async fn test_connection_without_server() {
args
:
vec!
[],
envs
:
HashMap
::
new
(),
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
let
result
=
McpClientManager
::
new
(
config
)
.await
;
assert
!
(
result
.is_err
(),
"Should fail when no server is running"
);
if
let
Err
(
e
)
=
result
{
let
error_msg
=
e
.to_string
();
assert
!
(
error_msg
.contains
(
"Failed to connect"
)
||
error_msg
.contains
(
"Connection"
)
||
error_msg
.contains
(
"failed"
)
||
error_msg
.contains
(
"error"
),
"Error should indicate failure: {}"
,
error_msg
);
}
let
result
=
McpManager
::
with_defaults
(
config
)
.await
;
// Manager succeeds but no servers are connected (errors are logged)
assert
!
(
result
.is_ok
(),
"Manager should succeed even if servers fail to connect"
);
let
manager
=
result
.unwrap
();
let
servers
=
manager
.list_servers
();
assert_eq!
(
servers
.len
(),
0
,
"Should have no connected servers"
);
}
// Schema Validation Tests
...
...
@@ -307,10 +359,16 @@ async fn test_tool_info_structure() {
url
:
mock_server
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
let
manager
=
Mcp
Client
Manager
::
new
(
config
)
.await
.unwrap
();
let
manager
=
McpManager
::
with_defaults
(
config
)
.await
.unwrap
();
let
tools
=
manager
.list_tools
();
let
brave_search
=
tools
...
...
@@ -337,12 +395,25 @@ async fn test_sse_connection() {
args
:
vec!
[
"--sse"
.to_string
()],
envs
:
HashMap
::
new
(),
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
// The connection attempt will fail immediately, without retry
let
result
=
McpClientManager
::
new
(
config
)
.await
;
assert
!
(
result
.is_err
(),
"Should fail for non-existent SSE server"
);
// Manager succeeds but no servers are connected (errors are logged)
let
result
=
McpManager
::
with_defaults
(
config
)
.await
;
assert
!
(
result
.is_ok
(),
"Manager should succeed even if SSE server fails to connect"
);
let
manager
=
result
.unwrap
();
let
servers
=
manager
.list_servers
();
assert_eq!
(
servers
.len
(),
0
,
"Should have no connected servers"
);
}
// Connection Type Tests
...
...
@@ -356,6 +427,8 @@ async fn test_transport_types() {
url
:
"http://localhost:8080/mcp"
.to_string
(),
token
:
Some
(
"auth_token"
.to_string
()),
},
proxy
:
None
,
required
:
false
,
};
assert_eq!
(
http_config
.name
,
"http_server"
);
...
...
@@ -366,6 +439,8 @@ async fn test_transport_types() {
url
:
"http://localhost:8081/sse"
.to_string
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
};
assert_eq!
(
sse_config
.name
,
"sse_server"
);
...
...
@@ -377,6 +452,8 @@ async fn test_transport_types() {
args
:
vec!
[
"--port"
.to_string
(),
"8082"
.to_string
()],
envs
:
HashMap
::
new
(),
},
proxy
:
None
,
required
:
false
,
};
assert_eq!
(
stdio_config
.name
,
"stdio_server"
);
}
...
...
@@ -395,11 +472,17 @@ async fn test_complete_workflow() {
url
:
mock_server
.url
(),
token
:
None
,
},
proxy
:
None
,
required
:
false
,
}],
pool
:
Default
::
default
(),
proxy
:
None
,
warmup
:
Vec
::
new
(),
inventory
:
Default
::
default
(),
};
// 2. Connect to server
let
mut
manager
=
Mcp
Client
Manager
::
new
(
config
)
let
manager
=
McpManager
::
with_defaults
(
config
)
.await
.expect
(
"Should connect to mock server"
);
...
...
sgl-router/tests/request_formats_test.rs
View file @
212f5e48
...
...
@@ -44,7 +44,7 @@ impl TestContext {
worker_urls
:
worker_urls
.clone
(),
};
let
app_context
=
common
::
create_test_context
(
config
.clone
());
let
app_context
=
common
::
create_test_context
(
config
.clone
())
.await
;
let
router
=
RouterFactory
::
create_router
(
&
app_context
)
.await
.unwrap
();
let
router
=
Arc
::
from
(
router
);
...
...
sgl-router/tests/responses_api_test.rs
View file @
212f5e48
...
...
@@ -55,8 +55,9 @@ async fn test_non_streaming_mcp_minimal_e2e_with_persistence() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
// Create router and context
let
ctx
=
common
::
create_test_context
(
router_cfg
);
// Create router and context with MCP config from file
let
ctx
=
common
::
create_test_context_with_mcp_config
(
router_cfg
,
cfg_path
.to_str
()
.unwrap
())
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Build a simple ResponsesRequest that will trigger the tool call
...
...
@@ -230,7 +231,7 @@ async fn test_conversations_crud_basic() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Create
...
...
@@ -540,7 +541,7 @@ async fn test_multi_turn_loop_with_mcp() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Build request with MCP tools
...
...
@@ -691,7 +692,7 @@ async fn test_max_tool_calls_limit() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
let
req
=
ResponsesRequest
{
...
...
@@ -808,7 +809,8 @@ async fn setup_streaming_mcp_test() -> (
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context_with_mcp_config
(
router_cfg
,
cfg_path
.to_str
()
.unwrap
())
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
(
mcp
,
worker
,
router
,
dir
)
...
...
@@ -1224,7 +1226,7 @@ async fn test_conversation_items_create_and_get() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Create conversation
...
...
@@ -1300,7 +1302,7 @@ async fn test_conversation_items_delete() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Create conversation
...
...
@@ -1382,7 +1384,7 @@ async fn test_conversation_items_max_limit() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Create conversation
...
...
@@ -1434,7 +1436,7 @@ async fn test_conversation_items_unsupported_type() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Create conversation
...
...
@@ -1485,7 +1487,7 @@ async fn test_conversation_items_multi_conversation_sharing() {
.queue_timeout_secs
(
5
)
.build_unchecked
();
let
ctx
=
common
::
create_test_context
(
router_cfg
);
let
ctx
=
common
::
create_test_context
(
router_cfg
)
.await
;
let
router
=
RouterFactory
::
create_router
(
&
ctx
)
.await
.expect
(
"router"
);
// Create two conversations
...
...
sgl-router/tests/streaming_tests.rs
View file @
212f5e48
...
...
@@ -45,7 +45,7 @@ impl TestContext {
worker_urls
:
worker_urls
.clone
(),
};
let
app_context
=
common
::
create_test_context
(
config
.clone
());
let
app_context
=
common
::
create_test_context
(
config
.clone
())
.await
;
let
router
=
RouterFactory
::
create_router
(
&
app_context
)
.await
.unwrap
();
let
router
=
Arc
::
from
(
router
);
...
...
sgl-router/tests/test_openai_routing.rs
View file @
212f5e48
...
...
@@ -21,10 +21,7 @@ use sglang_router_rs::{
config
::{
ConfigError
,
ConfigValidator
,
HistoryBackend
,
OracleConfig
,
RouterConfig
,
RoutingMode
,
},
data_connector
::{
MemoryConversationItemStorage
,
MemoryConversationStorage
,
MemoryResponseStorage
,
ResponseId
,
ResponseStorage
,
StoredResponse
,
},
data_connector
::{
ResponseId
,
StoredResponse
},
protocols
::{
chat
::{
ChatCompletionRequest
,
ChatMessage
,
UserMessageContent
},
common
::
StringOrArray
,
...
...
@@ -98,14 +95,8 @@ fn create_minimal_completion_request() -> CompletionRequest {
/// Test basic OpenAI router creation and configuration
#[tokio::test]
async
fn
test_openai_router_creation
()
{
let
router
=
OpenAIRouter
::
new
(
vec!
[
"https://api.openai.com"
.to_string
()],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
;
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
"https://api.openai.com"
.to_string
()],
&
ctx
)
.await
;
assert
!
(
router
.is_ok
(),
"Router creation should succeed"
);
...
...
@@ -117,15 +108,10 @@ async fn test_openai_router_creation() {
/// Test server info endpoint
#[tokio::test]
async
fn
test_openai_router_server_info
()
{
let
router
=
OpenAIRouter
::
new
(
vec!
[
"https://api.openai.com"
.to_string
()],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
"https://api.openai.com"
.to_string
()],
&
ctx
)
.await
.unwrap
();
let
req
=
Request
::
builder
()
.method
(
Method
::
GET
)
...
...
@@ -148,15 +134,10 @@ async fn test_openai_router_server_info() {
async
fn
test_openai_router_models
()
{
// Use mock server for deterministic models response
let
mock_server
=
MockOpenAIServer
::
new
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
mock_server
.base_url
()],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
mock_server
.base_url
()],
&
ctx
)
.await
.unwrap
();
let
req
=
Request
::
builder
()
.method
(
Method
::
GET
)
...
...
@@ -226,17 +207,12 @@ async fn test_openai_router_responses_with_mock() {
});
let
base_url
=
format!
(
"http://{}"
,
addr
);
let
storage
=
Arc
::
new
(
MemoryResponseStorage
::
new
());
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
None
,
storage
.clone
(),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
&
ctx
)
.await
.unwrap
();
// Get storage from context (router uses this, not a separate storage)
let
storage
=
ctx
.response_storage
.clone
();
let
request1
=
ResponsesRequest
{
model
:
"gpt-4o-mini"
.to_string
(),
...
...
@@ -495,25 +471,18 @@ async fn test_openai_router_responses_streaming_with_mock() {
});
let
base_url
=
format!
(
"http://{}"
,
addr
);
let
storage
=
Arc
::
new
(
MemoryResponseStorage
::
new
());
// Seed a previous response so previous_response_id logic has data to pull from.
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
&
ctx
)
.await
.unwrap
();
// Get storage from context and seed a previous response
let
storage
=
ctx
.response_storage
.clone
();
let
mut
previous
=
StoredResponse
::
new
(
None
);
previous
.id
=
ResponseId
::
from
(
"resp_prev_chain"
);
previous
.input
=
serde_json
::
json!
(
"Earlier bedtime question"
);
previous
.output
=
serde_json
::
json!
(
"Earlier answer"
);
storage
.store_response
(
previous
)
.await
.unwrap
();
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
None
,
storage
.clone
(),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
mut
metadata
=
HashMap
::
new
();
metadata
.insert
(
"topic"
.to_string
(),
json!
(
"unicorns"
));
...
...
@@ -611,7 +580,7 @@ async fn test_router_factory_openai_mode() {
let
router_config
=
RouterConfig
::
new
(
routing_mode
,
sglang_router_rs
::
config
::
PolicyConfig
::
Random
);
let
app_context
=
common
::
create_test_context
(
router_config
);
let
app_context
=
common
::
create_test_context
(
router_config
)
.await
;
let
router
=
sglang_router_rs
::
routers
::
RouterFactory
::
create_router
(
&
app_context
)
.await
;
assert
!
(
...
...
@@ -626,15 +595,10 @@ async fn test_router_factory_openai_mode() {
/// Test that unsupported endpoints return proper error codes
#[tokio::test]
async
fn
test_unsupported_endpoints
()
{
let
router
=
OpenAIRouter
::
new
(
vec!
[
"https://api.openai.com"
.to_string
()],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
"https://api.openai.com"
.to_string
()],
&
ctx
)
.await
.unwrap
();
let
generate_request
=
GenerateRequest
{
text
:
Some
(
"Hello world"
.to_string
()),
...
...
@@ -690,16 +654,9 @@ async fn test_openai_router_chat_completion_with_mock() {
let
mock_server
=
MockOpenAIServer
::
new
()
.await
;
let
base_url
=
mock_server
.base_url
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
// Create router pointing to mock server
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
&
ctx
)
.await
.unwrap
();
// Create a minimal chat completion request
let
mut
chat_request
=
create_minimal_chat_request
();
...
...
@@ -732,16 +689,9 @@ async fn test_openai_e2e_with_server() {
let
mock_server
=
MockOpenAIServer
::
new
()
.await
;
let
base_url
=
mock_server
.base_url
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
// Create router
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
&
ctx
)
.await
.unwrap
();
// Create Axum app with chat completions endpoint
let
app
=
Router
::
new
()
.route
(
...
...
@@ -804,15 +754,8 @@ async fn test_openai_e2e_with_server() {
async
fn
test_openai_router_chat_streaming_with_mock
()
{
let
mock_server
=
MockOpenAIServer
::
new
()
.await
;
let
base_url
=
mock_server
.base_url
();
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
base_url
],
&
ctx
)
.await
.unwrap
();
// Build a streaming chat request
let
val
=
json!
({
...
...
@@ -850,23 +793,10 @@ async fn test_openai_router_chat_streaming_with_mock() {
/// Test circuit breaker functionality
#[tokio::test]
async
fn
test_openai_router_circuit_breaker
()
{
// Create router with circuit breaker config
let
cb_config
=
sglang_router_rs
::
config
::
CircuitBreakerConfig
{
failure_threshold
:
2
,
success_threshold
:
1
,
timeout_duration_secs
:
1
,
window_duration_secs
:
10
,
};
let
router
=
OpenAIRouter
::
new
(
vec!
[
"http://invalid-url-that-will-fail"
.to_string
()],
Some
(
cb_config
),
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
"http://invalid-url-that-will-fail"
.to_string
()],
&
ctx
)
.await
.unwrap
();
let
chat_request
=
create_minimal_chat_request
();
...
...
@@ -887,15 +817,10 @@ async fn test_openai_router_models_auth_forwarding() {
// Start a mock server that requires Authorization
let
expected_auth
=
"Bearer test-token"
.to_string
();
let
mock_server
=
MockOpenAIServer
::
new_with_auth
(
Some
(
expected_auth
.clone
()))
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
mock_server
.base_url
()],
None
,
Arc
::
new
(
MemoryResponseStorage
::
new
()),
Arc
::
new
(
MemoryConversationStorage
::
new
()),
Arc
::
new
(
MemoryConversationItemStorage
::
new
()),
)
.await
.unwrap
();
let
ctx
=
common
::
test_app
::
create_test_app_context
()
.await
;
let
router
=
OpenAIRouter
::
new
(
vec!
[
mock_server
.base_url
()],
&
ctx
)
.await
.unwrap
();
// 1) Without auth header -> expect 200 with empty model list
// (multi-endpoint aggregation silently skips failed endpoints)
...
...
sgl-router/tests/test_pd_routing.rs
View file @
212f5e48
...
...
@@ -218,9 +218,10 @@ mod test_pd_routing {
config
.worker_startup_check_interval_secs
,
)));
// Create empty OnceLock for worker job queue
and
workflow engine
// Create empty OnceLock for worker job queue
,
workflow engine
, and mcp manager
let
worker_job_queue
=
Arc
::
new
(
OnceLock
::
new
());
let
workflow_engine
=
Arc
::
new
(
OnceLock
::
new
());
let
mcp_manager
=
Arc
::
new
(
OnceLock
::
new
());
Arc
::
new
(
AppContext
::
builder
()
...
...
@@ -238,6 +239,7 @@ mod test_pd_routing {
.load_monitor
(
load_monitor
)
.worker_job_queue
(
worker_job_queue
)
.workflow_engine
(
workflow_engine
)
.mcp_manager
(
mcp_manager
)
.build
()
.unwrap
(),
)
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment