Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
9f5e7018
Unverified
Commit
9f5e7018
authored
Nov 04, 2025
by
Chang Su
Committed by
GitHub
Nov 04, 2025
Browse files
[router][grpc] Implement tool_choice support for Responses API (#12668)
parent
cbf23dbb
Changes
14
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
457 additions
and
184 deletions
+457
-184
sgl-router/src/grpc_client/sglang_scheduler.rs
sgl-router/src/grpc_client/sglang_scheduler.rs
+34
-4
sgl-router/src/protocols/chat.rs
sgl-router/src/protocols/chat.rs
+36
-14
sgl-router/src/protocols/common.rs
sgl-router/src/protocols/common.rs
+78
-4
sgl-router/src/protocols/responses.rs
sgl-router/src/protocols/responses.rs
+78
-0
sgl-router/src/routers/grpc/common/responses/utils.rs
sgl-router/src/routers/grpc/common/responses/utils.rs
+48
-1
sgl-router/src/routers/grpc/harmony/responses.rs
sgl-router/src/routers/grpc/harmony/responses.rs
+6
-18
sgl-router/src/routers/grpc/harmony/stages/preparation.rs
sgl-router/src/routers/grpc/harmony/stages/preparation.rs
+24
-5
sgl-router/src/routers/grpc/harmony/stages/request_building.rs
...outer/src/routers/grpc/harmony/stages/request_building.rs
+1
-0
sgl-router/src/routers/grpc/regular/responses/conversions.rs
sgl-router/src/routers/grpc/regular/responses/conversions.rs
+56
-71
sgl-router/src/routers/grpc/regular/responses/handlers.rs
sgl-router/src/routers/grpc/regular/responses/handlers.rs
+2
-2
sgl-router/src/routers/grpc/regular/responses/tool_loop.rs
sgl-router/src/routers/grpc/regular/responses/tool_loop.rs
+45
-24
sgl-router/src/routers/grpc/regular/stages/chat/preparation.rs
...outer/src/routers/grpc/regular/stages/chat/preparation.rs
+1
-1
sgl-router/src/routers/grpc/utils.rs
sgl-router/src/routers/grpc/utils.rs
+40
-24
sgl-router/tests/spec/chat_completion.rs
sgl-router/tests/spec/chat_completion.rs
+8
-16
No files found.
sgl-router/src/grpc_client/sglang_scheduler.rs
View file @
9f5e7018
...
...
@@ -303,6 +303,9 @@ impl SglangSchedulerClient {
}
/// Build a GenerateRequest from ResponsesRequest (OpenAI Responses API)
///
/// NOTE: This is used by the Harmony router only. The Regular router uses
/// responses_to_chat() conversion and goes through the chat pipeline.
pub
fn
build_generate_request_from_responses
(
&
self
,
request_id
:
String
,
...
...
@@ -310,9 +313,11 @@ impl SglangSchedulerClient {
processed_text
:
String
,
token_ids
:
Vec
<
u32
>
,
harmony_stop_ids
:
Option
<
Vec
<
u32
>>
,
tool_call_constraint
:
Option
<
(
String
,
String
)
>
,
)
->
Result
<
proto
::
GenerateRequest
,
String
>
{
// Build sampling params from ResponsesRequest
let
mut
sampling_params
=
self
.build_grpc_sampling_params_from_responses
(
body
)
?
;
let
mut
sampling_params
=
self
.build_grpc_sampling_params_from_responses
(
body
,
tool_call_constraint
)
?
;
// Inject Harmony stop token IDs if provided
if
let
Some
(
stop_ids
)
=
harmony_stop_ids
{
...
...
@@ -441,9 +446,10 @@ impl SglangSchedulerClient {
fn
build_grpc_sampling_params_from_responses
(
&
self
,
request
:
&
ResponsesRequest
,
tool_call_constraint
:
Option
<
(
String
,
String
)
>
,
)
->
Result
<
proto
::
SamplingParams
,
String
>
{
// ResponsesRequest doesn't have stop sequences in the same way
//
Tools are handled externally by MCP loop, not via
constraints
//
For Harmony router: Tools are handled via structural_tag
constraints
let
max_new_tokens
=
request
.max_output_tokens
.map
(|
v
|
v
as
i32
);
...
...
@@ -462,12 +468,36 @@ impl SglangSchedulerClient {
spaces_between_special_tokens
:
true
,
ignore_eos
:
false
,
no_stop_trim
:
false
,
n
:
1
,
// Responses API doesn't support n>1
constraint
:
None
,
// No constraints - tools handled by MCP
n
:
1
,
// Responses API doesn't support n>1
constraint
:
self
.build_constraint_for_responses
(
tool_call_constraint
)
?
,
..
Default
::
default
()
})
}
/// Convert an optional `(constraint_type, constraint_value)` pair into a gRPC
/// sampling constraint for a Responses API request.
///
/// Unlike the Chat API's `build_constraint`, the only input considered here is
/// the tool-call constraint.
///
/// Returns `Ok(None)` when no constraint is supplied. The recognized
/// `constraint_type` strings are `"structural_tag"`, `"json_schema"`, `"ebnf"`
/// and `"regex"`; any other value yields `Err` with a message naming it.
fn build_constraint_for_responses(
    &self,
    tool_call_constraint: Option<(String, String)>,
) -> Result<Option<proto::sampling_params::Constraint>, String> {
    // Nothing requested: no constraint is attached to the sampling params.
    let Some((constraint_type, constraint_value)) = tool_call_constraint else {
        return Ok(None);
    };

    // Pick the variant constructor first, then wrap the value exactly once.
    let wrap: fn(String) -> proto::sampling_params::Constraint = match constraint_type.as_str() {
        "structural_tag" => proto::sampling_params::Constraint::StructuralTag,
        "json_schema" => proto::sampling_params::Constraint::JsonSchema,
        "ebnf" => proto::sampling_params::Constraint::EbnfGrammar,
        "regex" => proto::sampling_params::Constraint::Regex,
        _ => return Err(format!("Unknown constraint type: {}", constraint_type)),
    };

    Ok(Some(wrap(constraint_value)))
}
fn
build_single_constraint_from_plain
(
params
:
&
GenerateSamplingParams
,
)
->
Result
<
Option
<
proto
::
sampling_params
::
Constraint
>
,
String
>
{
...
...
sgl-router/src/protocols/chat.rs
View file @
9f5e7018
...
...
@@ -457,21 +457,43 @@ fn validate_chat_cross_parameters(
return
Err
(
e
);
}
// Validate that all
r
eference
d tool names exist i
n tools
// Validate that all
ToolR
eference
s are Function type (Chat API only supports functio
n tools
)
for
tool_ref
in
allowed_tools
{
let
tool_exists
=
tools
.iter
()
.any
(|
tool
|
{
tool
.tool_type
==
tool_ref
.tool_type
&&
tool
.function.name
==
tool_ref
.name
});
if
!
tool_exists
{
let
mut
e
=
validator
::
ValidationError
::
new
(
"tool_choice_tool_not_found"
);
e
.message
=
Some
(
format!
(
"Invalid value for 'tool_choice.tools': tool '{}' not found in 'tools'."
,
tool_ref
.name
)
.into
());
return
Err
(
e
);
match
tool_ref
{
ToolReference
::
Function
{
name
}
=>
{
// Validate that the function exists in tools array
let
tool_exists
=
tools
.iter
()
.any
(|
tool
|
{
tool
.tool_type
==
"function"
&&
tool
.function.name
==
*
name
});
if
!
tool_exists
{
let
mut
e
=
validator
::
ValidationError
::
new
(
"tool_choice_tool_not_found"
,
);
e
.message
=
Some
(
format!
(
"Invalid value for 'tool_choice.tools': tool '{}' not found in 'tools'."
,
name
)
.into
(),
);
return
Err
(
e
);
}
}
_
=>
{
// Chat Completion API only supports function tools in tool_choice
let
mut
e
=
validator
::
ValidationError
::
new
(
"tool_choice_invalid_tool_type"
,
);
e
.message
=
Some
(
format!
(
"Invalid value for 'tool_choice.tools': Chat Completion API only supports function tools, got '{}'."
,
tool_ref
.identifier
()
)
.into
(),
);
return
Err
(
e
);
}
}
}
}
...
...
sgl-router/src/protocols/common.rs
View file @
9f5e7018
...
...
@@ -183,6 +183,18 @@ impl Default for ToolChoice {
}
}
impl ToolChoice {
    /// Render an optional `tool_choice` as the string stored in a ResponsesResponse.
    ///
    /// A present value is JSON-serialized with `serde_json::to_string`. The
    /// literal `"auto"` is returned when `tool_choice` is `None` or when
    /// serialization fails.
    pub fn serialize_to_string(tool_choice: &Option<ToolChoice>) -> String {
        match tool_choice {
            Some(choice) => match serde_json::to_string(choice) {
                Ok(serialized) => serialized,
                // Serialization errors degrade to the default rather than propagating.
                Err(_) => "auto".to_string(),
            },
            None => "auto".to_string(),
        }
    }
}
/// Function choice specification for ToolChoice::Function
#[derive(Debug,
Clone,
Deserialize,
Serialize)]
pub
struct
FunctionChoice
{
...
...
@@ -190,11 +202,73 @@ pub struct FunctionChoice {
}
/// Tool reference for ToolChoice::AllowedTools
///
/// Represents a reference to a specific tool in the allowed_tools array.
/// Different tool types have different required fields.
#[derive(Debug,
Clone,
Deserialize,
Serialize)]
pub
struct
ToolReference
{
#[serde(rename
=
"type"
)]
pub
tool_type
:
String
,
// "function"
pub
name
:
String
,
#[serde(tag
=
"type"
)]
#[serde(rename_all
=
"snake_case"
)]
pub
enum
ToolReference
{
/// Reference to a function tool
#[serde(rename
=
"function"
)]
Function
{
name
:
String
},
/// Reference to an MCP tool
#[serde(rename
=
"mcp"
)]
Mcp
{
server_label
:
String
,
#[serde(skip_serializing_if
=
"Option::is_none"
)]
name
:
Option
<
String
>
,
},
/// File search hosted tool
#[serde(rename
=
"file_search"
)]
FileSearch
,
/// Web search preview hosted tool
#[serde(rename
=
"web_search_preview"
)]
WebSearchPreview
,
/// Computer use preview hosted tool
#[serde(rename
=
"computer_use_preview"
)]
ComputerUsePreview
,
/// Code interpreter hosted tool
#[serde(rename
=
"code_interpreter"
)]
CodeInterpreter
,
/// Image generation hosted tool
#[serde(rename
=
"image_generation"
)]
ImageGeneration
,
}
impl ToolReference {
    /// Produce a string identifier for this tool reference.
    ///
    /// Function tools render as `function:<name>`. MCP tools render as
    /// `mcp:<server_label>:<name>` when a tool name is set and as
    /// `mcp:<server_label>` otherwise. Each hosted tool renders as its
    /// snake_case type name.
    pub fn identifier(&self) -> String {
        match self {
            ToolReference::Function { name } => format!("function:{}", name),
            ToolReference::Mcp {
                server_label,
                name: Some(tool_name),
            } => format!("mcp:{}:{}", server_label, tool_name),
            ToolReference::Mcp {
                server_label,
                name: None,
            } => format!("mcp:{}", server_label),
            ToolReference::FileSearch => String::from("file_search"),
            ToolReference::WebSearchPreview => String::from("web_search_preview"),
            ToolReference::ComputerUsePreview => String::from("computer_use_preview"),
            ToolReference::CodeInterpreter => String::from("code_interpreter"),
            ToolReference::ImageGeneration => String::from("image_generation"),
        }
    }

    /// Return the function name when this reference is a function tool,
    /// and `None` for every other kind of reference.
    pub fn function_name(&self) -> Option<&str> {
        if let ToolReference::Function { name } = self {
            Some(name.as_str())
        } else {
            None
        }
    }
}
#[derive(Debug,
Clone,
Deserialize,
Serialize)]
...
...
sgl-router/src/protocols/responses.rs
View file @
9f5e7018
...
...
@@ -447,6 +447,7 @@ fn default_top_p() -> Option<f32> {
// ============================================================================
#[derive(Debug,
Clone,
Deserialize,
Serialize,
Validate)]
#[validate(schema(function
=
"validate_responses_cross_parameters"
))]
pub
struct
ResponsesRequest
{
/// Run the request in the background
#[serde(skip_serializing_if
=
"Option::is_none"
)]
...
...
@@ -721,6 +722,83 @@ pub fn validate_conversation_id(conv_id: &str) -> Result<(), validator::Validati
Ok
(())
}
/// Schema-level validation for cross-field dependencies
fn
validate_responses_cross_parameters
(
request
:
&
ResponsesRequest
,
)
->
Result
<
(),
validator
::
ValidationError
>
{
use
super
::
common
::{
ToolChoice
,
ToolReference
};
// Only validate if both tools and tool_choice are present
if
let
(
Some
(
tools
),
Some
(
tool_choice
))
=
(
&
request
.tools
,
&
request
.tool_choice
)
{
// Extract function tool names from ResponseTools
let
function_tool_names
:
Vec
<&
str
>
=
tools
.iter
()
.filter_map
(|
t
|
match
t
.r
#
type
{
ResponseToolType
::
Function
=>
t
.function
.as_ref
()
.map
(|
f
|
f
.name
.as_str
()),
_
=>
None
,
})
.collect
();
match
tool_choice
{
ToolChoice
::
Function
{
function
,
..
}
=>
{
// Validate the specific function exists
if
!
function_tool_names
.contains
(
&
function
.name
.as_str
())
{
let
mut
e
=
validator
::
ValidationError
::
new
(
"tool_choice_function_not_found"
);
e
.message
=
Some
(
format!
(
"Invalid value for 'tool_choice': function '{}' not found in 'tools'."
,
function
.name
)
.into
(),
);
return
Err
(
e
);
}
}
ToolChoice
::
AllowedTools
{
mode
,
tools
:
allowed_tools
,
..
}
=>
{
// Validate mode is "auto" or "required"
if
mode
!=
"auto"
&&
mode
!=
"required"
{
let
mut
e
=
validator
::
ValidationError
::
new
(
"tool_choice_invalid_mode"
);
e
.message
=
Some
(
format!
(
"Invalid value for 'tool_choice.mode': must be 'auto' or 'required', got '{}'."
,
mode
)
.into
(),
);
return
Err
(
e
);
}
// Validate that all function tool references exist
for
tool_ref
in
allowed_tools
{
if
let
ToolReference
::
Function
{
name
}
=
tool_ref
{
if
!
function_tool_names
.contains
(
&
name
.as_str
())
{
let
mut
e
=
validator
::
ValidationError
::
new
(
"tool_choice_tool_not_found"
);
e
.message
=
Some
(
format!
(
"Invalid value for 'tool_choice.tools': tool '{}' not found in 'tools'."
,
name
)
.into
(),
);
return
Err
(
e
);
}
}
// Note: MCP and hosted tools don't need existence validation here
// as they are resolved dynamically at runtime
}
}
_
=>
{}
}
}
Ok
(())
}
/// Normalize a SimpleInputMessage to a proper Message item
///
/// This helper converts SimpleInputMessage (which can have flexible content)
...
...
sgl-router/src/routers/grpc/common/responses/utils.rs
View file @
9f5e7018
...
...
@@ -11,7 +11,10 @@ use serde_json::json;
use
crate
::{
core
::
WorkerRegistry
,
mcp
::
McpManager
,
protocols
::
responses
::{
ResponseTool
,
ResponseToolType
},
protocols
::{
common
::
Tool
,
responses
::{
ResponseTool
,
ResponseToolType
},
},
routers
::{
grpc
::
error
,
openai
::
mcp
::
ensure_request_mcp_client
},
};
...
...
@@ -76,3 +79,47 @@ pub fn validate_worker_availability(
None
}
/// Extract function tools (and optionally MCP tools) from ResponseTools
///
/// This utility consolidates the logic for extracting tools with schemas from ResponseTools.
/// It's used by both Harmony and Regular routers for different purposes:
///
/// - **Harmony router**: Extracts both Function and MCP tools (with `include_mcp: true`)
/// because MCP schemas are populated by convert_mcp_tools_to_response_tools() before the
/// pipeline runs. These tools are used to generate structural constraints in the
/// Harmony preparation stage.
///
/// - **Regular router**: Extracts only Function tools (with `include_mcp: false`) during
/// the initial conversion from ResponsesRequest to ChatCompletionRequest. MCP tools
/// are merged later by the tool loop before being sent to the chat pipeline, where
/// tool_choice constraints are generated for ALL tools (function + MCP combined).
pub
fn
extract_tools_from_response_tools
(
response_tools
:
Option
<&
[
ResponseTool
]
>
,
include_mcp
:
bool
,
)
->
Vec
<
Tool
>
{
let
Some
(
tools
)
=
response_tools
else
{
return
Vec
::
new
();
};
tools
.iter
()
.filter_map
(|
rt
|
{
match
rt
.r
#
type
{
// Function tools: Schema in request
ResponseToolType
::
Function
=>
rt
.function
.as_ref
()
.map
(|
f
|
Tool
{
tool_type
:
"function"
.to_string
(),
function
:
f
.clone
(),
}),
// MCP tools: Schema populated by convert_mcp_tools_to_response_tools()
// Only include if requested (Harmony case)
ResponseToolType
::
Mcp
if
include_mcp
=>
rt
.function
.as_ref
()
.map
(|
f
|
Tool
{
tool_type
:
"function"
.to_string
(),
function
:
f
.clone
(),
}),
// Hosted tools: No schema available, skip
_
=>
None
,
}
})
.collect
()
}
sgl-router/src/routers/grpc/harmony/responses.rs
View file @
9f5e7018
...
...
@@ -52,7 +52,7 @@ use crate::{
data_connector
::{
ResponseId
,
ResponseStorage
},
mcp
::{
self
,
McpManager
},
protocols
::{
common
::{
Function
,
ToolCall
,
Usage
},
common
::{
Function
,
ToolCall
,
ToolChoice
,
ToolChoiceValue
,
Usage
},
responses
::{
McpToolInfo
,
ResponseContentPart
,
ResponseInput
,
ResponseInputOutputItem
,
ResponseOutputItem
,
ResponseReasoningContent
,
ResponseStatus
,
ResponseTool
,
...
...
@@ -467,15 +467,6 @@ async fn execute_without_mcp_loop(
/// - Calls `streaming::process_responses_iteration_stream()` for per-iteration events
/// - Emits `response.completed` at end
/// - Handles errors with `response.failed`
///
/// # Arguments
///
/// * `ctx` - Harmony responses context with pipeline and dependencies
/// * `request` - Responses API request
///
/// # Returns
///
/// SSE stream response with proper headers
pub
async
fn
serve_harmony_responses_stream
(
ctx
:
&
HarmonyResponsesContext
,
request
:
ResponsesRequest
,
...
...
@@ -1189,6 +1180,11 @@ fn build_next_request_with_tools(
// Update request with new items
request
.input
=
ResponseInput
::
Items
(
items
);
// Switch tool_choice to "auto" for subsequent iterations
// This prevents infinite loops when original tool_choice was "required" or specific function
// After receiving tool results, the model should be free to decide whether to call more tools or finish
request
.tool_choice
=
Some
(
ToolChoice
::
Value
(
ToolChoiceValue
::
Auto
));
Ok
(
request
)
}
...
...
@@ -1214,14 +1210,6 @@ struct ToolResult {
///
/// Converts MCP Tool entries (from rmcp SDK) to ResponseTool format so the model
/// knows about available MCP tools when making tool calls.
///
/// # Arguments
///
/// * `mcp_tools` - MCP tools from the MCP manager inventory (rmcp::model::Tool)
///
/// # Returns
///
/// Vector of ResponseTool entries in MCP format
pub
fn
convert_mcp_tools_to_response_tools
(
mcp_tools
:
&
[
mcp
::
Tool
])
->
Vec
<
ResponseTool
>
{
mcp_tools
.iter
()
...
...
sgl-router/src/routers/grpc/harmony/stages/preparation.rs
View file @
9f5e7018
...
...
@@ -12,7 +12,7 @@ use crate::{
responses
::
ResponsesRequest
,
},
routers
::
grpc
::{
common
::
stages
::
PipelineStage
,
common
::
{
responses
::
utils
::
extract_tools_from_response_tools
,
stages
::
PipelineStage
}
,
context
::{
PreparationOutput
,
RequestContext
,
RequestType
},
error
,
utils
,
},
...
...
@@ -84,7 +84,7 @@ impl HarmonyPreparationStage {
}
// Step 1: Filter tools if needed
let
body_ref
=
utils
::
filter_
tools_for_request
(
request
);
let
body_ref
=
utils
::
filter_
chat_request_by_tool_choice
(
request
);
// Step 2: Build tool constraints
let
tool_constraints
=
if
let
Some
(
tools
)
=
body_ref
.tools
.as_ref
()
{
...
...
@@ -128,18 +128,37 @@ impl HarmonyPreparationStage {
ctx
:
&
mut
RequestContext
,
request
:
&
ResponsesRequest
,
)
->
Result
<
Option
<
Response
>
,
Response
>
{
// Build via Harmony from responses API request
// Step 1: Extract function and MCP tools with schemas from ResponseTools
let
mut
function_tools
=
extract_tools_from_response_tools
(
request
.tools
.as_deref
(),
true
);
// Step 2: Filter tools based on tool_choice (AllowedTools or Function)
// Note: Tool existence is already validated in ResponsesRequest::validate()
if
let
Some
(
filtered
)
=
utils
::
filter_tools_by_tool_choice
(
&
function_tools
,
&
request
.tool_choice
)
{
function_tools
=
filtered
;
}
// Step 3: Generate Harmony structural tags from filtered tools
let
tool_constraints
=
if
!
function_tools
.is_empty
()
{
Self
::
generate_harmony_structural_tag
(
&
function_tools
,
&
request
.tool_choice
)
.map_err
(|
e
|
*
e
)
?
}
else
{
None
};
// Step 3: Build via Harmony from responses API request
let
build_output
=
self
.builder
.build_from_responses
(
request
)
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Harmony build failed: {}"
,
e
)))
?
;
// Store results
in preparation output
//
Step 4:
Store results
with tool_constraints
ctx
.state.preparation
=
Some
(
PreparationOutput
{
original_text
:
None
,
token_ids
:
build_output
.input_ids
,
processed_messages
:
None
,
tool_constraints
:
None
,
tool_constraints
,
filtered_request
:
None
,
harmony_mode
:
true
,
selection_text
:
Some
(
build_output
.selection_text
),
...
...
sgl-router/src/routers/grpc/harmony/stages/request_building.rs
View file @
9f5e7018
...
...
@@ -84,6 +84,7 @@ impl PipelineStage for HarmonyRequestBuildingStage {
placeholder_processed_text
,
prep
.token_ids
.clone
(),
prep
.harmony_stop_ids
.clone
(),
prep
.tool_constraints
.clone
(),
)
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Invalid request parameters: {}"
,
e
)))
?
,
_
=>
unreachable!
(),
...
...
sgl-router/src/routers/grpc/regular/responses/conversions.rs
View file @
9f5e7018
...
...
@@ -7,14 +7,17 @@
//! This allows the gRPC router to reuse the existing chat pipeline infrastructure
//! without requiring Python backend changes.
use
crate
::
protocols
::{
chat
::{
ChatCompletionRequest
,
ChatCompletionResponse
,
ChatMessage
,
UserMessageContent
},
common
::{
FunctionCallResponse
,
StreamOptions
,
ToolCall
,
UsageInfo
},
responses
::{
ResponseContentPart
,
ResponseInput
,
ResponseInputOutputItem
,
ResponseOutputItem
,
ResponseReasoningContent
::
ReasoningText
,
ResponseStatus
,
ResponsesRequest
,
ResponsesResponse
,
ResponsesUsage
,
StringOrContentParts
,
use
crate
::{
protocols
::{
chat
::{
ChatCompletionRequest
,
ChatCompletionResponse
,
ChatMessage
,
UserMessageContent
},
common
::{
FunctionCallResponse
,
StreamOptions
,
ToolCall
,
ToolChoice
,
UsageInfo
},
responses
::{
ResponseContentPart
,
ResponseInput
,
ResponseInputOutputItem
,
ResponseOutputItem
,
ResponseReasoningContent
::
ReasoningText
,
ResponseStatus
,
ResponsesRequest
,
ResponsesResponse
,
ResponsesUsage
,
StringOrContentParts
,
},
},
routers
::
grpc
::
common
::
responses
::
utils
::
extract_tools_from_response_tools
,
};
/// Convert a ResponsesRequest to ChatCompletionRequest for processing through the chat pipeline
...
...
@@ -23,7 +26,8 @@ use crate::protocols::{
/// - `input` (text/items) → `messages` (chat messages)
/// - `instructions` → system message (prepended)
/// - `max_output_tokens` → `max_completion_tokens`
/// - Tool-related fields are passed through
/// - `tools` → function tools extracted from ResponseTools
/// - `tool_choice` → passed through from request
/// - Response-specific fields (previous_response_id, conversation) are handled by router
pub
fn
responses_to_chat
(
req
:
&
ResponsesRequest
)
->
Result
<
ChatCompletionRequest
,
String
>
{
let
mut
messages
=
Vec
::
new
();
...
...
@@ -68,69 +72,13 @@ pub fn responses_to_chat(req: &ResponsesRequest) -> Result<ChatCompletionRequest
}
};
match
role
.as_str
()
{
"user"
=>
{
messages
.push
(
ChatMessage
::
User
{
content
:
UserMessageContent
::
Text
(
text
),
name
:
None
,
});
}
"assistant"
=>
{
messages
.push
(
ChatMessage
::
Assistant
{
content
:
Some
(
text
),
name
:
None
,
tool_calls
:
None
,
reasoning_content
:
None
,
});
}
"system"
=>
{
messages
.push
(
ChatMessage
::
System
{
content
:
text
,
name
:
None
,
});
}
_
=>
{
// Unknown role, treat as user message
messages
.push
(
ChatMessage
::
User
{
content
:
UserMessageContent
::
Text
(
text
),
name
:
None
,
});
}
}
messages
.push
(
role_to_chat_message
(
role
.as_str
(),
text
));
}
ResponseInputOutputItem
::
Message
{
role
,
content
,
..
}
=>
{
// Extract text from content parts
let
text
=
extract_text_from_content
(
content
);
match
role
.as_str
()
{
"user"
=>
{
messages
.push
(
ChatMessage
::
User
{
content
:
UserMessageContent
::
Text
(
text
),
name
:
None
,
});
}
"assistant"
=>
{
messages
.push
(
ChatMessage
::
Assistant
{
content
:
Some
(
text
),
name
:
None
,
tool_calls
:
None
,
reasoning_content
:
None
,
});
}
"system"
=>
{
messages
.push
(
ChatMessage
::
System
{
content
:
text
,
name
:
None
,
});
}
_
=>
{
// Unknown role, treat as user message
messages
.push
(
ChatMessage
::
User
{
content
:
UserMessageContent
::
Text
(
text
),
name
:
None
,
});
}
}
messages
.push
(
role_to_chat_message
(
role
.as_str
(),
text
));
}
ResponseInputOutputItem
::
FunctionToolCall
{
id
,
...
...
@@ -203,7 +151,18 @@ pub fn responses_to_chat(req: &ResponsesRequest) -> Result<ChatCompletionRequest
return
Err
(
"Request must contain at least one message"
.to_string
());
}
// 3. Build ChatCompletionRequest
// 3. Extract function tools from ResponseTools
// Only function tools are extracted here (include_mcp: false).
// MCP tools are merged later by the tool loop (see tool_loop.rs:prepare_chat_tools_and_choice)
// before the chat pipeline, where tool_choice constraints are applied to ALL tools combined.
let
function_tools
=
extract_tools_from_response_tools
(
req
.tools
.as_deref
(),
false
);
let
tools
=
if
function_tools
.is_empty
()
{
None
}
else
{
Some
(
function_tools
)
};
// 4. Build ChatCompletionRequest
let
is_streaming
=
req
.stream
.unwrap_or
(
false
);
Ok
(
ChatCompletionRequest
{
...
...
@@ -227,9 +186,8 @@ pub fn responses_to_chat(req: &ResponsesRequest) -> Result<ChatCompletionRequest
top_logprobs
:
req
.top_logprobs
,
top_p
:
req
.top_p
,
skip_special_tokens
:
true
,
// Note: tools and tool_choice will be handled separately for MCP transformation
tools
:
None
,
// Will be set by caller if needed
tool_choice
:
None
,
// Will be set by caller if needed
tools
,
tool_choice
:
req
.tool_choice
.clone
(),
..
Default
::
default
()
})
}
...
...
@@ -247,6 +205,33 @@ fn extract_text_from_content(content: &[ResponseContentPart]) -> String {
.join
(
""
)
}
/// Build the `ChatMessage` matching `role`, carrying `text` as its content.
///
/// `"assistant"` and `"system"` map to their own variants. `"user"` and any
/// unrecognized role both produce a user message.
fn role_to_chat_message(role: &str, text: String) -> ChatMessage {
    match role {
        "assistant" => ChatMessage::Assistant {
            content: Some(text),
            name: None,
            tool_calls: None,
            reasoning_content: None,
        },
        "system" => ChatMessage::System {
            content: text,
            name: None,
        },
        // "user", plus every unknown role, is treated as a user message.
        _ => ChatMessage::User {
            content: UserMessageContent::Text(text),
            name: None,
        },
    }
}
/// Convert a ChatCompletionResponse to ResponsesResponse
///
/// # Conversion Logic
...
...
@@ -354,7 +339,7 @@ pub fn chat_to_responses(
store
:
original_req
.store
.unwrap_or
(
true
),
temperature
:
original_req
.temperature
,
text
:
None
,
tool_choice
:
"auto"
.to_string
(),
// TODO: Map from
original
req
uest
tool_choice
:
ToolChoice
::
serialize_to_string
(
&
original
_
req
.tool_choice
),
tools
:
original_req
.tools
.clone
()
.unwrap_or_default
(),
top_p
:
original_req
.top_p
,
truncation
:
None
,
...
...
sgl-router/src/routers/grpc/regular/responses/handlers.rs
View file @
9f5e7018
...
...
@@ -58,7 +58,7 @@ use crate::{
},
protocols
::{
chat
::{
self
,
ChatCompletionStreamResponse
},
common
,
common
::{
self
,
ToolChoice
}
,
responses
::{
self
,
ResponseContentPart
,
ResponseInput
,
ResponseInputOutputItem
,
ResponseOutputItem
,
ResponseReasoningContent
,
ResponseStatus
,
ResponsesRequest
,
ResponsesResponse
,
...
...
@@ -657,7 +657,7 @@ impl StreamingResponseAccumulator {
store
:
self
.original_request.store
.unwrap_or
(
true
),
temperature
:
self
.original_request.temperature
,
text
:
None
,
tool_choice
:
"auto"
.to_string
(
),
tool_choice
:
ToolChoice
::
serialize_to_string
(
&
self
.original_request.tool_choice
),
tools
:
self
.original_request.tools
.clone
()
.unwrap_or_default
(),
top_p
:
self
.original_request.top_p
,
truncation
:
None
,
...
...
sgl-router/src/routers/grpc/regular/responses/tool_loop.rs
View file @
9f5e7018
...
...
@@ -13,7 +13,7 @@ use axum::{
};
use
bytes
::
Bytes
;
use
futures_util
::
StreamExt
;
use
serde_json
::
json
;
use
serde_json
::
{
json
,
Value
}
;
use
tokio
::
sync
::
mpsc
;
use
tokio_stream
::
wrappers
::
UnboundedReceiverStream
;
use
tracing
::{
debug
,
warn
};
...
...
@@ -24,7 +24,8 @@ use crate::{
mcp
::{
self
,
McpManager
},
protocols
::{
chat
::{
ChatChoice
,
ChatCompletionMessage
,
ChatCompletionResponse
,
ChatCompletionStreamResponse
,
ChatChoice
,
ChatCompletionMessage
,
ChatCompletionRequest
,
ChatCompletionResponse
,
ChatCompletionStreamResponse
,
},
common
::{
Function
,
FunctionCallResponse
,
Tool
,
ToolCall
,
ToolChoice
,
ToolChoiceValue
},
responses
::{
...
...
@@ -66,6 +67,30 @@ fn extract_function_call_from_chat(
None
}
/// Merge the request's own tools with the MCP tools and settle `tool_choice`
/// for the given tool-loop iteration.
///
/// * `chat_request` - request to update in place; its `tools` and
///   `tool_choice` fields are overwritten.
/// * `mcp_chat_tools` - MCP tools already converted to chat format; they are
///   appended after any tools already on the request.
/// * `iteration` - zero-based loop iteration.
///
/// After the call `tools` is always `Some` (possibly an empty list).
/// `tool_choice` is:
/// - iteration 0: the caller's value, or auto when none was given;
/// - iteration 1+: always auto, to avoid infinite loops when the original
///   choice was "required" or a specific function.
fn prepare_chat_tools_and_choice(
    chat_request: &mut ChatCompletionRequest,
    mcp_chat_tools: &[Tool],
    iteration: usize,
) {
    // Move the existing tools out instead of cloning them: the field is
    // reassigned immediately below, so the copy would be thrown away.
    let mut all_tools = chat_request.tools.take().unwrap_or_default();
    all_tools.extend(mcp_chat_tools.iter().cloned());
    chat_request.tools = Some(all_tools);

    // Only overwrite when the value actually changes; on iteration 0 an
    // explicit user choice is left untouched.
    if iteration != 0 || chat_request.tool_choice.is_none() {
        chat_request.tool_choice = Some(ToolChoice::Value(ToolChoiceValue::Auto));
    }
}
/// Extract all tool calls from chat response (for parallel tool call support)
fn
extract_all_tool_calls_from_chat
(
response
:
&
ChatCompletionResponse
,
...
...
@@ -166,16 +191,13 @@ fn build_mcp_list_tools_item(mcp: &Arc<McpManager>, server_label: &str) -> Respo
let
tools
=
mcp
.list_tools
();
let
tools_info
:
Vec
<
McpToolInfo
>
=
tools
.iter
()
.map
(|
t
|
{
use
serde_json
::
Value
;
McpToolInfo
{
name
:
t
.name
.to_string
(),
description
:
t
.description
.as_ref
()
.map
(|
d
|
d
.to_string
()),
input_schema
:
Value
::
Object
((
*
t
.input_schema
)
.clone
()),
annotations
:
Some
(
json!
({
"read_only"
:
false
})),
}
.map
(|
t
|
McpToolInfo
{
name
:
t
.name
.to_string
(),
description
:
t
.description
.as_ref
()
.map
(|
d
|
d
.to_string
()),
input_schema
:
Value
::
Object
((
*
t
.input_schema
)
.clone
()),
annotations
:
Some
(
json!
({
"read_only"
:
false
})),
})
.collect
();
...
...
@@ -247,17 +269,19 @@ pub(super) async fn execute_tool_loop(
// Get MCP tools and convert to chat format (do this once before loop)
let
mcp_tools
=
ctx
.mcp_manager
.list_tools
();
let
chat_tools
=
convert_mcp_tools_to_chat_tools
(
&
mcp_tools
);
debug!
(
"Converted {} MCP tools to chat format"
,
chat_tools
.len
());
let
mcp_chat_tools
=
convert_mcp_tools_to_chat_tools
(
&
mcp_tools
);
debug!
(
"Converted {} MCP tools to chat format"
,
mcp_chat_tools
.len
()
);
loop
{
// Convert to chat request
let
mut
chat_request
=
conversions
::
responses_to_chat
(
&
current_request
)
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Failed to convert request: {}"
,
e
)))
?
;
// Add MCP tools to chat request so LLM knows about them
chat_request
.tools
=
Some
(
chat_tools
.clone
());
chat_request
.tool_choice
=
Some
(
ToolChoice
::
Value
(
ToolChoiceValue
::
Auto
));
// Prepare tools and tool_choice for this iteration
prepare_chat_tools_and_choice
(
&
mut
chat_request
,
&
mcp_chat_tools
,
state
.iteration
);
// Execute chat pipeline (errors already have proper HTTP status codes)
let
chat_response
=
ctx
...
...
@@ -555,10 +579,10 @@ async fn execute_tool_loop_streaming_internal(
// Get MCP tools and convert to chat format (do this once before loop)
let
mcp_tools
=
ctx
.mcp_manager
.list_tools
();
let
chat_tools
=
convert_mcp_tools_to_chat_tools
(
&
mcp_tools
);
let
mcp_
chat_tools
=
convert_mcp_tools_to_chat_tools
(
&
mcp_tools
);
debug!
(
"Streaming: Converted {} MCP tools to chat format"
,
chat_tools
.len
()
mcp_
chat_tools
.len
()
);
// Flag to track if mcp_list_tools has been emitted
...
...
@@ -584,7 +608,6 @@ async fn execute_tool_loop_streaming_internal(
let
tool_items
:
Vec
<
_
>
=
mcp_tools
.iter
()
.map
(|
t
|
{
use
serde_json
::
Value
;
json!
({
"name"
:
t
.name
,
"description"
:
t
.description
,
...
...
@@ -635,9 +658,8 @@ async fn execute_tool_loop_streaming_internal(
let
mut
chat_request
=
conversions
::
responses_to_chat
(
&
current_request
)
.map_err
(|
e
|
format!
(
"Failed to convert request: {}"
,
e
))
?
;
// Add MCP tools to chat request so LLM knows about them
chat_request
.tools
=
Some
(
chat_tools
.clone
());
chat_request
.tool_choice
=
Some
(
ToolChoice
::
Value
(
ToolChoiceValue
::
Auto
));
// Prepare tools and tool_choice for this iteration (same logic as non-streaming)
prepare_chat_tools_and_choice
(
&
mut
chat_request
,
&
mcp_chat_tools
,
state
.iteration
);
// Execute chat streaming
let
response
=
ctx
...
...
@@ -913,7 +935,6 @@ async fn execute_tool_loop_streaming_internal(
/// Convert MCP tools to Chat API tool format
fn
convert_mcp_tools_to_chat_tools
(
mcp_tools
:
&
[
mcp
::
Tool
])
->
Vec
<
Tool
>
{
use
serde_json
::
Value
;
mcp_tools
.iter
()
.map
(|
tool_info
|
Tool
{
...
...
sgl-router/src/routers/grpc/regular/stages/chat/preparation.rs
View file @
9f5e7018
...
...
@@ -40,7 +40,7 @@ impl ChatPreparationStage {
request
:
&
ChatCompletionRequest
,
)
->
Result
<
(),
Response
>
{
// Step 1: Filter tools if needed
let
body_ref
=
utils
::
filter_
tools_for_request
(
request
);
let
body_ref
=
utils
::
filter_
chat_request_by_tool_choice
(
request
);
// Step 2: Process messages and apply chat template
let
processed_messages
=
...
...
sgl-router/src/routers/grpc/utils.rs
View file @
9f5e7018
...
...
@@ -9,7 +9,6 @@ use tracing::{error, warn};
use
uuid
::
Uuid
;
use
super
::{
error
,
ProcessedMessages
};
pub
use
crate
::
tokenizer
::
StopSequenceDecoder
;
use
crate
::{
core
::
Worker
,
grpc_client
::{
proto
,
sglang_scheduler
::
AbortOnDropStream
,
SglangSchedulerClient
},
...
...
@@ -28,8 +27,9 @@ use crate::{
tokenizer
::{
cache
::
CachedTokenizer
,
chat_template
::{
ChatTemplateContentFormat
,
ChatTemplateParams
},
stop
::
StopSequenceDecoderBuilder
,
traits
::
Tokenizer
,
HuggingFaceTokenizer
,
HuggingFaceTokenizer
,
StopSequenceDecoder
,
},
tool_parser
::{
ParserFactory
as
ToolParserFactory
,
PooledParser
as
ToolPooledParser
,
ToolParser
,
...
...
@@ -273,39 +273,57 @@ fn build_required_array_schema(tools: &[Tool]) -> Result<String, String> {
.map_err
(|
e
|
format!
(
"Failed to serialize tool schema: {}"
,
e
))
}
/// Filter tools based on tool_choice (
shared by both rout
er
s
)
///
Returns a reference to the original body if no filtering needed,
///
otherwise returns a cloned and filtered body
pub
fn
filter_tools_for_request
(
body
:
&
ChatCompletionRequest
,
)
->
std
::
borrow
::
Cow
<
'_
,
ChatCompletionRequest
>
{
match
&
body
.t
ool
_c
hoice
{
Some
(
ToolChoice
::
AllowedTools
{
tools
:
allowed
,
..
})
if
body
.tools
.is_some
()
=
>
{
let
mut
filtered_body
=
body
.clone
();
let
all_tools
=
filtered_body
.tools
.as_ref
()
.unwrap
();
/// Filter tools based on tool_choice (
generic help
er)
///
///
Returns filtered tools if filtering is needed, otherwise returns None.
/// Used by both Chat API and Responses API (Harmony) for constraint generation.
pub
fn
filter_tools_by_tool_choice
(
tools
:
&
[
Tool
],
tool_choice
:
&
Option
<
T
ool
C
hoice
>
,
)
->
Option
<
Vec
<
Tool
>
>
{
match
tool_choice
{
Some
(
ToolChoice
::
AllowedTools
{
tools
:
allowed
,
..
})
=>
{
let
allowed_names
:
std
::
collections
::
HashSet
<&
str
>
=
allowed
.iter
()
.map
(|
t
|
t
.
name
.as_str
())
.collect
();
let
filtered
_tools
:
Vec
<
Tool
>
=
all_
tools
allowed
.iter
()
.
filter_
map
(|
t
|
t
.
function_name
())
.collect
();
let
filtered
:
Vec
<
Tool
>
=
tools
.iter
()
.filter
(|
t
|
allowed_names
.contains
(
t
.function.name
.as_str
()))
.cloned
()
.collect
();
filtered_body
.tools
=
Some
(
filtered_tools
);
std
::
borrow
::
Cow
::
Owned
(
filtered_body
)
Some
(
filtered
)
}
Some
(
ToolChoice
::
Function
{
function
,
..
})
if
body
.tools
.is_some
()
=>
{
let
mut
filtered_body
=
body
.clone
();
let
all_tools
=
filtered_body
.tools
.as_ref
()
.unwrap
();
let
filtered_tools
:
Vec
<
Tool
>
=
all_tools
Some
(
ToolChoice
::
Function
{
function
,
..
})
=>
{
let
filtered
:
Vec
<
Tool
>
=
tools
.iter
()
.filter
(|
t
|
t
.function.name
==
function
.name
)
.cloned
()
.collect
();
Some
(
filtered
)
}
_
=>
None
,
// No filtering needed
}
}
/// Filter ChatCompletionRequest by tool_choice
///
/// Returns a reference to the original request if no filtering needed,
/// otherwise returns a cloned request with filtered tools.
///
/// Note: Tool existence is validated earlier in ChatCompletionRequest::validate(),
/// so this function assumes tool_choice references valid tools.
pub
fn
filter_chat_request_by_tool_choice
(
body
:
&
ChatCompletionRequest
,
)
->
std
::
borrow
::
Cow
<
'_
,
ChatCompletionRequest
>
{
if
let
Some
(
tools
)
=
&
body
.tools
{
if
let
Some
(
filtered_tools
)
=
filter_tools_by_tool_choice
(
tools
,
&
body
.tool_choice
)
{
let
mut
filtered_body
=
body
.clone
();
filtered_body
.tools
=
Some
(
filtered_tools
);
std
::
borrow
::
Cow
::
Owned
(
filtered_body
)
return
std
::
borrow
::
Cow
::
Owned
(
filtered_body
)
;
}
_
=>
std
::
borrow
::
Cow
::
Borrowed
(
body
),
// No filtering needed, use original
}
// No filtering needed - return original request
std
::
borrow
::
Cow
::
Borrowed
(
body
)
}
/// Process chat messages and apply template (shared by both routers)
...
...
@@ -438,8 +456,6 @@ pub fn create_stop_decoder(
skip_special_tokens
:
bool
,
no_stop_trim
:
bool
,
)
->
StopSequenceDecoder
{
use
crate
::
tokenizer
::
stop
::
StopSequenceDecoderBuilder
;
// Extract stop sequences
let
stop_sequences
:
Vec
<
String
>
=
match
stop
{
Some
(
StringOrArray
::
String
(
s
))
=>
vec!
[
s
.clone
()],
...
...
sgl-router/tests/spec/chat_completion.rs
View file @
9f5e7018
...
...
@@ -349,8 +349,7 @@ fn test_tool_choice_allowed_tools_invalid_mode() {
}]),
tool_choice
:
Some
(
ToolChoice
::
AllowedTools
{
mode
:
"invalid_mode"
.to_string
(),
tools
:
vec!
[
ToolReference
{
tool_type
:
"function"
.to_string
(),
tools
:
vec!
[
ToolReference
::
Function
{
name
:
"get_weather"
.to_string
(),
}],
tool_type
:
"function"
.to_string
(),
...
...
@@ -387,8 +386,7 @@ fn test_tool_choice_allowed_tools_valid_mode_auto() {
}]),
tool_choice
:
Some
(
ToolChoice
::
AllowedTools
{
mode
:
"auto"
.to_string
(),
tools
:
vec!
[
ToolReference
{
tool_type
:
"function"
.to_string
(),
tools
:
vec!
[
ToolReference
::
Function
{
name
:
"get_weather"
.to_string
(),
}],
tool_type
:
"function"
.to_string
(),
...
...
@@ -419,8 +417,7 @@ fn test_tool_choice_allowed_tools_valid_mode_required() {
}]),
tool_choice
:
Some
(
ToolChoice
::
AllowedTools
{
mode
:
"required"
.to_string
(),
tools
:
vec!
[
ToolReference
{
tool_type
:
"function"
.to_string
(),
tools
:
vec!
[
ToolReference
::
Function
{
name
:
"get_weather"
.to_string
(),
}],
tool_type
:
"function"
.to_string
(),
...
...
@@ -451,8 +448,7 @@ fn test_tool_choice_allowed_tools_tool_not_found() {
}]),
tool_choice
:
Some
(
ToolChoice
::
AllowedTools
{
mode
:
"auto"
.to_string
(),
tools
:
vec!
[
ToolReference
{
tool_type
:
"function"
.to_string
(),
tools
:
vec!
[
ToolReference
::
Function
{
name
:
"nonexistent_tool"
.to_string
(),
}],
tool_type
:
"function"
.to_string
(),
...
...
@@ -501,12 +497,10 @@ fn test_tool_choice_allowed_tools_multiple_tools_valid() {
tool_choice
:
Some
(
ToolChoice
::
AllowedTools
{
mode
:
"auto"
.to_string
(),
tools
:
vec!
[
ToolReference
{
tool_type
:
"function"
.to_string
(),
ToolReference
::
Function
{
name
:
"get_weather"
.to_string
(),
},
ToolReference
{
tool_type
:
"function"
.to_string
(),
ToolReference
::
Function
{
name
:
"get_time"
.to_string
(),
},
],
...
...
@@ -550,12 +544,10 @@ fn test_tool_choice_allowed_tools_one_invalid_among_valid() {
tool_choice
:
Some
(
ToolChoice
::
AllowedTools
{
mode
:
"auto"
.to_string
(),
tools
:
vec!
[
ToolReference
{
tool_type
:
"function"
.to_string
(),
ToolReference
::
Function
{
name
:
"get_weather"
.to_string
(),
},
ToolReference
{
tool_type
:
"function"
.to_string
(),
ToolReference
::
Function
{
name
:
"nonexistent_tool"
.to_string
(),
},
],
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment