Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
0b41a293
Unverified
Commit
0b41a293
authored
Nov 03, 2025
by
Chang Su
Committed by
GitHub
Nov 03, 2025
Browse files
[router][grpc] Consolidate error messages build in error.rs (#12301)
parent
d31d48b3
Changes
19
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
255 additions
and
198 deletions
+255
-198
sgl-router/src/routers/grpc/error.rs
sgl-router/src/routers/grpc/error.rs
+143
-0
sgl-router/src/routers/grpc/harmony/processor.rs
sgl-router/src/routers/grpc/harmony/processor.rs
+8
-10
sgl-router/src/routers/grpc/harmony/responses.rs
sgl-router/src/routers/grpc/harmony/responses.rs
+5
-5
sgl-router/src/routers/grpc/harmony/stages/preparation.rs
sgl-router/src/routers/grpc/harmony/stages/preparation.rs
+7
-6
sgl-router/src/routers/grpc/harmony/stages/request_building.rs
...outer/src/routers/grpc/harmony/stages/request_building.rs
+6
-10
sgl-router/src/routers/grpc/harmony/stages/response_processing.rs
...er/src/routers/grpc/harmony/stages/response_processing.rs
+6
-6
sgl-router/src/routers/grpc/mod.rs
sgl-router/src/routers/grpc/mod.rs
+1
-0
sgl-router/src/routers/grpc/pipeline.rs
sgl-router/src/routers/grpc/pipeline.rs
+13
-13
sgl-router/src/routers/grpc/processing.rs
sgl-router/src/routers/grpc/processing.rs
+4
-4
sgl-router/src/routers/grpc/responses/handlers.rs
sgl-router/src/routers/grpc/responses/handlers.rs
+9
-14
sgl-router/src/routers/grpc/responses/tool_loop.rs
sgl-router/src/routers/grpc/responses/tool_loop.rs
+5
-14
sgl-router/src/routers/grpc/stages/client_acquisition.rs
sgl-router/src/routers/grpc/stages/client_acquisition.rs
+2
-2
sgl-router/src/routers/grpc/stages/dispatch_metadata.rs
sgl-router/src/routers/grpc/stages/dispatch_metadata.rs
+2
-2
sgl-router/src/routers/grpc/stages/preparation.rs
sgl-router/src/routers/grpc/stages/preparation.rs
+6
-10
sgl-router/src/routers/grpc/stages/request_building.rs
sgl-router/src/routers/grpc/stages/request_building.rs
+5
-7
sgl-router/src/routers/grpc/stages/request_execution.rs
sgl-router/src/routers/grpc/stages/request_execution.rs
+15
-12
sgl-router/src/routers/grpc/stages/response_processing.rs
sgl-router/src/routers/grpc/stages/response_processing.rs
+8
-8
sgl-router/src/routers/grpc/stages/worker_selection.rs
sgl-router/src/routers/grpc/stages/worker_selection.rs
+4
-4
sgl-router/src/routers/grpc/utils.rs
sgl-router/src/routers/grpc/utils.rs
+6
-71
No files found.
sgl-router/src/routers/grpc/error.rs
0 → 100644
View file @
0b41a293
//! Centralized error response handling for all routers
//!
//! This module provides consistent error responses across OpenAI and gRPC routers,
//! ensuring all errors follow OpenAI's API error format.
use
axum
::{
http
::
StatusCode
,
response
::{
IntoResponse
,
Response
},
Json
,
};
use
serde_json
::
json
;
use
tracing
::{
error
,
warn
};
/// Create a 500 Internal Server Error response
///
/// Use this for unexpected server-side errors, database failures, etc.
///
/// # Example
/// ```ignore
/// return Err(internal_error("Database connection failed"));
/// ```
pub
fn
internal_error
(
message
:
impl
Into
<
String
>
)
->
Response
{
let
msg
=
message
.into
();
error!
(
"{}"
,
msg
);
(
StatusCode
::
INTERNAL_SERVER_ERROR
,
Json
(
json!
({
"error"
:
{
"message"
:
msg
,
"type"
:
"internal_error"
,
"code"
:
500
}
})),
)
.into_response
()
}
/// Create a 400 Bad Request response
///
/// Use this for invalid request parameters, malformed JSON, validation errors, etc.
///
/// # Example
/// ```ignore
/// return Err(bad_request("Invalid conversation ID format"));
/// ```
pub
fn
bad_request
(
message
:
impl
Into
<
String
>
)
->
Response
{
let
msg
=
message
.into
();
error!
(
"{}"
,
msg
);
(
StatusCode
::
BAD_REQUEST
,
Json
(
json!
({
"error"
:
{
"message"
:
msg
,
"type"
:
"invalid_request_error"
,
"code"
:
400
}
})),
)
.into_response
()
}
/// Create a 404 Not Found response
///
/// Use this for resources that don't exist (conversations, responses, etc.)
///
/// # Example
/// ```ignore
/// return Err(not_found(format!("Conversation '{}' not found", id)));
/// ```
pub
fn
not_found
(
message
:
impl
Into
<
String
>
)
->
Response
{
let
msg
=
message
.into
();
warn!
(
"{}"
,
msg
);
(
StatusCode
::
NOT_FOUND
,
Json
(
json!
({
"error"
:
{
"message"
:
msg
,
"type"
:
"invalid_request_error"
,
"code"
:
404
}
})),
)
.into_response
()
}
/// Create a 503 Service Unavailable response
///
/// Use this for temporary service issues like no workers available, rate limiting, etc.
///
/// # Example
/// ```ignore
/// return Err(service_unavailable("No workers available for this model"));
/// ```
pub
fn
service_unavailable
(
message
:
impl
Into
<
String
>
)
->
Response
{
let
msg
=
message
.into
();
warn!
(
"{}"
,
msg
);
(
StatusCode
::
SERVICE_UNAVAILABLE
,
Json
(
json!
({
"error"
:
{
"message"
:
msg
,
"type"
:
"service_unavailable"
,
"code"
:
503
}
})),
)
.into_response
()
}
#[cfg(test)]
mod
tests
{
use
super
::
*
;
#[test]
fn
test_internal_error_string
()
{
let
response
=
internal_error
(
"Test error"
);
assert_eq!
(
response
.status
(),
StatusCode
::
INTERNAL_SERVER_ERROR
);
}
#[test]
fn
test_internal_error_format
()
{
let
response
=
internal_error
(
format!
(
"Error: {}"
,
42
));
assert_eq!
(
response
.status
(),
StatusCode
::
INTERNAL_SERVER_ERROR
);
}
#[test]
fn
test_bad_request
()
{
let
response
=
bad_request
(
"Invalid input"
);
assert_eq!
(
response
.status
(),
StatusCode
::
BAD_REQUEST
);
}
#[test]
fn
test_not_found
()
{
let
response
=
not_found
(
"Resource not found"
);
assert_eq!
(
response
.status
(),
StatusCode
::
NOT_FOUND
);
}
#[test]
fn
test_service_unavailable
()
{
let
response
=
service_unavailable
(
"No workers"
);
assert_eq!
(
response
.status
(),
StatusCode
::
SERVICE_UNAVAILABLE
);
}
}
sgl-router/src/routers/grpc/harmony/processor.rs
View file @
0b41a293
...
@@ -18,7 +18,7 @@ use crate::{
...
@@ -18,7 +18,7 @@ use crate::{
},
},
routers
::
grpc
::{
routers
::
grpc
::{
context
::{
DispatchMetadata
,
ExecutionResult
},
context
::{
DispatchMetadata
,
ExecutionResult
},
utils
,
error
,
utils
,
},
},
};
};
...
@@ -66,7 +66,7 @@ impl HarmonyResponseProcessor {
...
@@ -66,7 +66,7 @@ impl HarmonyResponseProcessor {
// Collect all completed responses (one per choice)
// Collect all completed responses (one per choice)
let
all_responses
=
Self
::
collect_responses
(
execution_result
)
.await
?
;
let
all_responses
=
Self
::
collect_responses
(
execution_result
)
.await
?
;
if
all_responses
.is_empty
()
{
if
all_responses
.is_empty
()
{
return
Err
(
utils
::
internal_error
_static
(
"No responses from server"
));
return
Err
(
error
::
internal_error
(
"No responses from server"
));
}
}
// Build choices by parsing output with HarmonyParserAdapter
// Build choices by parsing output with HarmonyParserAdapter
...
@@ -84,7 +84,7 @@ impl HarmonyResponseProcessor {
...
@@ -84,7 +84,7 @@ impl HarmonyResponseProcessor {
// Parse Harmony channels with HarmonyParserAdapter
// Parse Harmony channels with HarmonyParserAdapter
let
mut
parser
=
HarmonyParserAdapter
::
new
()
.map_err
(|
e
|
{
let
mut
parser
=
HarmonyParserAdapter
::
new
()
.map_err
(|
e
|
{
utils
::
internal_error
_message
(
format!
(
"Failed to create Harmony parser: {}"
,
e
))
error
::
internal_error
(
format!
(
"Failed to create Harmony parser: {}"
,
e
))
})
?
;
})
?
;
// Parse Harmony channels with finish_reason and matched_stop
// Parse Harmony channels with finish_reason and matched_stop
...
@@ -94,9 +94,7 @@ impl HarmonyResponseProcessor {
...
@@ -94,9 +94,7 @@ impl HarmonyResponseProcessor {
complete
.finish_reason
.clone
(),
complete
.finish_reason
.clone
(),
matched_stop
.clone
(),
matched_stop
.clone
(),
)
)
.map_err
(|
e
|
{
.map_err
(|
e
|
error
::
internal_error
(
format!
(
"Harmony parsing failed: {}"
,
e
)))
?
;
utils
::
internal_error_message
(
format!
(
"Harmony parsing failed: {}"
,
e
))
})
?
;
// Build response message (assistant)
// Build response message (assistant)
let
message
=
ChatCompletionMessage
{
let
message
=
ChatCompletionMessage
{
...
@@ -195,17 +193,17 @@ impl HarmonyResponseProcessor {
...
@@ -195,17 +193,17 @@ impl HarmonyResponseProcessor {
// Collect all completed responses
// Collect all completed responses
let
all_responses
=
Self
::
collect_responses
(
execution_result
)
.await
?
;
let
all_responses
=
Self
::
collect_responses
(
execution_result
)
.await
?
;
if
all_responses
.is_empty
()
{
if
all_responses
.is_empty
()
{
return
Err
(
utils
::
internal_error
_static
(
"No responses from server"
));
return
Err
(
error
::
internal_error
(
"No responses from server"
));
}
}
// For Responses API, we only process the first response (n=1)
// For Responses API, we only process the first response (n=1)
let
complete
=
all_responses
let
complete
=
all_responses
.first
()
.first
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"No complete response"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"No complete response"
))
?
;
// Parse Harmony channels
// Parse Harmony channels
let
mut
parser
=
HarmonyParserAdapter
::
new
()
.map_err
(|
e
|
{
let
mut
parser
=
HarmonyParserAdapter
::
new
()
.map_err
(|
e
|
{
utils
::
internal_error
_message
(
format!
(
"Failed to create Harmony parser: {}"
,
e
))
error
::
internal_error
(
format!
(
"Failed to create Harmony parser: {}"
,
e
))
})
?
;
})
?
;
// Convert matched_stop from proto to JSON
// Convert matched_stop from proto to JSON
...
@@ -224,7 +222,7 @@ impl HarmonyResponseProcessor {
...
@@ -224,7 +222,7 @@ impl HarmonyResponseProcessor {
complete
.finish_reason
.clone
(),
complete
.finish_reason
.clone
(),
matched_stop
,
matched_stop
,
)
)
.map_err
(|
e
|
utils
::
internal_error
_message
(
format!
(
"Harmony parsing failed: {}"
,
e
)))
?
;
.map_err
(|
e
|
error
::
internal_error
(
format!
(
"Harmony parsing failed: {}"
,
e
)))
?
;
// VALIDATION: Check if model incorrectly generated Tool role messages
// VALIDATION: Check if model incorrectly generated Tool role messages
// This happens when the model copies the format of tool result messages
// This happens when the model copies the format of tool result messages
...
...
sgl-router/src/routers/grpc/harmony/responses.rs
View file @
0b41a293
...
@@ -61,10 +61,10 @@ use crate::{
...
@@ -61,10 +61,10 @@ use crate::{
routers
::{
routers
::{
grpc
::{
grpc
::{
context
::
SharedComponents
,
context
::
SharedComponents
,
error
,
harmony
::
processor
::
ResponsesIterationResult
,
harmony
::
processor
::
ResponsesIterationResult
,
pipeline
::
RequestPipeline
,
pipeline
::
RequestPipeline
,
responses
::
streaming
::{
OutputItemType
,
ResponseStreamEventEmitter
},
responses
::
streaming
::{
OutputItemType
,
ResponseStreamEventEmitter
},
utils
,
},
},
openai
::
mcp
::
ensure_request_mcp_client
,
openai
::
mcp
::
ensure_request_mcp_client
,
},
},
...
@@ -285,7 +285,7 @@ pub async fn serve_harmony_responses(
...
@@ -285,7 +285,7 @@ pub async fn serve_harmony_responses(
// Safety check: prevent infinite loops
// Safety check: prevent infinite loops
if
iteration_count
>
MAX_TOOL_ITERATIONS
{
if
iteration_count
>
MAX_TOOL_ITERATIONS
{
return
Err
(
utils
::
internal_error
_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"Maximum tool iterations ({}) exceeded"
,
"Maximum tool iterations ({}) exceeded"
,
MAX_TOOL_ITERATIONS
MAX_TOOL_ITERATIONS
)));
)));
...
@@ -333,7 +333,7 @@ pub async fn serve_harmony_responses(
...
@@ -333,7 +333,7 @@ pub async fn serve_harmony_responses(
execute_mcp_tools
(
&
ctx
.mcp_manager
,
&
tool_calls
,
tracking
)
.await
?
execute_mcp_tools
(
&
ctx
.mcp_manager
,
&
tool_calls
,
tracking
)
.await
?
}
else
{
}
else
{
// Should never happen (we only get tool_calls when has_mcp_tools=true)
// Should never happen (we only get tool_calls when has_mcp_tools=true)
return
Err
(
utils
::
internal_error
_static
(
return
Err
(
error
::
internal_error
(
"Tool calls found but MCP tracking not initialized"
,
"Tool calls found but MCP tracking not initialized"
,
));
));
};
};
...
@@ -734,7 +734,7 @@ async fn execute_mcp_tools(
...
@@ -734,7 +734,7 @@ async fn execute_mcp_tools(
// Parse tool arguments from JSON string
// Parse tool arguments from JSON string
let
args_str
=
tool_call
.function.arguments
.as_deref
()
.unwrap_or
(
"{}"
);
let
args_str
=
tool_call
.function.arguments
.as_deref
()
.unwrap_or
(
"{}"
);
let
args
:
JsonValue
=
serde_json
::
from_str
(
args_str
)
.map_err
(|
e
|
{
let
args
:
JsonValue
=
serde_json
::
from_str
(
args_str
)
.map_err
(|
e
|
{
utils
::
internal_error
_message
(
format!
(
error
::
internal_error
(
format!
(
"Invalid tool arguments JSON for tool '{}': {}"
,
"Invalid tool arguments JSON for tool '{}': {}"
,
tool_call
.function.name
,
e
tool_call
.function.name
,
e
))
))
...
@@ -1111,7 +1111,7 @@ async fn load_previous_messages(
...
@@ -1111,7 +1111,7 @@ async fn load_previous_messages(
.get_response_chain
(
&
prev_id
,
None
)
.get_response_chain
(
&
prev_id
,
None
)
.await
.await
.map_err
(|
e
|
{
.map_err
(|
e
|
{
utils
::
internal_error
_message
(
format!
(
error
::
internal_error
(
format!
(
"Failed to load previous response chain for {}: {}"
,
"Failed to load previous response chain for {}: {}"
,
prev_id_str
,
e
prev_id_str
,
e
))
))
...
...
sgl-router/src/routers/grpc/harmony/stages/preparation.rs
View file @
0b41a293
...
@@ -13,6 +13,7 @@ use crate::{
...
@@ -13,6 +13,7 @@ use crate::{
},
},
routers
::
grpc
::{
routers
::
grpc
::{
context
::{
PreparationOutput
,
RequestContext
,
RequestType
},
context
::{
PreparationOutput
,
RequestContext
,
RequestType
},
error
,
stages
::
PipelineStage
,
stages
::
PipelineStage
,
utils
,
utils
,
},
},
...
@@ -56,7 +57,7 @@ impl PipelineStage for HarmonyPreparationStage {
...
@@ -56,7 +57,7 @@ impl PipelineStage for HarmonyPreparationStage {
let
request_arc
=
ctx
.responses_request_arc
();
let
request_arc
=
ctx
.responses_request_arc
();
self
.prepare_responses
(
ctx
,
&
request_arc
)
.await
?
;
self
.prepare_responses
(
ctx
,
&
request_arc
)
.await
?
;
}
else
{
}
else
{
return
Err
(
utils
::
bad_request
_error
(
return
Err
(
error
::
bad_request
(
"Only Chat and Responses requests supported in Harmony pipeline"
.to_string
(),
"Only Chat and Responses requests supported in Harmony pipeline"
.to_string
(),
));
));
}
}
...
@@ -78,7 +79,7 @@ impl HarmonyPreparationStage {
...
@@ -78,7 +79,7 @@ impl HarmonyPreparationStage {
)
->
Result
<
Option
<
Response
>
,
Response
>
{
)
->
Result
<
Option
<
Response
>
,
Response
>
{
// Validate - reject logprobs
// Validate - reject logprobs
if
request
.logprobs
{
if
request
.logprobs
{
return
Err
(
utils
::
bad_request
_error
(
return
Err
(
error
::
bad_request
(
"logprobs are not supported for Harmony models"
.to_string
(),
"logprobs are not supported for Harmony models"
.to_string
(),
));
));
}
}
...
@@ -97,7 +98,7 @@ impl HarmonyPreparationStage {
...
@@ -97,7 +98,7 @@ impl HarmonyPreparationStage {
let
build_output
=
self
let
build_output
=
self
.builder
.builder
.build_from_chat
(
&
body_ref
)
.build_from_chat
(
&
body_ref
)
.map_err
(|
e
|
utils
::
bad_request
_error
(
format!
(
"Harmony build failed: {}"
,
e
)))
?
;
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Harmony build failed: {}"
,
e
)))
?
;
// Step 4: Store results
// Step 4: Store results
ctx
.state.preparation
=
Some
(
PreparationOutput
{
ctx
.state.preparation
=
Some
(
PreparationOutput
{
...
@@ -132,7 +133,7 @@ impl HarmonyPreparationStage {
...
@@ -132,7 +133,7 @@ impl HarmonyPreparationStage {
let
build_output
=
self
let
build_output
=
self
.builder
.builder
.build_from_responses
(
request
)
.build_from_responses
(
request
)
.map_err
(|
e
|
utils
::
bad_request
_error
(
format!
(
"Harmony build failed: {}"
,
e
)))
?
;
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Harmony build failed: {}"
,
e
)))
?
;
// Store results in preparation output
// Store results in preparation output
ctx
.state.preparation
=
Some
(
PreparationOutput
{
ctx
.state.preparation
=
Some
(
PreparationOutput
{
...
@@ -202,7 +203,7 @@ impl HarmonyPreparationStage {
...
@@ -202,7 +203,7 @@ impl HarmonyPreparationStage {
// Validate specific function exists
// Validate specific function exists
if
specific_function
.is_some
()
&&
tools_to_use
.is_empty
()
{
if
specific_function
.is_some
()
&&
tools_to_use
.is_empty
()
{
return
Err
(
Box
::
new
(
utils
::
bad_request
_error
(
format!
(
return
Err
(
Box
::
new
(
error
::
bad_request
(
format!
(
"Tool '{}' not found in tools list"
,
"Tool '{}' not found in tools list"
,
specific_function
.unwrap
()
specific_function
.unwrap
()
))));
))));
...
@@ -236,7 +237,7 @@ impl HarmonyPreparationStage {
...
@@ -236,7 +237,7 @@ impl HarmonyPreparationStage {
});
});
serde_json
::
to_string
(
&
structural_tag
)
.map_err
(|
e
|
{
serde_json
::
to_string
(
&
structural_tag
)
.map_err
(|
e
|
{
Box
::
new
(
utils
::
internal_error
_message
(
format!
(
Box
::
new
(
error
::
internal_error
(
format!
(
"Failed to serialize structural tag: {}"
,
"Failed to serialize structural tag: {}"
,
e
e
)))
)))
...
...
sgl-router/src/routers/grpc/harmony/stages/request_building.rs
View file @
0b41a293
...
@@ -13,8 +13,8 @@ use crate::{
...
@@ -13,8 +13,8 @@ use crate::{
grpc_client
::
proto
::{
DisaggregatedParams
,
GenerateRequest
},
grpc_client
::
proto
::{
DisaggregatedParams
,
GenerateRequest
},
routers
::
grpc
::{
routers
::
grpc
::{
context
::{
ClientSelection
,
RequestContext
,
RequestType
,
WorkerSelection
},
context
::{
ClientSelection
,
RequestContext
,
RequestType
,
WorkerSelection
},
error
,
stages
::
PipelineStage
,
stages
::
PipelineStage
,
utils
,
},
},
};
};
...
@@ -69,14 +69,14 @@ impl PipelineStage for HarmonyRequestBuildingStage {
...
@@ -69,14 +69,14 @@ impl PipelineStage for HarmonyRequestBuildingStage {
.state
.state
.preparation
.preparation
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Preparation not completed"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Preparation not completed"
))
?
;
// Get clients
// Get clients
let
clients
=
ctx
let
clients
=
ctx
.state
.state
.clients
.clients
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Client acquisition not completed"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Client acquisition not completed"
))
?
;
let
builder_client
=
match
clients
{
let
builder_client
=
match
clients
{
ClientSelection
::
Single
{
client
}
=>
client
,
ClientSelection
::
Single
{
client
}
=>
client
,
ClientSelection
::
Dual
{
prefill
,
..
}
=>
prefill
,
ClientSelection
::
Dual
{
prefill
,
..
}
=>
prefill
,
...
@@ -87,7 +87,7 @@ impl PipelineStage for HarmonyRequestBuildingStage {
...
@@ -87,7 +87,7 @@ impl PipelineStage for HarmonyRequestBuildingStage {
RequestType
::
Chat
(
_
)
=>
format!
(
"chatcmpl-{}"
,
Uuid
::
new_v4
()),
RequestType
::
Chat
(
_
)
=>
format!
(
"chatcmpl-{}"
,
Uuid
::
new_v4
()),
RequestType
::
Responses
(
_
)
=>
format!
(
"responses-{}"
,
Uuid
::
new_v4
()),
RequestType
::
Responses
(
_
)
=>
format!
(
"responses-{}"
,
Uuid
::
new_v4
()),
RequestType
::
Generate
(
_
)
=>
{
RequestType
::
Generate
(
_
)
=>
{
return
Err
(
utils
::
bad_request
_error
(
return
Err
(
error
::
bad_request
(
"Generate requests are not supported with Harmony models"
.to_string
(),
"Generate requests are not supported with Harmony models"
.to_string
(),
));
));
}
}
...
@@ -111,9 +111,7 @@ impl PipelineStage for HarmonyRequestBuildingStage {
...
@@ -111,9 +111,7 @@ impl PipelineStage for HarmonyRequestBuildingStage {
None
,
None
,
prep
.tool_constraints
.clone
(),
prep
.tool_constraints
.clone
(),
)
)
.map_err
(|
e
|
{
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Invalid request parameters: {}"
,
e
)))
?
utils
::
bad_request_error
(
format!
(
"Invalid request parameters: {}"
,
e
))
})
?
}
}
RequestType
::
Responses
(
request
)
=>
builder_client
RequestType
::
Responses
(
request
)
=>
builder_client
.build_generate_request_from_responses
(
.build_generate_request_from_responses
(
...
@@ -123,9 +121,7 @@ impl PipelineStage for HarmonyRequestBuildingStage {
...
@@ -123,9 +121,7 @@ impl PipelineStage for HarmonyRequestBuildingStage {
prep
.token_ids
.clone
(),
prep
.token_ids
.clone
(),
prep
.harmony_stop_ids
.clone
(),
prep
.harmony_stop_ids
.clone
(),
)
)
.map_err
(|
e
|
{
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Invalid request parameters: {}"
,
e
)))
?
,
utils
::
bad_request_error
(
format!
(
"Invalid request parameters: {}"
,
e
))
})
?
,
_
=>
unreachable!
(),
_
=>
unreachable!
(),
};
};
...
...
sgl-router/src/routers/grpc/harmony/stages/response_processing.rs
View file @
0b41a293
...
@@ -8,8 +8,8 @@ use axum::response::Response;
...
@@ -8,8 +8,8 @@ use axum::response::Response;
use
super
::
super
::{
HarmonyResponseProcessor
,
HarmonyStreamingProcessor
};
use
super
::
super
::{
HarmonyResponseProcessor
,
HarmonyStreamingProcessor
};
use
crate
::
routers
::
grpc
::{
use
crate
::
routers
::
grpc
::{
context
::{
FinalResponse
,
RequestContext
,
RequestType
},
context
::{
FinalResponse
,
RequestContext
,
RequestType
},
error
,
stages
::
PipelineStage
,
stages
::
PipelineStage
,
utils
,
};
};
/// Harmony Response Processing stage: Parse and format Harmony responses
/// Harmony Response Processing stage: Parse and format Harmony responses
...
@@ -51,14 +51,14 @@ impl PipelineStage for HarmonyResponseProcessingStage {
...
@@ -51,14 +51,14 @@ impl PipelineStage for HarmonyResponseProcessingStage {
.response
.response
.execution_result
.execution_result
.take
()
.take
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"No execution result"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"No execution result"
))
?
;
let
dispatch
=
ctx
let
dispatch
=
ctx
.state
.state
.dispatch
.dispatch
.as_ref
()
.as_ref
()
.cloned
()
.cloned
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Dispatch metadata not set"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Dispatch metadata not set"
))
?
;
// For streaming, delegate to streaming processor and return SSE response
// For streaming, delegate to streaming processor and return SSE response
if
is_streaming
{
if
is_streaming
{
...
@@ -97,14 +97,14 @@ impl PipelineStage for HarmonyResponseProcessingStage {
...
@@ -97,14 +97,14 @@ impl PipelineStage for HarmonyResponseProcessingStage {
.response
.response
.execution_result
.execution_result
.take
()
.take
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"No execution result"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"No execution result"
))
?
;
let
dispatch
=
ctx
let
dispatch
=
ctx
.state
.state
.dispatch
.dispatch
.as_ref
()
.as_ref
()
.cloned
()
.cloned
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Dispatch metadata not set"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Dispatch metadata not set"
))
?
;
let
responses_request
=
ctx
.responses_request_arc
();
let
responses_request
=
ctx
.responses_request_arc
();
let
iteration_result
=
self
let
iteration_result
=
self
...
@@ -115,7 +115,7 @@ impl PipelineStage for HarmonyResponseProcessingStage {
...
@@ -115,7 +115,7 @@ impl PipelineStage for HarmonyResponseProcessingStage {
ctx
.state.response.responses_iteration_result
=
Some
(
iteration_result
);
ctx
.state.response.responses_iteration_result
=
Some
(
iteration_result
);
Ok
(
None
)
Ok
(
None
)
}
}
RequestType
::
Generate
(
_
)
=>
Err
(
utils
::
internal_error
_static
(
RequestType
::
Generate
(
_
)
=>
Err
(
error
::
internal_error
(
"Generate requests not supported in Harmony pipeline"
,
"Generate requests not supported in Harmony pipeline"
,
)),
)),
}
}
...
...
sgl-router/src/routers/grpc/mod.rs
View file @
0b41a293
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
use
crate
::{
grpc_client
::
proto
,
protocols
::
common
::
StringOrArray
};
use
crate
::{
grpc_client
::
proto
,
protocols
::
common
::
StringOrArray
};
pub
mod
context
;
pub
mod
context
;
pub
mod
error
;
pub
mod
harmony
;
pub
mod
harmony
;
pub
mod
pd_router
;
pub
mod
pd_router
;
pub
mod
pipeline
;
pub
mod
pipeline
;
...
...
sgl-router/src/routers/grpc/pipeline.rs
View file @
0b41a293
...
@@ -11,7 +11,7 @@ use tracing::{debug, error};
...
@@ -11,7 +11,7 @@ use tracing::{debug, error};
// Import all stage types from the stages module
// Import all stage types from the stages module
use
super
::
stages
::
*
;
use
super
::
stages
::
*
;
use
super
::{
context
::
*
,
harmony
,
processing
,
responses
::
BackgroundTaskInfo
,
streaming
,
utils
};
use
super
::{
context
::
*
,
error
,
harmony
,
processing
,
responses
::
BackgroundTaskInfo
,
streaming
};
use
crate
::{
use
crate
::{
core
::
WorkerRegistry
,
core
::
WorkerRegistry
,
policies
::
PolicyRegistry
,
policies
::
PolicyRegistry
,
...
@@ -228,9 +228,9 @@ impl RequestPipeline {
...
@@ -228,9 +228,9 @@ impl RequestPipeline {
match
ctx
.state.response.final_response
{
match
ctx
.state.response.final_response
{
Some
(
FinalResponse
::
Chat
(
response
))
=>
axum
::
Json
(
response
)
.into_response
(),
Some
(
FinalResponse
::
Chat
(
response
))
=>
axum
::
Json
(
response
)
.into_response
(),
Some
(
FinalResponse
::
Generate
(
_
))
=>
{
Some
(
FinalResponse
::
Generate
(
_
))
=>
{
utils
::
internal_error
_static
(
"Internal error: wrong response type"
)
error
::
internal_error
(
"Internal error: wrong response type"
)
}
}
None
=>
utils
::
internal_error
_static
(
"No response produced"
),
None
=>
error
::
internal_error
(
"No response produced"
),
}
}
}
}
...
@@ -272,9 +272,9 @@ impl RequestPipeline {
...
@@ -272,9 +272,9 @@ impl RequestPipeline {
match
ctx
.state.response.final_response
{
match
ctx
.state.response.final_response
{
Some
(
FinalResponse
::
Generate
(
response
))
=>
axum
::
Json
(
response
)
.into_response
(),
Some
(
FinalResponse
::
Generate
(
response
))
=>
axum
::
Json
(
response
)
.into_response
(),
Some
(
FinalResponse
::
Chat
(
_
))
=>
{
Some
(
FinalResponse
::
Chat
(
_
))
=>
{
utils
::
internal_error
_static
(
"Internal error: wrong response type"
)
error
::
internal_error
(
"Internal error: wrong response type"
)
}
}
None
=>
utils
::
internal_error
_static
(
"No response produced"
),
None
=>
error
::
internal_error
(
"No response produced"
),
}
}
}
}
...
@@ -303,7 +303,7 @@ impl RequestPipeline {
...
@@ -303,7 +303,7 @@ impl RequestPipeline {
match
stage
.execute
(
&
mut
ctx
)
.await
{
match
stage
.execute
(
&
mut
ctx
)
.await
{
Ok
(
Some
(
_
response
))
=>
{
Ok
(
Some
(
_
response
))
=>
{
// Streaming not supported for responses sync mode
// Streaming not supported for responses sync mode
return
Err
(
utils
::
bad_request
_error
(
return
Err
(
error
::
bad_request
(
"Streaming is not supported in this context"
.to_string
(),
"Streaming is not supported in this context"
.to_string
(),
));
));
}
}
...
@@ -360,10 +360,10 @@ impl RequestPipeline {
...
@@ -360,10 +360,10 @@ impl RequestPipeline {
// Extract final response
// Extract final response
match
ctx
.state.response.final_response
{
match
ctx
.state.response.final_response
{
Some
(
FinalResponse
::
Chat
(
response
))
=>
Ok
(
response
),
Some
(
FinalResponse
::
Chat
(
response
))
=>
Ok
(
response
),
Some
(
FinalResponse
::
Generate
(
_
))
=>
Err
(
utils
::
internal_error_static
(
Some
(
FinalResponse
::
Generate
(
_
))
=>
{
"Internal error: wrong response type"
,
Err
(
error
::
internal_error
(
"Internal error: wrong response type"
))
)),
}
None
=>
Err
(
utils
::
internal_error
_static
(
"No response produced"
)),
None
=>
Err
(
error
::
internal_error
(
"No response produced"
)),
}
}
}
}
...
@@ -384,7 +384,7 @@ impl RequestPipeline {
...
@@ -384,7 +384,7 @@ impl RequestPipeline {
_
model_id
:
Option
<
String
>
,
_
model_id
:
Option
<
String
>
,
_
components
:
Arc
<
SharedComponents
>
,
_
components
:
Arc
<
SharedComponents
>
,
)
->
Response
{
)
->
Response
{
utils
::
internal_error
_static
(
"Responses API execution not yet implemented"
)
error
::
internal_error
(
"Responses API execution not yet implemented"
)
}
}
/// Execute Harmony Responses API request through all pipeline stages
/// Execute Harmony Responses API request through all pipeline stages
...
@@ -451,7 +451,7 @@ impl RequestPipeline {
...
@@ -451,7 +451,7 @@ impl RequestPipeline {
.responses_iteration_result
.responses_iteration_result
.take
()
.take
()
.ok_or_else
(||
{
.ok_or_else
(||
{
utils
::
internal_error
_static
(
"No ResponsesIterationResult produced by pipeline"
)
error
::
internal_error
(
"No ResponsesIterationResult produced by pipeline"
)
})
})
}
}
...
@@ -501,6 +501,6 @@ impl RequestPipeline {
...
@@ -501,6 +501,6 @@ impl RequestPipeline {
.response
.response
.execution_result
.execution_result
.take
()
.take
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"No ExecutionResult produced by pipeline"
))
.ok_or_else
(||
error
::
internal_error
(
"No ExecutionResult produced by pipeline"
))
}
}
}
}
sgl-router/src/routers/grpc/processing.rs
View file @
0b41a293
...
@@ -11,7 +11,7 @@ use tracing::error;
...
@@ -11,7 +11,7 @@ use tracing::error;
use
super
::{
use
super
::{
context
::{
DispatchMetadata
,
ExecutionResult
},
context
::{
DispatchMetadata
,
ExecutionResult
},
utils
,
error
,
utils
,
};
};
use
crate
::{
use
crate
::{
grpc_client
::
proto
,
grpc_client
::
proto
,
...
@@ -104,7 +104,7 @@ impl ResponseProcessor {
...
@@ -104,7 +104,7 @@ impl ResponseProcessor {
};
};
if
all_responses
.is_empty
()
{
if
all_responses
.is_empty
()
{
return
Err
(
utils
::
internal_error
_static
(
"No responses from server"
));
return
Err
(
error
::
internal_error
(
"No responses from server"
));
}
}
Ok
(
all_responses
)
Ok
(
all_responses
)
...
@@ -332,7 +332,7 @@ impl ResponseProcessor {
...
@@ -332,7 +332,7 @@ impl ResponseProcessor {
{
{
Ok
(
choice
)
=>
choices
.push
(
choice
),
Ok
(
choice
)
=>
choices
.push
(
choice
),
Err
(
e
)
=>
{
Err
(
e
)
=>
{
return
Err
(
utils
::
internal_error
_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"Failed to process choice {}: {}"
,
"Failed to process choice {}: {}"
,
index
,
e
index
,
e
)));
)));
...
@@ -447,7 +447,7 @@ impl ResponseProcessor {
...
@@ -447,7 +447,7 @@ impl ResponseProcessor {
let
outputs
=
match
stop_decoder
.process_tokens
(
&
complete
.output_ids
)
{
let
outputs
=
match
stop_decoder
.process_tokens
(
&
complete
.output_ids
)
{
Ok
(
outputs
)
=>
outputs
,
Ok
(
outputs
)
=>
outputs
,
Err
(
e
)
=>
{
Err
(
e
)
=>
{
return
Err
(
utils
::
internal_error
_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"Failed to process tokens: {}"
,
"Failed to process tokens: {}"
,
e
e
)))
)))
...
...
sgl-router/src/routers/grpc/responses/handlers.rs
View file @
0b41a293
...
@@ -67,7 +67,10 @@ use crate::{
...
@@ -67,7 +67,10 @@ use crate::{
ResponseStatus
,
ResponsesRequest
,
ResponsesResponse
,
ResponsesUsage
,
ResponseStatus
,
ResponsesRequest
,
ResponsesResponse
,
ResponsesUsage
,
},
},
},
},
routers
::
openai
::{
conversations
::
persist_conversation_items
,
mcp
::
ensure_request_mcp_client
},
routers
::{
grpc
::
error
,
openai
::{
conversations
::
persist_conversation_items
,
mcp
::
ensure_request_mcp_client
},
},
};
};
// ============================================================================
// ============================================================================
...
@@ -863,11 +866,9 @@ async fn execute_without_mcp(
...
@@ -863,11 +866,9 @@ async fn execute_without_mcp(
model_id
:
Option
<
String
>
,
model_id
:
Option
<
String
>
,
response_id
:
Option
<
String
>
,
response_id
:
Option
<
String
>
,
)
->
Result
<
ResponsesResponse
,
Response
>
{
)
->
Result
<
ResponsesResponse
,
Response
>
{
use
crate
::
routers
::
grpc
::
utils
;
// Convert ResponsesRequest → ChatCompletionRequest
// Convert ResponsesRequest → ChatCompletionRequest
let
chat_request
=
conversions
::
responses_to_chat
(
modified_request
)
let
chat_request
=
conversions
::
responses_to_chat
(
modified_request
)
.map_err
(|
e
|
utils
::
bad_request
_error
(
format!
(
"Failed to convert request: {}"
,
e
)))
?
;
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Failed to convert request: {}"
,
e
)))
?
;
// Execute chat pipeline (errors already have proper HTTP status codes)
// Execute chat pipeline (errors already have proper HTTP status codes)
let
chat_response
=
ctx
let
chat_response
=
ctx
...
@@ -883,9 +884,8 @@ async fn execute_without_mcp(
...
@@ -883,9 +884,8 @@ async fn execute_without_mcp(
.await
?
;
// Preserve the Response error as-is
.await
?
;
// Preserve the Response error as-is
// Convert ChatCompletionResponse → ResponsesResponse
// Convert ChatCompletionResponse → ResponsesResponse
conversions
::
chat_to_responses
(
&
chat_response
,
original_request
,
response_id
)
.map_err
(|
e
|
{
conversions
::
chat_to_responses
(
&
chat_response
,
original_request
,
response_id
)
utils
::
internal_error_message
(
format!
(
"Failed to convert to responses format: {}"
,
e
))
.map_err
(|
e
|
error
::
internal_error
(
format!
(
"Failed to convert to responses format: {}"
,
e
)))
})
}
}
/// Load conversation history and response chains, returning modified request
/// Load conversation history and response chains, returning modified request
...
@@ -962,15 +962,10 @@ async fn load_conversation_history(
...
@@ -962,15 +962,10 @@ async fn load_conversation_history(
.conversation_storage
.conversation_storage
.get_conversation
(
&
conv_id
)
.get_conversation
(
&
conv_id
)
.await
.await
.map_err
(|
e
|
{
.map_err
(|
e
|
error
::
internal_error
(
format!
(
"Failed to check conversation: {}"
,
e
)))
?
;
crate
::
routers
::
grpc
::
utils
::
internal_error_message
(
format!
(
"Failed to check conversation: {}"
,
e
))
})
?
;
if
conversation
.is_none
()
{
if
conversation
.is_none
()
{
return
Err
(
crate
::
routers
::
grpc
::
utils
::
bad_request_error
(
format!
(
return
Err
(
error
::
not_found
(
format!
(
"Conversation '{}' not found. Please create the conversation first using the conversations API."
,
"Conversation '{}' not found. Please create the conversation first using the conversations API."
,
conv_id_str
conv_id_str
)));
)));
...
...
sgl-router/src/routers/grpc/responses/tool_loop.rs
View file @
0b41a293
...
@@ -19,6 +19,7 @@ use tracing::{debug, warn};
...
@@ -19,6 +19,7 @@ use tracing::{debug, warn};
use
uuid
::
Uuid
;
use
uuid
::
Uuid
;
use
super
::{
use
super
::{
super
::
error
,
conversions
,
conversions
,
streaming
::{
OutputItemType
,
ResponseStreamEventEmitter
},
streaming
::{
OutputItemType
,
ResponseStreamEventEmitter
},
};
};
...
@@ -247,12 +248,8 @@ pub(super) async fn execute_tool_loop(
...
@@ -247,12 +248,8 @@ pub(super) async fn execute_tool_loop(
loop
{
loop
{
// Convert to chat request
// Convert to chat request
let
mut
chat_request
=
conversions
::
responses_to_chat
(
&
current_request
)
.map_err
(|
e
|
{
let
mut
chat_request
=
conversions
::
responses_to_chat
(
&
current_request
)
crate
::
routers
::
grpc
::
utils
::
bad_request_error
(
format!
(
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Failed to convert request: {}"
,
e
)))
?
;
"Failed to convert request: {}"
,
e
))
})
?
;
// Add MCP tools to chat request so LLM knows about them
// Add MCP tools to chat request so LLM knows about them
chat_request
.tools
=
Some
(
chat_tools
.clone
());
chat_request
.tools
=
Some
(
chat_tools
.clone
());
...
@@ -301,10 +298,7 @@ pub(super) async fn execute_tool_loop(
...
@@ -301,10 +298,7 @@ pub(super) async fn execute_tool_loop(
response_id
.clone
(),
response_id
.clone
(),
)
)
.map_err
(|
e
|
{
.map_err
(|
e
|
{
crate
::
routers
::
grpc
::
utils
::
internal_error_message
(
format!
(
error
::
internal_error
(
format!
(
"Failed to convert to responses format: {}"
,
e
))
"Failed to convert to responses format: {}"
,
e
))
})
?
;
})
?
;
// Mark as completed but with incomplete details
// Mark as completed but with incomplete details
...
@@ -423,10 +417,7 @@ pub(super) async fn execute_tool_loop(
...
@@ -423,10 +417,7 @@ pub(super) async fn execute_tool_loop(
response_id
.clone
(),
response_id
.clone
(),
)
)
.map_err
(|
e
|
{
.map_err
(|
e
|
{
crate
::
routers
::
grpc
::
utils
::
internal_error_message
(
format!
(
error
::
internal_error
(
format!
(
"Failed to convert to responses format: {}"
,
e
))
"Failed to convert to responses format: {}"
,
e
))
})
?
;
})
?
;
// Inject MCP metadata into output
// Inject MCP metadata into output
...
...
sgl-router/src/routers/grpc/stages/client_acquisition.rs
View file @
0b41a293
...
@@ -6,7 +6,7 @@ use axum::response::Response;
...
@@ -6,7 +6,7 @@ use axum::response::Response;
use
super
::
PipelineStage
;
use
super
::
PipelineStage
;
use
crate
::
routers
::
grpc
::{
use
crate
::
routers
::
grpc
::{
context
::{
ClientSelection
,
RequestContext
,
WorkerSelection
},
context
::{
ClientSelection
,
RequestContext
,
WorkerSelection
},
utils
,
error
,
utils
,
};
};
/// Client acquisition stage: Get gRPC clients from selected workers
/// Client acquisition stage: Get gRPC clients from selected workers
...
@@ -19,7 +19,7 @@ impl PipelineStage for ClientAcquisitionStage {
...
@@ -19,7 +19,7 @@ impl PipelineStage for ClientAcquisitionStage {
.state
.state
.workers
.workers
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Worker selection not completed"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Worker selection not completed"
))
?
;
let
clients
=
match
workers
{
let
clients
=
match
workers
{
WorkerSelection
::
Single
{
worker
}
=>
{
WorkerSelection
::
Single
{
worker
}
=>
{
...
...
sgl-router/src/routers/grpc/stages/dispatch_metadata.rs
View file @
0b41a293
...
@@ -8,7 +8,7 @@ use axum::response::Response;
...
@@ -8,7 +8,7 @@ use axum::response::Response;
use
super
::
PipelineStage
;
use
super
::
PipelineStage
;
use
crate
::
routers
::
grpc
::{
use
crate
::
routers
::
grpc
::{
context
::{
DispatchMetadata
,
RequestContext
,
RequestType
,
WorkerSelection
},
context
::{
DispatchMetadata
,
RequestContext
,
RequestType
,
WorkerSelection
},
utils
,
error
,
};
};
/// Dispatch metadata stage: Prepare metadata for dispatch
/// Dispatch metadata stage: Prepare metadata for dispatch
...
@@ -21,7 +21,7 @@ impl PipelineStage for DispatchMetadataStage {
...
@@ -21,7 +21,7 @@ impl PipelineStage for DispatchMetadataStage {
.state
.state
.proto_request
.proto_request
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Proto request not built"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Proto request not built"
))
?
;
let
request_id
=
proto_request
.request_id
.clone
();
let
request_id
=
proto_request
.request_id
.clone
();
let
model
=
match
&
ctx
.input.request_type
{
let
model
=
match
&
ctx
.input.request_type
{
...
...
sgl-router/src/routers/grpc/stages/preparation.rs
View file @
0b41a293
...
@@ -10,7 +10,7 @@ use crate::{
...
@@ -10,7 +10,7 @@ use crate::{
protocols
::{
chat
::
ChatCompletionRequest
,
common
::
InputIds
,
generate
::
GenerateRequest
},
protocols
::{
chat
::
ChatCompletionRequest
,
common
::
InputIds
,
generate
::
GenerateRequest
},
routers
::
grpc
::{
routers
::
grpc
::{
context
::{
PreparationOutput
,
RequestContext
,
RequestType
},
context
::{
PreparationOutput
,
RequestContext
,
RequestType
},
utils
,
error
,
utils
,
},
},
tokenizer
::
traits
::
Tokenizer
,
tokenizer
::
traits
::
Tokenizer
,
};
};
...
@@ -56,7 +56,7 @@ impl PreparationStage {
...
@@ -56,7 +56,7 @@ impl PreparationStage {
match
utils
::
process_chat_messages
(
&
body_ref
,
&*
ctx
.components.tokenizer
)
{
match
utils
::
process_chat_messages
(
&
body_ref
,
&*
ctx
.components.tokenizer
)
{
Ok
(
msgs
)
=>
msgs
,
Ok
(
msgs
)
=>
msgs
,
Err
(
e
)
=>
{
Err
(
e
)
=>
{
return
Err
(
utils
::
bad_request
_error
(
e
));
return
Err
(
error
::
bad_request
(
e
));
}
}
};
};
...
@@ -64,10 +64,7 @@ impl PreparationStage {
...
@@ -64,10 +64,7 @@ impl PreparationStage {
let
encoding
=
match
ctx
.components.tokenizer
.encode
(
&
processed_messages
.text
)
{
let
encoding
=
match
ctx
.components.tokenizer
.encode
(
&
processed_messages
.text
)
{
Ok
(
encoding
)
=>
encoding
,
Ok
(
encoding
)
=>
encoding
,
Err
(
e
)
=>
{
Err
(
e
)
=>
{
return
Err
(
utils
::
internal_error_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"Tokenization failed: {}"
,
e
)));
"Tokenization failed: {}"
,
e
)));
}
}
};
};
...
@@ -75,9 +72,8 @@ impl PreparationStage {
...
@@ -75,9 +72,8 @@ impl PreparationStage {
// Step 4: Build tool constraints if needed
// Step 4: Build tool constraints if needed
let
tool_call_constraint
=
if
let
Some
(
tools
)
=
body_ref
.tools
.as_ref
()
{
let
tool_call_constraint
=
if
let
Some
(
tools
)
=
body_ref
.tools
.as_ref
()
{
utils
::
generate_tool_constraints
(
tools
,
&
request
.tool_choice
,
&
request
.model
)
.map_err
(
utils
::
generate_tool_constraints
(
tools
,
&
request
.tool_choice
,
&
request
.model
)
|
e
|
utils
::
bad_request_error
(
format!
(
"Invalid tool configuration: {}"
,
e
)),
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Invalid tool configuration: {}"
,
e
)))
?
)
?
}
else
{
}
else
{
None
None
};
};
...
@@ -124,7 +120,7 @@ impl PreparationStage {
...
@@ -124,7 +120,7 @@ impl PreparationStage {
let
(
original_text
,
token_ids
)
=
match
self
.resolve_generate_input
(
ctx
,
request
)
{
let
(
original_text
,
token_ids
)
=
match
self
.resolve_generate_input
(
ctx
,
request
)
{
Ok
(
res
)
=>
res
,
Ok
(
res
)
=>
res
,
Err
(
msg
)
=>
{
Err
(
msg
)
=>
{
return
Err
(
utils
::
bad_request
_error
(
msg
));
return
Err
(
error
::
bad_request
(
msg
));
}
}
};
};
...
...
sgl-router/src/routers/grpc/stages/request_building.rs
View file @
0b41a293
...
@@ -15,7 +15,7 @@ use crate::{
...
@@ -15,7 +15,7 @@ use crate::{
grpc_client
::
proto
,
grpc_client
::
proto
,
routers
::
grpc
::{
routers
::
grpc
::{
context
::{
ClientSelection
,
RequestContext
,
RequestType
,
WorkerSelection
},
context
::{
ClientSelection
,
RequestContext
,
RequestType
,
WorkerSelection
},
utils
,
error
,
},
},
};
};
...
@@ -37,13 +37,13 @@ impl PipelineStage for RequestBuildingStage {
...
@@ -37,13 +37,13 @@ impl PipelineStage for RequestBuildingStage {
.state
.state
.preparation
.preparation
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Preparation not completed"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Preparation not completed"
))
?
;
let
clients
=
ctx
let
clients
=
ctx
.state
.state
.clients
.clients
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Client acquisition not completed"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Client acquisition not completed"
))
?
;
// Get client for building request (use prefill client if PD mode)
// Get client for building request (use prefill client if PD mode)
let
builder_client
=
match
clients
{
let
builder_client
=
match
clients
{
...
@@ -69,9 +69,7 @@ impl PipelineStage for RequestBuildingStage {
...
@@ -69,9 +69,7 @@ impl PipelineStage for RequestBuildingStage {
.clone
(),
.clone
(),
prep
.tool_constraints
.clone
(),
prep
.tool_constraints
.clone
(),
)
)
.map_err
(|
e
|
{
.map_err
(|
e
|
error
::
bad_request
(
format!
(
"Invalid request parameters: {}"
,
e
)))
?
utils
::
bad_request_error
(
format!
(
"Invalid request parameters: {}"
,
e
))
})
?
}
}
RequestType
::
Generate
(
request
)
=>
{
RequestType
::
Generate
(
request
)
=>
{
let
request_id
=
request
let
request_id
=
request
...
@@ -86,7 +84,7 @@ impl PipelineStage for RequestBuildingStage {
...
@@ -86,7 +84,7 @@ impl PipelineStage for RequestBuildingStage {
prep
.original_text
.clone
(),
prep
.original_text
.clone
(),
prep
.token_ids
.clone
(),
prep
.token_ids
.clone
(),
)
)
.map_err
(
utils
::
bad_request
_error
)
?
.map_err
(
error
::
bad_request
)
?
}
}
RequestType
::
Responses
(
_
request
)
=>
{
RequestType
::
Responses
(
_
request
)
=>
{
// Responses API builds request during the MCP loop
// Responses API builds request during the MCP loop
...
...
sgl-router/src/routers/grpc/stages/request_execution.rs
View file @
0b41a293
...
@@ -5,13 +5,15 @@ use axum::response::Response;
...
@@ -5,13 +5,15 @@ use axum::response::Response;
use
super
::
PipelineStage
;
use
super
::
PipelineStage
;
use
crate
::{
use
crate
::{
grpc_client
::
proto
,
grpc_client
::
{
proto
,
sglang_scheduler
::
AbortOnDropStream
},
routers
::
grpc
::{
routers
::
grpc
::{
context
::{
ClientSelection
,
ExecutionResult
,
RequestContext
},
context
::{
ClientSelection
,
ExecutionResult
,
RequestContext
},
utils
,
error
,
},
},
};
};
type
StreamResult
=
Result
<
AbortOnDropStream
,
Box
<
dyn
std
::
error
::
Error
+
Send
+
Sync
>>
;
/// Request execution stage: Execute gRPC requests (single or dual dispatch)
/// Request execution stage: Execute gRPC requests (single or dual dispatch)
pub
struct
RequestExecutionStage
{
pub
struct
RequestExecutionStage
{
mode
:
ExecutionMode
,
mode
:
ExecutionMode
,
...
@@ -37,13 +39,13 @@ impl PipelineStage for RequestExecutionStage {
...
@@ -37,13 +39,13 @@ impl PipelineStage for RequestExecutionStage {
.state
.state
.proto_request
.proto_request
.take
()
.take
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Proto request not built"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Proto request not built"
))
?
;
let
clients
=
ctx
let
clients
=
ctx
.state
.state
.clients
.clients
.as_mut
()
.as_mut
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Client acquisition not completed"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Client acquisition not completed"
))
?
;
let
result
=
match
self
.mode
{
let
result
=
match
self
.mode
{
ExecutionMode
::
Single
=>
self
.execute_single
(
proto_request
,
clients
)
.await
?
,
ExecutionMode
::
Single
=>
self
.execute_single
(
proto_request
,
clients
)
.await
?
,
...
@@ -70,11 +72,12 @@ impl RequestExecutionStage {
...
@@ -70,11 +72,12 @@ impl RequestExecutionStage {
)
->
Result
<
ExecutionResult
,
Response
>
{
)
->
Result
<
ExecutionResult
,
Response
>
{
let
client
=
clients
let
client
=
clients
.single_mut
()
.single_mut
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Expected single client but got dual"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Expected single client but got dual"
))
?
;
let
stream
=
client
.generate
(
proto_request
)
.await
.map_err
(|
e
|
{
let
stream
=
client
utils
::
internal_error_message
(
format!
(
"Failed to start generation: {}"
,
e
))
.generate
(
proto_request
)
})
?
;
.await
.map_err
(|
e
|
error
::
internal_error
(
format!
(
"Failed to start generation: {}"
,
e
)))
?
;
Ok
(
ExecutionResult
::
Single
{
stream
})
Ok
(
ExecutionResult
::
Single
{
stream
})
}
}
...
@@ -86,12 +89,12 @@ impl RequestExecutionStage {
...
@@ -86,12 +89,12 @@ impl RequestExecutionStage {
)
->
Result
<
ExecutionResult
,
Response
>
{
)
->
Result
<
ExecutionResult
,
Response
>
{
let
(
prefill_client
,
decode_client
)
=
clients
let
(
prefill_client
,
decode_client
)
=
clients
.dual_mut
()
.dual_mut
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Expected dual clients but got single"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Expected dual clients but got single"
))
?
;
let
prefill_request
=
proto_request
.clone
();
let
prefill_request
=
proto_request
.clone
();
let
decode_request
=
proto_request
;
let
decode_request
=
proto_request
;
let
(
prefill_result
,
decode_result
)
=
tokio
::
join!
(
let
(
prefill_result
,
decode_result
)
:
(
StreamResult
,
StreamResult
)
=
tokio
::
join!
(
prefill_client
.generate
(
prefill_request
),
prefill_client
.generate
(
prefill_request
),
decode_client
.generate
(
decode_request
)
decode_client
.generate
(
decode_request
)
);
);
...
@@ -100,7 +103,7 @@ impl RequestExecutionStage {
...
@@ -100,7 +103,7 @@ impl RequestExecutionStage {
let
prefill_stream
=
match
prefill_result
{
let
prefill_stream
=
match
prefill_result
{
Ok
(
s
)
=>
s
,
Ok
(
s
)
=>
s
,
Err
(
e
)
=>
{
Err
(
e
)
=>
{
return
Err
(
utils
::
internal_error
_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"Prefill worker failed to start: {}"
,
"Prefill worker failed to start: {}"
,
e
e
)));
)));
...
@@ -111,7 +114,7 @@ impl RequestExecutionStage {
...
@@ -111,7 +114,7 @@ impl RequestExecutionStage {
let
decode_stream
=
match
decode_result
{
let
decode_stream
=
match
decode_result
{
Ok
(
s
)
=>
s
,
Ok
(
s
)
=>
s
,
Err
(
e
)
=>
{
Err
(
e
)
=>
{
return
Err
(
utils
::
internal_error
_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"Decode worker failed to start: {}"
,
"Decode worker failed to start: {}"
,
e
e
)));
)));
...
...
sgl-router/src/routers/grpc/stages/response_processing.rs
View file @
0b41a293
...
@@ -11,7 +11,7 @@ use axum::response::Response;
...
@@ -11,7 +11,7 @@ use axum::response::Response;
use
super
::
PipelineStage
;
use
super
::
PipelineStage
;
use
crate
::
routers
::
grpc
::{
use
crate
::
routers
::
grpc
::{
context
::{
FinalResponse
,
RequestContext
,
RequestType
},
context
::{
FinalResponse
,
RequestContext
,
RequestType
},
processing
,
streaming
,
utils
,
error
,
processing
,
streaming
,
};
};
/// Response processing stage: Handles both streaming and non-streaming responses
/// Response processing stage: Handles both streaming and non-streaming responses
...
@@ -42,7 +42,7 @@ impl PipelineStage for ResponseProcessingStage {
...
@@ -42,7 +42,7 @@ impl PipelineStage for ResponseProcessingStage {
match
&
ctx
.input.request_type
{
match
&
ctx
.input.request_type
{
RequestType
::
Chat
(
_
)
=>
self
.process_chat_response
(
ctx
)
.await
,
RequestType
::
Chat
(
_
)
=>
self
.process_chat_response
(
ctx
)
.await
,
RequestType
::
Generate
(
_
)
=>
self
.process_generate_response
(
ctx
)
.await
,
RequestType
::
Generate
(
_
)
=>
self
.process_generate_response
(
ctx
)
.await
,
RequestType
::
Responses
(
_
)
=>
Err
(
utils
::
bad_request
_error
(
RequestType
::
Responses
(
_
)
=>
Err
(
error
::
bad_request
(
"Responses API processing must be handled by responses handler"
.to_string
(),
"Responses API processing must be handled by responses handler"
.to_string
(),
)),
)),
}
}
...
@@ -66,14 +66,14 @@ impl ResponseProcessingStage {
...
@@ -66,14 +66,14 @@ impl ResponseProcessingStage {
.response
.response
.execution_result
.execution_result
.take
()
.take
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"No execution result"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"No execution result"
))
?
;
// Get dispatch metadata (needed by both streaming and non-streaming)
// Get dispatch metadata (needed by both streaming and non-streaming)
let
dispatch
=
ctx
let
dispatch
=
ctx
.state
.state
.dispatch
.dispatch
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Dispatch metadata not set"
))
?
.ok_or_else
(||
error
::
internal_error
(
"Dispatch metadata not set"
))
?
.clone
();
.clone
();
if
is_streaming
{
if
is_streaming
{
...
@@ -100,7 +100,7 @@ impl ResponseProcessingStage {
...
@@ -100,7 +100,7 @@ impl ResponseProcessingStage {
.response
.response
.stop_decoder
.stop_decoder
.as_mut
()
.as_mut
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Stop decoder not initialized"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Stop decoder not initialized"
))
?
;
let
response
=
self
let
response
=
self
.processor
.processor
...
@@ -132,14 +132,14 @@ impl ResponseProcessingStage {
...
@@ -132,14 +132,14 @@ impl ResponseProcessingStage {
.response
.response
.execution_result
.execution_result
.take
()
.take
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"No execution result"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"No execution result"
))
?
;
// Get dispatch metadata (needed by both streaming and non-streaming)
// Get dispatch metadata (needed by both streaming and non-streaming)
let
dispatch
=
ctx
let
dispatch
=
ctx
.state
.state
.dispatch
.dispatch
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Dispatch metadata not set"
))
?
.ok_or_else
(||
error
::
internal_error
(
"Dispatch metadata not set"
))
?
.clone
();
.clone
();
if
is_streaming
{
if
is_streaming
{
...
@@ -162,7 +162,7 @@ impl ResponseProcessingStage {
...
@@ -162,7 +162,7 @@ impl ResponseProcessingStage {
.response
.response
.stop_decoder
.stop_decoder
.as_mut
()
.as_mut
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Stop decoder not initialized"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Stop decoder not initialized"
))
?
;
let
result_array
=
self
let
result_array
=
self
.processor
.processor
...
...
sgl-router/src/routers/grpc/stages/worker_selection.rs
View file @
0b41a293
...
@@ -12,7 +12,7 @@ use crate::{
...
@@ -12,7 +12,7 @@ use crate::{
policies
::
PolicyRegistry
,
policies
::
PolicyRegistry
,
routers
::
grpc
::{
routers
::
grpc
::{
context
::{
RequestContext
,
WorkerSelection
},
context
::{
RequestContext
,
WorkerSelection
},
utils
,
error
,
},
},
};
};
...
@@ -51,7 +51,7 @@ impl PipelineStage for WorkerSelectionStage {
...
@@ -51,7 +51,7 @@ impl PipelineStage for WorkerSelectionStage {
.state
.state
.preparation
.preparation
.as_ref
()
.as_ref
()
.ok_or_else
(||
utils
::
internal_error
_static
(
"Preparation stage not completed"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Preparation stage not completed"
))
?
;
// For Harmony, use selection_text produced during Harmony encoding
// For Harmony, use selection_text produced during Harmony encoding
// Otherwise, use original_text from regular preparation
// Otherwise, use original_text from regular preparation
...
@@ -66,7 +66,7 @@ impl PipelineStage for WorkerSelectionStage {
...
@@ -66,7 +66,7 @@ impl PipelineStage for WorkerSelectionStage {
match
self
.select_single_worker
(
ctx
.input.model_id
.as_deref
(),
text
)
{
match
self
.select_single_worker
(
ctx
.input.model_id
.as_deref
(),
text
)
{
Some
(
w
)
=>
WorkerSelection
::
Single
{
worker
:
w
},
Some
(
w
)
=>
WorkerSelection
::
Single
{
worker
:
w
},
None
=>
{
None
=>
{
return
Err
(
utils
::
service_unavailable
_error
(
format!
(
return
Err
(
error
::
service_unavailable
(
format!
(
"No available workers for model: {:?}"
,
"No available workers for model: {:?}"
,
ctx
.input.model_id
ctx
.input.model_id
)));
)));
...
@@ -77,7 +77,7 @@ impl PipelineStage for WorkerSelectionStage {
...
@@ -77,7 +77,7 @@ impl PipelineStage for WorkerSelectionStage {
match
self
.select_pd_pair
(
ctx
.input.model_id
.as_deref
(),
text
)
{
match
self
.select_pd_pair
(
ctx
.input.model_id
.as_deref
(),
text
)
{
Some
((
prefill
,
decode
))
=>
WorkerSelection
::
Dual
{
prefill
,
decode
},
Some
((
prefill
,
decode
))
=>
WorkerSelection
::
Dual
{
prefill
,
decode
},
None
=>
{
None
=>
{
return
Err
(
utils
::
service_unavailable
_error
(
format!
(
return
Err
(
error
::
service_unavailable
(
format!
(
"No available PD worker pairs for model: {:?}"
,
"No available PD worker pairs for model: {:?}"
,
ctx
.input.model_id
ctx
.input.model_id
)));
)));
...
...
sgl-router/src/routers/grpc/utils.rs
View file @
0b41a293
...
@@ -2,17 +2,13 @@
...
@@ -2,17 +2,13 @@
use
std
::{
collections
::
HashMap
,
sync
::
Arc
};
use
std
::{
collections
::
HashMap
,
sync
::
Arc
};
use
axum
::{
use
axum
::
response
::
Response
;
http
::
StatusCode
,
response
::{
IntoResponse
,
Response
},
Json
,
};
use
futures
::
StreamExt
;
use
futures
::
StreamExt
;
use
serde_json
::{
json
,
Map
,
Value
};
use
serde_json
::{
json
,
Map
,
Value
};
use
tracing
::{
error
,
warn
};
use
tracing
::{
error
,
warn
};
use
uuid
::
Uuid
;
use
uuid
::
Uuid
;
use
super
::
ProcessedMessages
;
use
super
::
{
error
,
ProcessedMessages
}
;
pub
use
crate
::
tokenizer
::
StopSequenceDecoder
;
pub
use
crate
::
tokenizer
::
StopSequenceDecoder
;
use
crate
::{
use
crate
::{
core
::
Worker
,
core
::
Worker
,
...
@@ -40,8 +36,8 @@ pub async fn get_grpc_client_from_worker(
...
@@ -40,8 +36,8 @@ pub async fn get_grpc_client_from_worker(
let
client_arc
=
worker
let
client_arc
=
worker
.get_grpc_client
()
.get_grpc_client
()
.await
.await
.map_err
(|
e
|
internal_error
_message
(
format!
(
"Failed to get gRPC client: {}"
,
e
)))
?
.map_err
(|
e
|
error
::
internal_error
(
format!
(
"Failed to get gRPC client: {}"
,
e
)))
?
.ok_or_else
(||
internal_error
_static
(
"Selected worker is not configured for gRPC"
))
?
;
.ok_or_else
(||
error
::
internal_error
(
"Selected worker is not configured for gRPC"
))
?
;
Ok
((
*
client_arc
)
.clone
())
Ok
((
*
client_arc
)
.clone
())
}
}
...
@@ -433,67 +429,6 @@ pub fn process_chat_messages(
...
@@ -433,67 +429,6 @@ pub fn process_chat_messages(
})
})
}
}
/// Error response helpers (shared between regular and PD routers)
pub
fn
internal_error_static
(
msg
:
&
'static
str
)
->
Response
{
error!
(
"{}"
,
msg
);
(
StatusCode
::
INTERNAL_SERVER_ERROR
,
Json
(
json!
({
"error"
:
{
"message"
:
msg
,
"type"
:
"internal_error"
,
"code"
:
500
}
})),
)
.into_response
()
}
pub
fn
internal_error_message
(
message
:
String
)
->
Response
{
error!
(
"{}"
,
message
);
(
StatusCode
::
INTERNAL_SERVER_ERROR
,
Json
(
json!
({
"error"
:
{
"message"
:
message
,
"type"
:
"internal_error"
,
"code"
:
500
}
})),
)
.into_response
()
}
pub
fn
bad_request_error
(
message
:
String
)
->
Response
{
error!
(
"{}"
,
message
);
(
StatusCode
::
BAD_REQUEST
,
Json
(
json!
({
"error"
:
{
"message"
:
message
,
"type"
:
"invalid_request_error"
,
"code"
:
400
}
})),
)
.into_response
()
}
pub
fn
service_unavailable_error
(
message
:
String
)
->
Response
{
warn!
(
"{}"
,
message
);
(
StatusCode
::
SERVICE_UNAVAILABLE
,
Json
(
json!
({
"error"
:
{
"message"
:
message
,
"type"
:
"service_unavailable"
,
"code"
:
503
}
})),
)
.into_response
()
}
/// Create a StopSequenceDecoder from stop parameters
/// Create a StopSequenceDecoder from stop parameters
pub
fn
create_stop_decoder
(
pub
fn
create_stop_decoder
(
tokenizer
:
&
Arc
<
dyn
Tokenizer
>
,
tokenizer
:
&
Arc
<
dyn
Tokenizer
>
,
...
@@ -646,7 +581,7 @@ pub async fn collect_stream_responses(
...
@@ -646,7 +581,7 @@ pub async fn collect_stream_responses(
Some
(
Error
(
err
))
=>
{
Some
(
Error
(
err
))
=>
{
error!
(
"{} error: {}"
,
worker_name
,
err
.message
);
error!
(
"{} error: {}"
,
worker_name
,
err
.message
);
// Don't mark as completed - let Drop send abort for error cases
// Don't mark as completed - let Drop send abort for error cases
return
Err
(
internal_error
_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"{} generation failed: {}"
,
"{} generation failed: {}"
,
worker_name
,
err
.message
worker_name
,
err
.message
)));
)));
...
@@ -662,7 +597,7 @@ pub async fn collect_stream_responses(
...
@@ -662,7 +597,7 @@ pub async fn collect_stream_responses(
Err
(
e
)
=>
{
Err
(
e
)
=>
{
error!
(
"{} stream error: {:?}"
,
worker_name
,
e
);
error!
(
"{} stream error: {:?}"
,
worker_name
,
e
);
// Don't mark as completed - let Drop send abort for error cases
// Don't mark as completed - let Drop send abort for error cases
return
Err
(
internal_error
_message
(
format!
(
return
Err
(
error
::
internal_error
(
format!
(
"{} stream failed: {}"
,
"{} stream failed: {}"
,
worker_name
,
e
worker_name
,
e
)));
)));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment