Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
dc01313d
Unverified
Commit
dc01313d
authored
Oct 16, 2025
by
Chang Su
Committed by
GitHub
Oct 16, 2025
Browse files
[router] Add rustfmt and set group imports by default (#11732)
parent
7a7f99be
Changes
126
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
284 additions
and
227 deletions
+284
-227
sgl-router/src/routers/grpc/streaming.rs
sgl-router/src/routers/grpc/streaming.rs
+28
-26
sgl-router/src/routers/grpc/utils.rs
sgl-router/src/routers/grpc/utils.rs
+31
-20
sgl-router/src/routers/header_utils.rs
sgl-router/src/routers/header_utils.rs
+1
-3
sgl-router/src/routers/http/pd_router.rs
sgl-router/src/routers/http/pd_router.rs
+22
-18
sgl-router/src/routers/http/router.rs
sgl-router/src/routers/http/router.rs
+24
-20
sgl-router/src/routers/mod.rs
sgl-router/src/routers/mod.rs
+11
-9
sgl-router/src/routers/openai/conversations.rs
sgl-router/src/routers/openai/conversations.rs
+15
-11
sgl-router/src/routers/openai/mcp.rs
sgl-router/src/routers/openai/mcp.rs
+7
-6
sgl-router/src/routers/openai/responses.rs
sgl-router/src/routers/openai/responses.rs
+6
-3
sgl-router/src/routers/openai/router.rs
sgl-router/src/routers/openai/router.rs
+32
-24
sgl-router/src/routers/openai/streaming.rs
sgl-router/src/routers/openai/streaming.rs
+17
-12
sgl-router/src/routers/router_manager.rs
sgl-router/src/routers/router_manager.rs
+17
-11
sgl-router/src/server.rs
sgl-router/src/server.rs
+21
-18
sgl-router/src/service_discovery.rs
sgl-router/src/service_discovery.rs
+18
-16
sgl-router/src/tokenizer/chat_template.rs
sgl-router/src/tokenizer/chat_template.rs
+11
-5
sgl-router/src/tokenizer/factory.rs
sgl-router/src/tokenizer/factory.rs
+4
-9
sgl-router/src/tokenizer/hub.rs
sgl-router/src/tokenizer/hub.rs
+5
-2
sgl-router/src/tokenizer/huggingface.rs
sgl-router/src/tokenizer/huggingface.rs
+6
-6
sgl-router/src/tokenizer/mock.rs
sgl-router/src/tokenizer/mock.rs
+4
-2
sgl-router/src/tokenizer/mod.rs
sgl-router/src/tokenizer/mod.rs
+4
-6
No files found.
sgl-router/src/routers/grpc/streaming.rs
View file @
dc01313d
...
...
@@ -3,38 +3,40 @@
//! This module contains shared streaming logic for both Regular and PD routers,
//! eliminating ~600 lines of duplication.
use
axum
::
response
::
Response
;
use
axum
::{
body
::
Body
,
http
::
StatusCode
};
use
std
::{
collections
::
HashMap
,
io
,
sync
::
Arc
,
time
::
Instant
};
use
axum
::{
body
::
Body
,
http
::
StatusCode
,
response
::
Response
};
use
bytes
::
Bytes
;
use
http
::
header
::{
HeaderValue
,
CONTENT_TYPE
};
use
proto
::{
generate_complete
::
MatchedStop
::{
MatchedStopStr
,
MatchedTokenId
},
generate_response
::
Response
::{
Chunk
,
Complete
,
Error
},
};
use
serde_json
::{
json
,
Value
};
use
std
::
collections
::
HashMap
;
use
std
::
io
;
use
std
::
sync
::
Arc
;
use
tokio
::
sync
::
mpsc
::
UnboundedSender
;
use
tokio_stream
::
wrappers
::
UnboundedReceiverStream
;
use
tokio_stream
::
StreamExt
;
use
tokio
::
sync
::{
mpsc
,
mpsc
::
UnboundedSender
};
use
tokio_stream
::{
wrappers
::
UnboundedReceiverStream
,
StreamExt
};
use
tracing
::{
debug
,
error
,
warn
};
use
super
::
context
;
use
super
::
utils
;
use
crate
::
grpc_client
::
proto
;
use
crate
::
protocols
::
chat
::{
ChatCompletionRequest
,
ChatCompletionStreamResponse
,
ChatMessageDelta
,
ChatStreamChoice
,
};
use
crate
::
protocols
::
common
::{
ChatLogProbs
,
FunctionCallDelta
,
StringOrArray
,
Tool
,
ToolCallDelta
,
ToolChoice
,
ToolChoiceValue
,
Usage
,
use
super
::{
context
,
utils
};
use
crate
::{
grpc_client
::
proto
,
protocols
::{
chat
::{
ChatCompletionRequest
,
ChatCompletionStreamResponse
,
ChatMessageDelta
,
ChatStreamChoice
,
},
common
::{
ChatLogProbs
,
FunctionCallDelta
,
StringOrArray
,
Tool
,
ToolCallDelta
,
ToolChoice
,
ToolChoiceValue
,
Usage
,
},
generate
::
GenerateRequest
,
},
reasoning_parser
::
ReasoningParser
,
tokenizer
::{
stop
::{
SequenceDecoderOutput
,
StopSequenceDecoder
},
traits
::
Tokenizer
,
},
tool_parser
::
ToolParser
,
};
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
reasoning_parser
::
ReasoningParser
;
use
crate
::
tokenizer
::
stop
::{
SequenceDecoderOutput
,
StopSequenceDecoder
};
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tool_parser
::
ToolParser
;
use
proto
::
generate_complete
::
MatchedStop
::{
MatchedStopStr
,
MatchedTokenId
};
use
proto
::
generate_response
::
Response
::{
Chunk
,
Complete
,
Error
};
use
std
::
time
::
Instant
;
use
tokio
::
sync
::
mpsc
;
/// Shared streaming processor for both single and dual dispatch modes
#[derive(Clone)]
...
...
sgl-router/src/routers/grpc/utils.rs
View file @
dc01313d
//! Shared utilities for gRPC routers
use
super
::
ProcessedMessages
;
use
crate
::
core
::
Worker
;
use
crate
::
grpc_client
::
sglang_scheduler
::
AbortOnDropStream
;
use
crate
::
grpc_client
::{
proto
,
SglangSchedulerClient
};
use
crate
::
protocols
::
chat
::{
ChatCompletionRequest
,
ChatMessage
};
use
crate
::
protocols
::
common
::{
ChatLogProbs
,
ChatLogProbsContent
,
FunctionCallResponse
,
StringOrArray
,
Tool
,
ToolCall
,
ToolChoice
,
ToolChoiceValue
,
TopLogProb
,
};
use
crate
::
protocols
::
generate
::
GenerateFinishReason
;
use
crate
::
tokenizer
::
chat_template
::{
ChatTemplateContentFormat
,
ChatTemplateParams
};
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tokenizer
::
HuggingFaceTokenizer
;
pub
use
crate
::
tokenizer
::
StopSequenceDecoder
;
use
std
::{
collections
::
HashMap
,
sync
::
Arc
};
use
axum
::{
http
::
StatusCode
,
response
::{
IntoResponse
,
Response
},
...
...
@@ -21,11 +9,29 @@ use axum::{
};
use
futures
::
StreamExt
;
use
serde_json
::{
json
,
Map
,
Value
};
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
Arc
;
use
tracing
::{
error
,
warn
};
use
uuid
::
Uuid
;
use
super
::
ProcessedMessages
;
pub
use
crate
::
tokenizer
::
StopSequenceDecoder
;
use
crate
::{
core
::
Worker
,
grpc_client
::{
proto
,
sglang_scheduler
::
AbortOnDropStream
,
SglangSchedulerClient
},
protocols
::{
chat
::{
ChatCompletionRequest
,
ChatMessage
},
common
::{
ChatLogProbs
,
ChatLogProbsContent
,
FunctionCallResponse
,
StringOrArray
,
Tool
,
ToolCall
,
ToolChoice
,
ToolChoiceValue
,
TopLogProb
,
},
generate
::
GenerateFinishReason
,
},
tokenizer
::{
chat_template
::{
ChatTemplateContentFormat
,
ChatTemplateParams
},
traits
::
Tokenizer
,
HuggingFaceTokenizer
,
},
};
/// Get gRPC client from worker, returning appropriate error response on failure
pub
async
fn
get_grpc_client_from_worker
(
worker
:
&
Arc
<
dyn
Worker
>
,
...
...
@@ -953,12 +959,17 @@ pub fn parse_finish_reason(reason_str: &str, completion_tokens: i32) -> Generate
#[cfg(test)]
mod
tests
{
use
super
::
*
;
use
crate
::
protocols
::
chat
::{
ChatMessage
,
UserMessageContent
};
use
crate
::
protocols
::
common
::{
ContentPart
,
ImageUrl
};
use
crate
::
tokenizer
::
chat_template
::
ChatTemplateContentFormat
;
use
serde_json
::
json
;
use
super
::
*
;
use
crate
::{
protocols
::{
chat
::{
ChatMessage
,
UserMessageContent
},
common
::{
ContentPart
,
ImageUrl
},
},
tokenizer
::
chat_template
::
ChatTemplateContentFormat
,
};
#[test]
fn
test_transform_messages_string_format
()
{
let
messages
=
vec!
[
ChatMessage
::
User
{
...
...
sgl-router/src/routers/header_utils.rs
View file @
dc01313d
use
axum
::
body
::
Body
;
use
axum
::
extract
::
Request
;
use
axum
::
http
::
HeaderMap
;
use
axum
::{
body
::
Body
,
extract
::
Request
,
http
::
HeaderMap
};
/// Copy request headers to a Vec of name-value string pairs
/// Used for forwarding headers to backend workers
...
...
sgl-router/src/routers/http/pd_router.rs
View file @
dc01313d
use
super
::
pd_types
::
api_path
;
use
crate
::
config
::
types
::
RetryConfig
;
use
crate
::
core
::{
is_retryable_status
,
RetryExecutor
,
Worker
,
WorkerLoadGuard
,
WorkerRegistry
,
WorkerType
,
};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
policies
::{
LoadBalancingPolicy
,
PolicyRegistry
};
use
crate
::
protocols
::
chat
::{
ChatCompletionRequest
,
ChatMessage
,
UserMessageContent
};
use
crate
::
protocols
::
common
::{
InputIds
,
StringOrArray
};
use
crate
::
protocols
::
completion
::
CompletionRequest
;
use
crate
::
protocols
::
embedding
::
EmbeddingRequest
;
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
protocols
::
rerank
::
RerankRequest
;
use
crate
::
protocols
::
responses
::{
ResponsesGetParams
,
ResponsesRequest
};
use
crate
::
routers
::
header_utils
;
use
crate
::
routers
::
RouterTrait
;
use
std
::{
sync
::
Arc
,
time
::
Instant
};
use
async_trait
::
async_trait
;
use
axum
::{
body
::
Body
,
...
...
@@ -25,11 +11,29 @@ use futures_util::StreamExt;
use
reqwest
::
Client
;
use
serde
::
Serialize
;
use
serde_json
::{
json
,
Value
};
use
std
::
sync
::
Arc
;
use
std
::
time
::
Instant
;
use
tokio_stream
::
wrappers
::
UnboundedReceiverStream
;
use
tracing
::{
debug
,
error
,
warn
};
use
super
::
pd_types
::
api_path
;
use
crate
::{
config
::
types
::
RetryConfig
,
core
::{
is_retryable_status
,
RetryExecutor
,
Worker
,
WorkerLoadGuard
,
WorkerRegistry
,
WorkerType
,
},
metrics
::
RouterMetrics
,
policies
::{
LoadBalancingPolicy
,
PolicyRegistry
},
protocols
::{
chat
::{
ChatCompletionRequest
,
ChatMessage
,
UserMessageContent
},
common
::{
InputIds
,
StringOrArray
},
completion
::
CompletionRequest
,
embedding
::
EmbeddingRequest
,
generate
::
GenerateRequest
,
rerank
::
RerankRequest
,
responses
::{
ResponsesGetParams
,
ResponsesRequest
},
},
routers
::{
header_utils
,
RouterTrait
},
};
#[derive(Debug)]
pub
struct
PDRouter
{
pub
worker_registry
:
Arc
<
WorkerRegistry
>
,
...
...
sgl-router/src/routers/http/router.rs
View file @
dc01313d
use
crate
::
config
::
types
::
RetryConfig
;
use
crate
::
core
::{
is_retryable_status
,
ConnectionMode
,
RetryExecutor
,
Worker
,
WorkerRegistry
,
WorkerType
,
};
use
crate
::
metrics
::
RouterMetrics
;
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
protocols
::
chat
::
ChatCompletionRequest
;
use
crate
::
protocols
::
common
::
GenerationRequest
;
use
crate
::
protocols
::
completion
::
CompletionRequest
;
use
crate
::
protocols
::
embedding
::
EmbeddingRequest
;
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
protocols
::
rerank
::{
RerankRequest
,
RerankResponse
,
RerankResult
};
use
crate
::
protocols
::
responses
::{
ResponsesGetParams
,
ResponsesRequest
};
use
crate
::
routers
::
header_utils
;
use
crate
::
routers
::
RouterTrait
;
use
axum
::
body
::
to_bytes
;
use
std
::{
sync
::
Arc
,
time
::
Instant
};
use
axum
::{
body
::
Body
,
body
::
{
to_bytes
,
Body
}
,
extract
::
Request
,
http
::{
header
::
CONTENT_LENGTH
,
header
::
CONTENT_TYPE
,
HeaderMap
,
HeaderValue
,
Method
,
StatusCode
,
header
::{
CONTENT_LENGTH
,
CONTENT_TYPE
},
HeaderMap
,
HeaderValue
,
Method
,
StatusCode
,
},
response
::{
IntoResponse
,
Response
},
Json
,
};
use
futures_util
::
StreamExt
;
use
reqwest
::
Client
;
use
std
::
sync
::
Arc
;
use
std
::
time
::
Instant
;
use
tokio_stream
::
wrappers
::
UnboundedReceiverStream
;
use
tracing
::{
debug
,
error
};
use
crate
::{
config
::
types
::
RetryConfig
,
core
::{
is_retryable_status
,
ConnectionMode
,
RetryExecutor
,
Worker
,
WorkerRegistry
,
WorkerType
,
},
metrics
::
RouterMetrics
,
policies
::
PolicyRegistry
,
protocols
::{
chat
::
ChatCompletionRequest
,
common
::
GenerationRequest
,
completion
::
CompletionRequest
,
embedding
::
EmbeddingRequest
,
generate
::
GenerateRequest
,
rerank
::{
RerankRequest
,
RerankResponse
,
RerankResult
},
responses
::{
ResponsesGetParams
,
ResponsesRequest
},
},
routers
::{
header_utils
,
RouterTrait
},
};
/// Regular router that uses injected load balancing policies
#[derive(Debug)]
pub
struct
Router
{
...
...
sgl-router/src/routers/mod.rs
View file @
dc01313d
//! Router implementations
use
std
::
fmt
::
Debug
;
use
async_trait
::
async_trait
;
use
axum
::{
body
::
Body
,
...
...
@@ -7,16 +9,17 @@ use axum::{
http
::{
HeaderMap
,
StatusCode
},
response
::{
IntoResponse
,
Response
},
};
use
std
::
fmt
::
Debug
;
use
crate
::
protocols
::
chat
::
ChatCompletionRequest
;
use
crate
::
protocols
::
completion
::
CompletionRequest
;
use
crate
::
protocols
::
embedding
::
EmbeddingRequest
;
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
protocols
::
rerank
::
RerankRequest
;
use
crate
::
protocols
::
responses
::{
ResponsesGetParams
,
ResponsesRequest
};
use
serde_json
::
Value
;
use
crate
::
protocols
::{
chat
::
ChatCompletionRequest
,
completion
::
CompletionRequest
,
embedding
::
EmbeddingRequest
,
generate
::
GenerateRequest
,
rerank
::
RerankRequest
,
responses
::{
ResponsesGetParams
,
ResponsesRequest
},
};
pub
mod
factory
;
pub
mod
grpc
;
pub
mod
header_utils
;
...
...
@@ -25,7 +28,6 @@ pub mod openai; // New refactored OpenAI router module
pub
mod
router_manager
;
pub
use
factory
::
RouterFactory
;
// Re-export HTTP routers for convenience
pub
use
http
::{
pd_router
,
pd_types
,
router
};
...
...
sgl-router/src/routers/openai/conversations.rs
View file @
dc01313d
//! Conversation CRUD operations and persistence
use
crate
::
data_connector
::{
conversation_items
::
ListParams
,
conversation_items
::
SortOrder
,
Conversation
,
ConversationId
,
ConversationItemId
,
ConversationItemStorage
,
ConversationStorage
,
NewConversation
,
NewConversationItem
,
ResponseId
,
ResponseStorage
,
SharedConversationItemStorage
,
SharedConversationStorage
,
use
std
::{
collections
::
HashMap
,
sync
::
Arc
};
use
axum
::{
http
::
StatusCode
,
response
::{
IntoResponse
,
Response
},
Json
,
};
use
crate
::
protocols
::
responses
::{
ResponseInput
,
ResponseInputOutputItem
,
ResponsesRequest
};
use
axum
::
http
::
StatusCode
;
use
axum
::
response
::{
IntoResponse
,
Response
};
use
axum
::
Json
;
use
chrono
::
Utc
;
use
serde_json
::{
json
,
Value
};
use
std
::
collections
::
HashMap
;
use
std
::
sync
::
Arc
;
use
tracing
::{
debug
,
info
,
warn
};
use
super
::
responses
::
build_stored_response
;
use
crate
::{
data_connector
::{
conversation_items
::{
ListParams
,
SortOrder
},
Conversation
,
ConversationId
,
ConversationItemId
,
ConversationItemStorage
,
ConversationStorage
,
NewConversation
,
NewConversationItem
,
ResponseId
,
ResponseStorage
,
SharedConversationItemStorage
,
SharedConversationStorage
,
},
protocols
::
responses
::{
ResponseInput
,
ResponseInputOutputItem
,
ResponsesRequest
},
};
/// Maximum number of properties allowed in conversation metadata
pub
(
crate
)
const
MAX_METADATA_PROPERTIES
:
usize
=
16
;
...
...
sgl-router/src/routers/openai/mcp.rs
View file @
dc01313d
...
...
@@ -8,19 +8,20 @@
//! - Payload transformation for MCP tool interception
//! - Metadata injection for MCP operations
use
crate
::
mcp
::
McpClientManager
;
use
crate
::
protocols
::
responses
::{
ResponseInput
,
ResponseTool
,
ResponseToolType
,
ResponsesRequest
,
};
use
crate
::
routers
::
header_utils
::
apply_request_headers
;
use
std
::{
io
,
sync
::
Arc
};
use
axum
::
http
::
HeaderMap
;
use
bytes
::
Bytes
;
use
serde_json
::{
json
,
to_value
,
Value
};
use
std
::{
io
,
sync
::
Arc
};
use
tokio
::
sync
::
mpsc
;
use
tracing
::{
info
,
warn
};
use
super
::
utils
::
event_types
;
use
crate
::{
mcp
::
McpClientManager
,
protocols
::
responses
::{
ResponseInput
,
ResponseTool
,
ResponseToolType
,
ResponsesRequest
},
routers
::
header_utils
::
apply_request_headers
,
};
// ============================================================================
// Configuration and State Types
...
...
sgl-router/src/routers/openai/responses.rs
View file @
dc01313d
//! Response storage, patching, and extraction utilities
use
crate
::
data_connector
::{
ResponseId
,
StoredResponse
};
use
crate
::
protocols
::
responses
::{
ResponseInput
,
ResponseToolType
,
ResponsesRequest
};
use
serde_json
::{
json
,
Value
};
use
std
::
collections
::
HashMap
;
use
serde_json
::{
json
,
Value
};
use
tracing
::
warn
;
use
super
::
utils
::
event_types
;
use
crate
::{
data_connector
::{
ResponseId
,
StoredResponse
},
protocols
::
responses
::{
ResponseInput
,
ResponseToolType
,
ResponsesRequest
},
};
// ============================================================================
// Response Storage Operations
...
...
sgl-router/src/routers/openai/router.rs
View file @
dc01313d
//! OpenAI router - main coordinator that delegates to specialized modules
use
crate
::
config
::
CircuitBreakerConfig
;
use
crate
::
core
::{
CircuitBreaker
,
CircuitBreakerConfig
as
CoreCircuitBreakerConfig
};
use
crate
::
data_connector
::{
conversation_items
::
ListParams
,
conversation_items
::
SortOrder
,
ConversationId
,
ResponseId
,
SharedConversationItemStorage
,
SharedConversationStorage
,
SharedResponseStorage
,
};
use
crate
::
protocols
::
chat
::
ChatCompletionRequest
;
use
crate
::
protocols
::
completion
::
CompletionRequest
;
use
crate
::
protocols
::
embedding
::
EmbeddingRequest
;
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
protocols
::
rerank
::
RerankRequest
;
use
crate
::
protocols
::
responses
::{
ResponseContentPart
,
ResponseInput
,
ResponseInputOutputItem
,
ResponsesGetParams
,
ResponsesRequest
,
use
std
::{
any
::
Any
,
sync
::{
atomic
::
AtomicBool
,
Arc
},
};
use
crate
::
routers
::
header_utils
::
apply_request_headers
;
use
axum
::{
body
::
Body
,
extract
::
Request
,
...
...
@@ -25,10 +14,6 @@ use axum::{
};
use
futures_util
::
StreamExt
;
use
serde_json
::{
json
,
to_value
,
Value
};
use
std
::{
any
::
Any
,
sync
::{
atomic
::
AtomicBool
,
Arc
},
};
use
tokio
::
sync
::
mpsc
;
use
tokio_stream
::
wrappers
::
UnboundedReceiverStream
;
use
tracing
::
warn
;
...
...
@@ -39,12 +24,35 @@ use super::conversations::{
get_conversation
,
get_conversation_item
,
list_conversation_items
,
persist_conversation_items
,
update_conversation
,
};
use
super
::
mcp
::{
execute_tool_loop
,
mcp_manager_from_request_tools
,
prepare_mcp_payload_for_streaming
,
McpLoopConfig
,
use
super
::{
mcp
::{
execute_tool_loop
,
mcp_manager_from_request_tools
,
prepare_mcp_payload_for_streaming
,
McpLoopConfig
,
},
responses
::{
mask_tools_as_mcp
,
patch_streaming_response_json
},
streaming
::
handle_streaming_response
,
};
use
crate
::{
config
::
CircuitBreakerConfig
,
core
::{
CircuitBreaker
,
CircuitBreakerConfig
as
CoreCircuitBreakerConfig
},
data_connector
::{
conversation_items
::{
ListParams
,
SortOrder
},
ConversationId
,
ResponseId
,
SharedConversationItemStorage
,
SharedConversationStorage
,
SharedResponseStorage
,
},
protocols
::{
chat
::
ChatCompletionRequest
,
completion
::
CompletionRequest
,
embedding
::
EmbeddingRequest
,
generate
::
GenerateRequest
,
rerank
::
RerankRequest
,
responses
::{
ResponseContentPart
,
ResponseInput
,
ResponseInputOutputItem
,
ResponsesGetParams
,
ResponsesRequest
,
},
},
routers
::
header_utils
::
apply_request_headers
,
};
use
super
::
responses
::{
mask_tools_as_mcp
,
patch_streaming_response_json
};
use
super
::
streaming
::
handle_streaming_response
;
// ============================================================================
// OpenAIRouter Struct
...
...
sgl-router/src/routers/openai/streaming.rs
View file @
dc01313d
...
...
@@ -7,11 +7,8 @@
//! - MCP tool execution loops within streaming responses
//! - Event transformation and output index remapping
use
crate
::
data_connector
::{
SharedConversationItemStorage
,
SharedConversationStorage
,
SharedResponseStorage
,
};
use
crate
::
protocols
::
responses
::{
ResponseToolType
,
ResponsesRequest
};
use
crate
::
routers
::
header_utils
::{
apply_request_headers
,
preserve_response_headers
};
use
std
::{
borrow
::
Cow
,
io
,
sync
::
Arc
};
use
axum
::{
body
::
Body
,
http
::{
header
::
CONTENT_TYPE
,
HeaderMap
,
HeaderValue
,
StatusCode
},
...
...
@@ -20,20 +17,28 @@ use axum::{
use
bytes
::
Bytes
;
use
futures_util
::
StreamExt
;
use
serde_json
::{
json
,
Value
};
use
std
::{
borrow
::
Cow
,
io
,
sync
::
Arc
};
use
tokio
::
sync
::
mpsc
;
use
tokio_stream
::
wrappers
::
UnboundedReceiverStream
;
use
tracing
::
warn
;
// Import from sibling modules
use
super
::
conversations
::
persist_conversation_items
;
use
super
::
mcp
::{
build_resume_payload
,
execute_streaming_tool_calls
,
inject_mcp_metadata_streaming
,
mcp_manager_from_request_tools
,
prepare_mcp_payload_for_streaming
,
send_mcp_list_tools_events
,
McpLoopConfig
,
ToolLoopState
,
use
super
::{
mcp
::{
build_resume_payload
,
execute_streaming_tool_calls
,
inject_mcp_metadata_streaming
,
mcp_manager_from_request_tools
,
prepare_mcp_payload_for_streaming
,
send_mcp_list_tools_events
,
McpLoopConfig
,
ToolLoopState
,
},
responses
::{
mask_tools_as_mcp
,
patch_streaming_response_json
,
rewrite_streaming_block
},
utils
::{
event_types
,
FunctionCallInProgress
,
OutputIndexMapper
,
StreamAction
},
};
use
crate
::{
data_connector
::{
SharedConversationItemStorage
,
SharedConversationStorage
,
SharedResponseStorage
,
},
protocols
::
responses
::{
ResponseToolType
,
ResponsesRequest
},
routers
::
header_utils
::{
apply_request_headers
,
preserve_response_headers
},
};
use
super
::
responses
::{
mask_tools_as_mcp
,
patch_streaming_response_json
,
rewrite_streaming_block
};
use
super
::
utils
::{
event_types
,
FunctionCallInProgress
,
OutputIndexMapper
,
StreamAction
};
// ============================================================================
// Streaming Response Accumulator
...
...
sgl-router/src/routers/router_manager.rs
View file @
dc01313d
...
...
@@ -4,16 +4,8 @@
//! - Single Router Mode (enable_igw=false): Router owns workers directly
//! - Multi-Router Mode (enable_igw=true): RouterManager coordinates everything
use
crate
::
config
::{
ConnectionMode
,
RoutingMode
};
use
crate
::
core
::{
WorkerRegistry
,
WorkerType
};
use
crate
::
protocols
::
chat
::
ChatCompletionRequest
;
use
crate
::
protocols
::
completion
::
CompletionRequest
;
use
crate
::
protocols
::
embedding
::
EmbeddingRequest
;
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
protocols
::
rerank
::
RerankRequest
;
use
crate
::
protocols
::
responses
::{
ResponsesGetParams
,
ResponsesRequest
};
use
crate
::
routers
::
RouterTrait
;
use
crate
::
server
::{
AppContext
,
ServerConfig
};
use
std
::
sync
::
Arc
;
use
async_trait
::
async_trait
;
use
axum
::{
body
::
Body
,
...
...
@@ -23,9 +15,23 @@ use axum::{
};
use
dashmap
::
DashMap
;
use
serde_json
::
Value
;
use
std
::
sync
::
Arc
;
use
tracing
::{
debug
,
info
,
warn
};
use
crate
::{
config
::{
ConnectionMode
,
RoutingMode
},
core
::{
WorkerRegistry
,
WorkerType
},
protocols
::{
chat
::
ChatCompletionRequest
,
completion
::
CompletionRequest
,
embedding
::
EmbeddingRequest
,
generate
::
GenerateRequest
,
rerank
::
RerankRequest
,
responses
::{
ResponsesGetParams
,
ResponsesRequest
},
},
routers
::
RouterTrait
,
server
::{
AppContext
,
ServerConfig
},
};
#[derive(Debug,
Clone,
Hash,
Eq,
PartialEq)]
pub
struct
RouterId
(
String
);
...
...
sgl-router/src/server.rs
View file @
dc01313d
use
std
::{
sync
::{
atomic
::{
AtomicBool
,
Ordering
},
Arc
,
OnceLock
,
},
time
::
Duration
,
};
use
axum
::{
extract
::{
Path
,
Query
,
Request
,
State
},
http
::
StatusCode
,
response
::{
IntoResponse
,
Response
},
routing
::{
delete
,
get
,
post
},
serve
,
Json
,
Router
,
};
use
reqwest
::
Client
;
use
serde
::
Deserialize
;
use
serde_json
::{
json
,
Value
};
use
tokio
::{
net
::
TcpListener
,
signal
,
spawn
};
use
tracing
::{
error
,
info
,
warn
,
Level
};
use
crate
::{
config
::{
ConnectionMode
,
HistoryBackend
,
RouterConfig
,
RoutingMode
},
core
::{
...
...
@@ -30,24 +51,6 @@ use crate::{
tokenizer
::{
factory
as
tokenizer_factory
,
traits
::
Tokenizer
},
tool_parser
::
ParserFactory
as
ToolParserFactory
,
};
use
axum
::{
extract
::{
Path
,
Query
,
Request
,
State
},
http
::
StatusCode
,
response
::{
IntoResponse
,
Response
},
routing
::{
delete
,
get
,
post
},
serve
,
Json
,
Router
,
};
use
reqwest
::
Client
;
use
serde
::
Deserialize
;
use
serde_json
::{
json
,
Value
};
use
std
::
sync
::
OnceLock
;
use
std
::{
sync
::
atomic
::{
AtomicBool
,
Ordering
},
sync
::
Arc
,
time
::
Duration
,
};
use
tokio
::{
net
::
TcpListener
,
signal
,
spawn
};
use
tracing
::{
error
,
info
,
warn
,
Level
};
//
...
...
sgl-router/src/service_discovery.rs
View file @
dc01313d
use
crate
::
core
::
WorkerManager
;
use
crate
::
protocols
::
worker_spec
::
WorkerConfigRequest
;
use
crate
::
server
::
AppContext
;
use
std
::{
collections
::{
HashMap
,
HashSet
},
sync
::{
Arc
,
Mutex
},
time
::
Duration
,
};
use
futures
::{
StreamExt
,
TryStreamExt
};
use
k8s_openapi
::
api
::
core
::
v1
::
Pod
;
use
kube
::{
api
::
Api
,
runtime
::
watcher
::{
watcher
,
Config
},
runtime
::
WatchStreamExt
,
runtime
::{
watcher
::{
watcher
,
Config
},
WatchStreamExt
,
},
Client
,
};
use
std
::
collections
::{
HashMap
,
HashSet
};
use
rustls
;
use
std
::
sync
::{
Arc
,
Mutex
};
use
std
::
time
::
Duration
;
use
tokio
::
task
;
use
tokio
::
time
;
use
tokio
::{
task
,
time
};
use
tracing
::{
debug
,
error
,
info
,
warn
};
use
crate
::{
core
::
WorkerManager
,
protocols
::
worker_spec
::
WorkerConfigRequest
,
server
::
AppContext
};
#[derive(Debug,
Clone)]
pub
struct
ServiceDiscoveryConfig
{
pub
enabled
:
bool
,
...
...
@@ -452,10 +453,12 @@ async fn handle_pod_deletion(
#[cfg(test)]
mod
tests
{
use
k8s_openapi
::{
api
::
core
::
v1
::{
Pod
,
PodCondition
,
PodSpec
,
PodStatus
},
apimachinery
::
pkg
::
apis
::
meta
::
v1
::{
ObjectMeta
,
Time
},
};
use
super
::
*
;
use
k8s_openapi
::
api
::
core
::
v1
::{
Pod
,
PodCondition
,
PodSpec
,
PodStatus
};
use
k8s_openapi
::
apimachinery
::
pkg
::
apis
::
meta
::
v1
::
ObjectMeta
;
use
k8s_openapi
::
apimachinery
::
pkg
::
apis
::
meta
::
v1
::
Time
;
fn
create_k8s_pod
(
name
:
Option
<&
str
>
,
...
...
@@ -535,8 +538,7 @@ mod tests {
}
async
fn
create_test_app_context
()
->
Arc
<
AppContext
>
{
use
crate
::
config
::
RouterConfig
;
use
crate
::
middleware
::
TokenBucket
;
use
crate
::{
config
::
RouterConfig
,
middleware
::
TokenBucket
};
let
router_config
=
RouterConfig
{
worker_startup_timeout_secs
:
1
,
...
...
sgl-router/src/tokenizer/chat_template.rs
View file @
dc01313d
...
...
@@ -3,11 +3,15 @@
//! This module provides functionality to apply chat templates to messages,
//! similar to HuggingFace transformers' apply_chat_template method.
use
std
::
collections
::
HashMap
;
use
anyhow
::{
anyhow
,
Result
};
use
minijinja
::
machinery
::
ast
::{
Expr
,
Stmt
};
use
minijinja
::{
context
,
Environment
,
Value
};
use
minijinja
::{
context
,
machinery
::
ast
::{
Expr
,
Stmt
},
Environment
,
Value
,
};
use
serde_json
;
use
std
::
collections
::
HashMap
;
/// Chat template content format
#[derive(Debug,
Clone,
Copy,
PartialEq,
Eq)]
...
...
@@ -319,8 +323,10 @@ impl<'a> Detector<'a> {
/// AST-based detection using minijinja's unstable machinery
/// Single-pass detector with scope tracking
fn
detect_format_with_ast
(
template
:
&
str
)
->
Option
<
ChatTemplateContentFormat
>
{
use
minijinja
::
machinery
::{
parse
,
WhitespaceConfig
};
use
minijinja
::
syntax
::
SyntaxConfig
;
use
minijinja
::{
machinery
::{
parse
,
WhitespaceConfig
},
syntax
::
SyntaxConfig
,
};
let
ast
=
match
parse
(
template
,
...
...
sgl-router/src/tokenizer/factory.rs
View file @
dc01313d
use
super
::
traits
;
use
std
::{
fs
::
File
,
io
::
Read
,
path
::
Path
,
sync
::
Arc
};
use
anyhow
::{
Error
,
Result
};
use
std
::
fs
::
File
;
use
std
::
io
::
Read
;
use
std
::
path
::
Path
;
use
std
::
sync
::
Arc
;
use
tracing
::{
debug
,
info
};
use
super
::
huggingface
::
HuggingFaceTokenizer
;
use
super
::
tiktoken
::
TiktokenTokenizer
;
use
super
::{
huggingface
::
HuggingFaceTokenizer
,
tiktoken
::
TiktokenTokenizer
,
traits
};
use
crate
::
tokenizer
::
hub
::
download_tokenizer_from_hf
;
/// Represents the type of tokenizer being used
...
...
@@ -379,8 +375,7 @@ pub fn get_tokenizer_info(file_path: &str) -> Result<TokenizerType> {
Some
(
"json"
)
=>
Ok
(
TokenizerType
::
HuggingFace
(
file_path
.to_string
())),
_
=>
{
// Try auto-detection
use
std
::
fs
::
File
;
use
std
::
io
::
Read
;
use
std
::{
fs
::
File
,
io
::
Read
};
let
mut
file
=
File
::
open
(
file_path
)
?
;
let
mut
buffer
=
vec!
[
0u8
;
512
];
...
...
sgl-router/src/tokenizer/hub.rs
View file @
dc01313d
use
std
::{
env
,
path
::{
Path
,
PathBuf
},
};
use
hf_hub
::
api
::
tokio
::
ApiBuilder
;
use
std
::
env
;
use
std
::
path
::{
Path
,
PathBuf
};
const
IGNORED
:
[
&
str
;
5
]
=
[
".gitattributes"
,
...
...
sgl-router/src/tokenizer/huggingface.rs
View file @
dc01313d
...
...
@@ -3,12 +3,12 @@ use std::collections::HashMap;
use
anyhow
::{
Error
,
Result
};
use
tokenizers
::
tokenizer
::
Tokenizer
as
HfTokenizer
;
use
super
::
chat_template
::
{
detect_
chat_template
_content_format
,
ChatTemplateContentFormat
,
ChatTemplateParams
,
ChatTemplateProcessor
,
};
use
super
::
traits
::{
Decoder
,
Encoder
,
Encoding
,
SpecialTokens
,
TokenIdType
,
Tokenizer
as
TokenizerTrait
,
use
super
::{
chat_template
::{
detect_chat_template_content_format
,
ChatTemplateContentFormat
,
ChatTemplateParams
,
ChatTemplateProcessor
,
},
traits
::{
Decoder
,
Encoder
,
Encoding
,
SpecialTokens
,
TokenIdType
,
Tokenizer
as
TokenizerTrait
}
,
};
/// HuggingFace tokenizer wrapper
...
...
sgl-router/src/tokenizer/mock.rs
View file @
dc01313d
//! Mock tokenizer implementation for testing
use
super
::
traits
::{
Decoder
,
Encoder
,
Encoding
,
SpecialTokens
,
Tokenizer
as
TokenizerTrait
};
use
anyhow
::
Result
;
use
std
::
collections
::
HashMap
;
use
anyhow
::
Result
;
use
super
::
traits
::{
Decoder
,
Encoder
,
Encoding
,
SpecialTokens
,
Tokenizer
as
TokenizerTrait
};
/// Mock tokenizer for testing purposes
pub
struct
MockTokenizer
{
vocab
:
HashMap
<
String
,
u32
>
,
...
...
sgl-router/src/tokenizer/mod.rs
View file @
dc01313d
use
std
::{
ops
::
Deref
,
sync
::
Arc
};
use
anyhow
::
Result
;
use
std
::
ops
::
Deref
;
use
std
::
sync
::
Arc
;
pub
mod
factory
;
pub
mod
hub
;
...
...
@@ -27,14 +27,12 @@ pub use factory::{
create_tokenizer_from_file
,
create_tokenizer_with_chat_template
,
create_tokenizer_with_chat_template_blocking
,
TokenizerType
,
};
pub
use
huggingface
::
HuggingFaceTokenizer
;
pub
use
sequence
::
Sequence
;
pub
use
stop
::{
SequenceDecoderOutput
,
StopSequenceConfig
,
StopSequenceDecoder
};
pub
use
stream
::
DecodeStream
;
pub
use
traits
::{
Decoder
,
Encoder
,
Encoding
,
SpecialTokens
,
Tokenizer
as
TokenizerTrait
};
pub
use
huggingface
::
HuggingFaceTokenizer
;
pub
use
tiktoken
::{
TiktokenModel
,
TiktokenTokenizer
};
pub
use
traits
::{
Decoder
,
Encoder
,
Encoding
,
SpecialTokens
,
Tokenizer
as
TokenizerTrait
};
/// Main tokenizer wrapper that provides a unified interface for different tokenizer implementations
#[derive(Clone)]
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment