Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
dc01313d
Unverified
Commit
dc01313d
authored
Oct 16, 2025
by
Chang Su
Committed by
GitHub
Oct 16, 2025
Browse files
[router] Add rustfmt and set group imports by default (#11732)
parent
7a7f99be
Changes
126
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
148 additions
and
113 deletions
+148
-113
sgl-router/src/protocols/common.rs
sgl-router/src/protocols/common.rs
+2
-1
sgl-router/src/protocols/completion.rs
sgl-router/src/protocols/completion.rs
+2
-1
sgl-router/src/protocols/generate.rs
sgl-router/src/protocols/generate.rs
+6
-3
sgl-router/src/protocols/rerank.rs
sgl-router/src/protocols/rerank.rs
+2
-1
sgl-router/src/protocols/responses.rs
sgl-router/src/protocols/responses.rs
+2
-1
sgl-router/src/protocols/validated.rs
sgl-router/src/protocols/validated.rs
+2
-1
sgl-router/src/protocols/worker_spec.rs
sgl-router/src/protocols/worker_spec.rs
+2
-1
sgl-router/src/reasoning_parser/factory.rs
sgl-router/src/reasoning_parser/factory.rs
+14
-8
sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs
sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs
+4
-2
sgl-router/src/reasoning_parser/parsers/glm45.rs
sgl-router/src/reasoning_parser/parsers/glm45.rs
+4
-2
sgl-router/src/reasoning_parser/parsers/kimi.rs
sgl-router/src/reasoning_parser/parsers/kimi.rs
+4
-2
sgl-router/src/reasoning_parser/parsers/qwen3.rs
sgl-router/src/reasoning_parser/parsers/qwen3.rs
+4
-2
sgl-router/src/reasoning_parser/parsers/step3.rs
sgl-router/src/reasoning_parser/parsers/step3.rs
+4
-2
sgl-router/src/routers/factory.rs
sgl-router/src/routers/factory.rs
+8
-6
sgl-router/src/routers/grpc/context.rs
sgl-router/src/routers/grpc/context.rs
+12
-10
sgl-router/src/routers/grpc/mod.rs
sgl-router/src/routers/grpc/mod.rs
+1
-2
sgl-router/src/routers/grpc/pd_router.rs
sgl-router/src/routers/grpc/pd_router.rs
+21
-18
sgl-router/src/routers/grpc/pipeline.rs
sgl-router/src/routers/grpc/pipeline.rs
+17
-17
sgl-router/src/routers/grpc/processing.rs
sgl-router/src/routers/grpc/processing.rs
+18
-16
sgl-router/src/routers/grpc/router.rs
sgl-router/src/routers/grpc/router.rs
+19
-17
No files found.
sgl-router/src/protocols/common.rs
View file @
dc01313d
use
std
::
collections
::
HashMap
;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
std
::
collections
::
HashMap
;
// ============================================================================
// ============================================================================
// Default value helpers
// Default value helpers
...
...
sgl-router/src/protocols/completion.rs
View file @
dc01313d
use
std
::
collections
::
HashMap
;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde_json
::{
Map
,
Value
};
use
serde_json
::{
Map
,
Value
};
use
std
::
collections
::
HashMap
;
use
super
::
common
::
*
;
use
super
::
common
::
*
;
...
...
sgl-router/src/protocols/generate.rs
View file @
dc01313d
use
std
::
collections
::
HashMap
;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
std
::
collections
::
HashMap
;
use
validator
::
Validate
;
use
validator
::
Validate
;
use
super
::
common
::{
default_true
,
GenerationRequest
,
InputIds
};
use
super
::{
use
super
::
sampling_params
::
SamplingParams
;
common
::{
default_true
,
GenerationRequest
,
InputIds
},
sampling_params
::
SamplingParams
,
};
use
crate
::
protocols
::
validated
::
Normalizable
;
use
crate
::
protocols
::
validated
::
Normalizable
;
// ============================================================================
// ============================================================================
...
...
sgl-router/src/protocols/rerank.rs
View file @
dc01313d
use
std
::
collections
::
HashMap
;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
std
::
collections
::
HashMap
;
use
validator
::
Validate
;
use
validator
::
Validate
;
use
super
::
common
::{
default_model
,
default_true
,
GenerationRequest
,
StringOrArray
,
UsageInfo
};
use
super
::
common
::{
default_model
,
default_true
,
GenerationRequest
,
StringOrArray
,
UsageInfo
};
...
...
sgl-router/src/protocols/responses.rs
View file @
dc01313d
// OpenAI Responses API types
// OpenAI Responses API types
// https://platform.openai.com/docs/api-reference/responses
// https://platform.openai.com/docs/api-reference/responses
use
std
::
collections
::
HashMap
;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
std
::
collections
::
HashMap
;
// Import shared types from common module
// Import shared types from common module
use
super
::
common
::{
use
super
::
common
::{
...
...
sgl-router/src/protocols/validated.rs
View file @
dc01313d
...
@@ -117,10 +117,11 @@ impl<T> std::ops::DerefMut for ValidatedJson<T> {
...
@@ -117,10 +117,11 @@ impl<T> std::ops::DerefMut for ValidatedJson<T> {
#[cfg(test)]
#[cfg(test)]
mod
tests
{
mod
tests
{
use
super
::
*
;
use
serde
::{
Deserialize
,
Serialize
};
use
serde
::{
Deserialize
,
Serialize
};
use
validator
::
Validate
;
use
validator
::
Validate
;
use
super
::
*
;
#[derive(Debug,
Deserialize,
Serialize,
Validate)]
#[derive(Debug,
Deserialize,
Serialize,
Validate)]
struct
TestRequest
{
struct
TestRequest
{
#[validate(range(min
=
0.0
,
max
=
1.0
))]
#[validate(range(min
=
0.0
,
max
=
1.0
))]
...
...
sgl-router/src/protocols/worker_spec.rs
View file @
dc01313d
...
@@ -2,9 +2,10 @@
...
@@ -2,9 +2,10 @@
//!
//!
//! Defines the request/response structures for worker management endpoints
//! Defines the request/response structures for worker management endpoints
use
serde
::{
Deserialize
,
Serialize
};
use
std
::
collections
::
HashMap
;
use
std
::
collections
::
HashMap
;
use
serde
::{
Deserialize
,
Serialize
};
/// Worker configuration for API requests
/// Worker configuration for API requests
#[derive(Debug,
Clone,
Deserialize,
Serialize)]
#[derive(Debug,
Clone,
Deserialize,
Serialize)]
pub
struct
WorkerConfigRequest
{
pub
struct
WorkerConfigRequest
{
...
...
sgl-router/src/reasoning_parser/factory.rs
View file @
dc01313d
// Factory and registry for creating model-specific reasoning parsers.
// Factory and registry for creating model-specific reasoning parsers.
// Now with parser pooling support for efficient reuse across requests.
// Now with parser pooling support for efficient reuse across requests.
use
std
::
collections
::
HashMap
;
use
std
::{
use
std
::
sync
::{
Arc
,
RwLock
};
collections
::
HashMap
,
sync
::{
Arc
,
RwLock
},
};
use
tokio
::
sync
::
Mutex
;
use
tokio
::
sync
::
Mutex
;
use
crate
::
reasoning_parser
::
parsers
::{
use
crate
::
reasoning_parser
::{
BaseReasoningParser
,
DeepSeekR1Parser
,
Glm45Parser
,
KimiParser
,
Qwen3Parser
,
parsers
::{
QwenThinkingParser
,
Step3Parser
,
BaseReasoningParser
,
DeepSeekR1Parser
,
Glm45Parser
,
KimiParser
,
Qwen3Parser
,
QwenThinkingParser
,
Step3Parser
,
},
traits
::{
ParseError
,
ParserConfig
,
ReasoningParser
},
};
};
use
crate
::
reasoning_parser
::
traits
::{
ParseError
,
ParserConfig
,
ReasoningParser
};
/// Type alias for pooled parser instances.
/// Type alias for pooled parser instances.
/// Uses tokio::Mutex to avoid blocking the async executor.
/// Uses tokio::Mutex to avoid blocking the async executor.
...
@@ -402,8 +406,10 @@ mod tests {
...
@@ -402,8 +406,10 @@ mod tests {
#[tokio::test(flavor
=
"multi_thread"
,
worker_threads
=
8
)]
#[tokio::test(flavor
=
"multi_thread"
,
worker_threads
=
8
)]
async
fn
test_high_concurrency_parser_access
()
{
async
fn
test_high_concurrency_parser_access
()
{
use
std
::
sync
::
atomic
::{
AtomicUsize
,
Ordering
};
use
std
::{
use
std
::
time
::
Instant
;
sync
::
atomic
::{
AtomicUsize
,
Ordering
},
time
::
Instant
,
};
let
factory
=
ParserFactory
::
new
();
let
factory
=
ParserFactory
::
new
();
let
num_tasks
=
100
;
let
num_tasks
=
100
;
...
...
sgl-router/src/reasoning_parser/parsers/deepseek_r1.rs
View file @
dc01313d
...
@@ -2,8 +2,10 @@
...
@@ -2,8 +2,10 @@
// This parser starts with in_reasoning=true, assuming all text is reasoning
// This parser starts with in_reasoning=true, assuming all text is reasoning
// until an end token is encountered.
// until an end token is encountered.
use
crate
::
reasoning_parser
::
parsers
::
BaseReasoningParser
;
use
crate
::
reasoning_parser
::{
use
crate
::
reasoning_parser
::
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
};
parsers
::
BaseReasoningParser
,
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
},
};
/// DeepSeek-R1 reasoning parser.
/// DeepSeek-R1 reasoning parser.
///
///
...
...
sgl-router/src/reasoning_parser/parsers/glm45.rs
View file @
dc01313d
// GLM45 specific reasoning parser.
// GLM45 specific reasoning parser.
// Uses the same format as Qwen3 but has its own implementation for debugging.
// Uses the same format as Qwen3 but has its own implementation for debugging.
use
crate
::
reasoning_parser
::
parsers
::
BaseReasoningParser
;
use
crate
::
reasoning_parser
::{
use
crate
::
reasoning_parser
::
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
};
parsers
::
BaseReasoningParser
,
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
},
};
/// GLM45 reasoning parser.
/// GLM45 reasoning parser.
///
///
...
...
sgl-router/src/reasoning_parser/parsers/kimi.rs
View file @
dc01313d
// Kimi specific reasoning parser.
// Kimi specific reasoning parser.
// This parser uses Unicode tokens and starts with in_reasoning=false.
// This parser uses Unicode tokens and starts with in_reasoning=false.
use
crate
::
reasoning_parser
::
parsers
::
BaseReasoningParser
;
use
crate
::
reasoning_parser
::{
use
crate
::
reasoning_parser
::
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
};
parsers
::
BaseReasoningParser
,
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
},
};
/// Kimi reasoning parser.
/// Kimi reasoning parser.
///
///
...
...
sgl-router/src/reasoning_parser/parsers/qwen3.rs
View file @
dc01313d
...
@@ -2,8 +2,10 @@
...
@@ -2,8 +2,10 @@
// This parser starts with in_reasoning=false, requiring an explicit
// This parser starts with in_reasoning=false, requiring an explicit
// start token to enter reasoning mode.
// start token to enter reasoning mode.
use
crate
::
reasoning_parser
::
parsers
::
BaseReasoningParser
;
use
crate
::
reasoning_parser
::{
use
crate
::
reasoning_parser
::
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
};
parsers
::
BaseReasoningParser
,
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
},
};
/// Qwen3 reasoning parser.
/// Qwen3 reasoning parser.
///
///
...
...
sgl-router/src/reasoning_parser/parsers/step3.rs
View file @
dc01313d
// Step3 specific reasoning parser.
// Step3 specific reasoning parser.
// Uses the same format as DeepSeek-R1 but has its own implementation for debugging.
// Uses the same format as DeepSeek-R1 but has its own implementation for debugging.
use
crate
::
reasoning_parser
::
parsers
::
BaseReasoningParser
;
use
crate
::
reasoning_parser
::{
use
crate
::
reasoning_parser
::
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
};
parsers
::
BaseReasoningParser
,
traits
::{
ParseError
,
ParserConfig
,
ParserResult
,
ReasoningParser
},
};
/// Step3 reasoning parser.
/// Step3 reasoning parser.
///
///
...
...
sgl-router/src/routers/factory.rs
View file @
dc01313d
//! Factory for creating router instances
//! Factory for creating router instances
use
s
uper
::
grpc
::
pd_router
::
GrpcPDRouter
;
use
s
td
::
sync
::
Arc
;
use
super
::
grpc
::
router
::
GrpcRouter
;
use
super
::{
use
super
::{
grpc
::{
pd_router
::
GrpcPDRouter
,
router
::
GrpcRouter
},
http
::{
pd_router
::
PDRouter
,
router
::
Router
},
http
::{
pd_router
::
PDRouter
,
router
::
Router
},
openai
::
OpenAIRouter
,
openai
::
OpenAIRouter
,
RouterTrait
,
RouterTrait
,
};
};
use
crate
::
config
::{
ConnectionMode
,
PolicyConfig
,
RoutingMode
};
use
crate
::{
use
crate
::
policies
::
PolicyFactory
;
config
::{
ConnectionMode
,
PolicyConfig
,
RoutingMode
},
use
crate
::
server
::
AppContext
;
policies
::
PolicyFactory
,
use
std
::
sync
::
Arc
;
server
::
AppContext
,
};
/// Factory for creating router instances based on configuration
/// Factory for creating router instances based on configuration
pub
struct
RouterFactory
;
pub
struct
RouterFactory
;
...
...
sgl-router/src/routers/grpc/context.rs
View file @
dc01313d
...
@@ -4,20 +4,22 @@
...
@@ -4,20 +4,22 @@
//! eliminating deep parameter passing chains and providing a single source of truth
//! eliminating deep parameter passing chains and providing a single source of truth
//! for request state.
//! for request state.
use
std
::
collections
::
HashMap
;
use
std
::{
collections
::
HashMap
,
sync
::
Arc
};
use
std
::
sync
::
Arc
;
use
axum
::
http
::
HeaderMap
;
use
axum
::
http
::
HeaderMap
;
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
crate
::
core
::
Worker
;
use
crate
::{
use
crate
::
grpc_client
::{
proto
,
SglangSchedulerClient
};
core
::
Worker
,
use
crate
::
protocols
::
chat
::{
ChatCompletionRequest
,
ChatCompletionResponse
};
grpc_client
::{
proto
,
SglangSchedulerClient
},
use
crate
::
protocols
::
generate
::{
GenerateRequest
,
GenerateResponse
};
protocols
::{
use
crate
::
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
;
chat
::{
ChatCompletionRequest
,
ChatCompletionResponse
},
use
crate
::
tokenizer
::
stop
::
StopSequenceDecoder
;
generate
::{
GenerateRequest
,
GenerateResponse
},
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
},
use
crate
::
tool_parser
::
ParserFactory
as
ToolParserFactory
;
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
,
tokenizer
::{
stop
::
StopSequenceDecoder
,
traits
::
Tokenizer
},
tool_parser
::
ParserFactory
as
ToolParserFactory
,
};
// ============================================================================
// ============================================================================
// Core Context Types
// Core Context Types
...
...
sgl-router/src/routers/grpc/mod.rs
View file @
dc01313d
//! gRPC router implementations
//! gRPC router implementations
use
crate
::
grpc_client
::
proto
;
use
crate
::{
grpc_client
::
proto
,
protocols
::
common
::
StringOrArray
};
use
crate
::
protocols
::
common
::
StringOrArray
;
pub
mod
context
;
pub
mod
context
;
pub
mod
pd_router
;
pub
mod
pd_router
;
...
...
sgl-router/src/routers/grpc/pd_router.rs
View file @
dc01313d
// PD (Prefill-Decode) gRPC Router Implementation
// PD (Prefill-Decode) gRPC Router Implementation
use
crate
::
config
::
types
::
RetryConfig
;
use
std
::
sync
::
Arc
;
use
crate
::
core
::{
ConnectionMode
,
WorkerRegistry
,
WorkerType
};
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
protocols
::
chat
::
ChatCompletionRequest
;
use
crate
::
protocols
::
completion
::
CompletionRequest
;
use
crate
::
protocols
::
embedding
::
EmbeddingRequest
;
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
protocols
::
rerank
::
RerankRequest
;
use
crate
::
protocols
::
responses
::{
ResponsesGetParams
,
ResponsesRequest
};
use
crate
::
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
;
use
crate
::
routers
::
RouterTrait
;
use
crate
::
server
::
AppContext
;
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tool_parser
::
ParserFactory
as
ToolParserFactory
;
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
axum
::{
use
axum
::{
body
::
Body
,
body
::
Body
,
...
@@ -21,12 +9,27 @@ use axum::{
...
@@ -21,12 +9,27 @@ use axum::{
http
::{
HeaderMap
,
StatusCode
},
http
::{
HeaderMap
,
StatusCode
},
response
::{
IntoResponse
,
Response
},
response
::{
IntoResponse
,
Response
},
};
};
use
std
::
sync
::
Arc
;
use
tracing
::
debug
;
use
tracing
::
debug
;
use
super
::
context
::
SharedComponents
;
use
super
::{
context
::
SharedComponents
,
pipeline
::
RequestPipeline
};
use
super
::
pipeline
::
RequestPipeline
;
use
crate
::{
config
::
types
::
RetryConfig
,
core
::{
ConnectionMode
,
WorkerRegistry
,
WorkerType
},
policies
::
PolicyRegistry
,
protocols
::{
chat
::
ChatCompletionRequest
,
completion
::
CompletionRequest
,
embedding
::
EmbeddingRequest
,
generate
::
GenerateRequest
,
rerank
::
RerankRequest
,
responses
::{
ResponsesGetParams
,
ResponsesRequest
},
},
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
,
routers
::
RouterTrait
,
server
::
AppContext
,
tokenizer
::
traits
::
Tokenizer
,
tool_parser
::
ParserFactory
as
ToolParserFactory
,
};
/// gRPC PD (Prefill-Decode) router implementation for SGLang
/// gRPC PD (Prefill-Decode) router implementation for SGLang
#[derive(Clone)]
#[derive(Clone)]
...
...
sgl-router/src/routers/grpc/pipeline.rs
View file @
dc01313d
...
@@ -3,29 +3,29 @@
...
@@ -3,29 +3,29 @@
//! This module defines the core pipeline abstraction and individual processing stages
//! This module defines the core pipeline abstraction and individual processing stages
//! that transform a RequestContext through its lifecycle.
//! that transform a RequestContext through its lifecycle.
use
std
::{
sync
::
Arc
,
time
::{
Instant
,
SystemTime
,
UNIX_EPOCH
},
};
use
async_trait
::
async_trait
;
use
async_trait
::
async_trait
;
use
axum
::
response
::{
IntoResponse
,
Response
};
use
axum
::
response
::{
IntoResponse
,
Response
};
use
tracing
::{
debug
,
error
,
warn
};
use
super
::
context
::
*
;
use
super
::
processing
;
use
super
::
streaming
;
use
super
::
utils
;
use
crate
::
core
::{
ConnectionMode
,
Worker
,
WorkerRegistry
,
WorkerType
};
use
crate
::
grpc_client
::
proto
;
use
crate
::
policies
::
PolicyRegistry
;
use
crate
::
protocols
::
chat
::
ChatCompletionRequest
;
use
crate
::
protocols
::
common
::
InputIds
;
use
crate
::
protocols
::
generate
::
GenerateRequest
;
use
crate
::
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
;
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tool_parser
::
ParserFactory
as
ToolParserFactory
;
use
proto
::
DisaggregatedParams
;
use
proto
::
DisaggregatedParams
;
use
rand
::
Rng
;
use
rand
::
Rng
;
use
std
::
sync
::
Arc
;
use
tracing
::{
debug
,
error
,
warn
};
use
std
::
time
::{
Instant
,
SystemTime
,
UNIX_EPOCH
};
use
uuid
::
Uuid
;
use
uuid
::
Uuid
;
use
super
::{
context
::
*
,
processing
,
streaming
,
utils
};
use
crate
::{
core
::{
ConnectionMode
,
Worker
,
WorkerRegistry
,
WorkerType
},
grpc_client
::
proto
,
policies
::
PolicyRegistry
,
protocols
::{
chat
::
ChatCompletionRequest
,
common
::
InputIds
,
generate
::
GenerateRequest
},
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
,
tokenizer
::
traits
::
Tokenizer
,
tool_parser
::
ParserFactory
as
ToolParserFactory
,
};
// ============================================================================
// ============================================================================
// Pipeline Trait
// Pipeline Trait
// ============================================================================
// ============================================================================
...
...
sgl-router/src/routers/grpc/processing.rs
View file @
dc01313d
...
@@ -3,28 +3,30 @@
...
@@ -3,28 +3,30 @@
//! This module contains response processing functions that are shared between
//! This module contains response processing functions that are shared between
//! the regular router and PD router, eliminating ~1,200 lines of exact duplicates.
//! the regular router and PD router, eliminating ~1,200 lines of exact duplicates.
use
std
::
sync
::
Arc
;
use
std
::
{
sync
::
Arc
,
time
::
Instant
}
;
use
proto
::
generate_complete
::
MatchedStop
;
use
serde_json
::
Value
;
use
serde_json
::
Value
;
use
tracing
::
error
;
use
tracing
::
error
;
use
crate
::
grpc_client
::
proto
;
use
super
::{
use
crate
::
protocols
::
chat
::{
context
::{
DispatchMetadata
,
ExecutionResult
},
ChatChoice
,
ChatCompletionMessage
,
ChatCompletionRequest
,
ChatCompletionResponse
,
utils
,
};
};
use
crate
::
protocols
::
common
::{
use
crate
::{
FunctionCallResponse
,
ToolCall
,
ToolChoice
,
ToolChoiceValue
,
Usage
,
grpc_client
::
proto
,
protocols
::{
chat
::{
ChatChoice
,
ChatCompletionMessage
,
ChatCompletionRequest
,
ChatCompletionResponse
},
common
::{
FunctionCallResponse
,
ToolCall
,
ToolChoice
,
ToolChoiceValue
,
Usage
},
generate
::{
GenerateMetaInfo
,
GenerateRequest
,
GenerateResponse
},
},
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
,
tokenizer
::{
stop
::{
SequenceDecoderOutput
,
StopSequenceDecoder
},
traits
::
Tokenizer
,
},
tool_parser
::
ParserFactory
as
ToolParserFactory
,
};
};
use
crate
::
protocols
::
generate
::{
GenerateMetaInfo
,
GenerateRequest
,
GenerateResponse
};
use
crate
::
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
;
use
crate
::
tokenizer
::
stop
::{
SequenceDecoderOutput
,
StopSequenceDecoder
};
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
use
crate
::
tool_parser
::
ParserFactory
as
ToolParserFactory
;
use
proto
::
generate_complete
::
MatchedStop
;
use
std
::
time
::
Instant
;
use
super
::
context
::{
DispatchMetadata
,
ExecutionResult
};
use
super
::
utils
;
// ============================================================================
// ============================================================================
// Response Processor - Main Entry Point
// Response Processor - Main Entry Point
...
...
sgl-router/src/routers/grpc/router.rs
View file @
dc01313d
...
@@ -11,23 +11,25 @@ use axum::{
...
@@ -11,23 +11,25 @@ use axum::{
};
};
use
tracing
::
debug
;
use
tracing
::
debug
;
use
crate
::
config
::
types
::
RetryConfig
;
use
super
::{
context
::
SharedComponents
,
pipeline
::
RequestPipeline
};
use
crate
::
core
::
WorkerRegistry
;
use
crate
::{
use
crate
::
policies
::
PolicyRegistry
;
config
::
types
::
RetryConfig
,
use
crate
::
protocols
::
chat
::
ChatCompletionRequest
;
core
::
WorkerRegistry
,
use
crate
::
protocols
::
completion
::
CompletionRequest
;
policies
::
PolicyRegistry
,
use
crate
::
protocols
::
embedding
::
EmbeddingRequest
;
protocols
::{
use
crate
::
protocols
::
generate
::
GenerateRequest
;
chat
::
ChatCompletionRequest
,
use
crate
::
protocols
::
rerank
::
RerankRequest
;
completion
::
CompletionRequest
,
use
crate
::
protocols
::
responses
::{
ResponsesGetParams
,
ResponsesRequest
};
embedding
::
EmbeddingRequest
,
use
crate
::
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
;
generate
::
GenerateRequest
,
use
crate
::
routers
::
RouterTrait
;
rerank
::
RerankRequest
,
use
crate
::
server
::
AppContext
;
responses
::{
ResponsesGetParams
,
ResponsesRequest
},
use
crate
::
tokenizer
::
traits
::
Tokenizer
;
},
use
crate
::
tool_parser
::
ParserFactory
as
ToolParserFactory
;
reasoning_parser
::
ParserFactory
as
ReasoningParserFactory
,
routers
::
RouterTrait
,
use
super
::
context
::
SharedComponents
;
server
::
AppContext
,
use
super
::
pipeline
::
RequestPipeline
;
tokenizer
::
traits
::
Tokenizer
,
tool_parser
::
ParserFactory
as
ToolParserFactory
,
};
/// gRPC router implementation for SGLang
/// gRPC router implementation for SGLang
#[derive(Clone)]
#[derive(Clone)]
...
...
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment