Unverified Commit dc01313d authored by Chang Su's avatar Chang Su Committed by GitHub
Browse files

[router] Add rustfmt and set group imports by default (#11732)

parent 7a7f99be
use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use std::collections::HashMap;
// ============================================================================ // ============================================================================
// Default value helpers // Default value helpers
......
use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::{Map, Value}; use serde_json::{Map, Value};
use std::collections::HashMap;
use super::common::*; use super::common::*;
......
use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use std::collections::HashMap;
use validator::Validate; use validator::Validate;
use super::common::{default_true, GenerationRequest, InputIds}; use super::{
use super::sampling_params::SamplingParams; common::{default_true, GenerationRequest, InputIds},
sampling_params::SamplingParams,
};
use crate::protocols::validated::Normalizable; use crate::protocols::validated::Normalizable;
// ============================================================================ // ============================================================================
......
use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use std::collections::HashMap;
use validator::Validate; use validator::Validate;
use super::common::{default_model, default_true, GenerationRequest, StringOrArray, UsageInfo}; use super::common::{default_model, default_true, GenerationRequest, StringOrArray, UsageInfo};
......
// OpenAI Responses API types // OpenAI Responses API types
// https://platform.openai.com/docs/api-reference/responses // https://platform.openai.com/docs/api-reference/responses
use std::collections::HashMap;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use std::collections::HashMap;
// Import shared types from common module // Import shared types from common module
use super::common::{ use super::common::{
......
...@@ -117,10 +117,11 @@ impl<T> std::ops::DerefMut for ValidatedJson<T> { ...@@ -117,10 +117,11 @@ impl<T> std::ops::DerefMut for ValidatedJson<T> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use validator::Validate; use validator::Validate;
use super::*;
#[derive(Debug, Deserialize, Serialize, Validate)] #[derive(Debug, Deserialize, Serialize, Validate)]
struct TestRequest { struct TestRequest {
#[validate(range(min = 0.0, max = 1.0))] #[validate(range(min = 0.0, max = 1.0))]
......
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
//! //!
//! Defines the request/response structures for worker management endpoints //! Defines the request/response structures for worker management endpoints
use serde::{Deserialize, Serialize};
use std::collections::HashMap; use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Worker configuration for API requests /// Worker configuration for API requests
#[derive(Debug, Clone, Deserialize, Serialize)] #[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerConfigRequest { pub struct WorkerConfigRequest {
......
// Factory and registry for creating model-specific reasoning parsers. // Factory and registry for creating model-specific reasoning parsers.
// Now with parser pooling support for efficient reuse across requests. // Now with parser pooling support for efficient reuse across requests.
use std::collections::HashMap; use std::{
use std::sync::{Arc, RwLock}; collections::HashMap,
sync::{Arc, RwLock},
};
use tokio::sync::Mutex; use tokio::sync::Mutex;
use crate::reasoning_parser::parsers::{ use crate::reasoning_parser::{
BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, Qwen3Parser, parsers::{
QwenThinkingParser, Step3Parser, BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, Qwen3Parser,
QwenThinkingParser, Step3Parser,
},
traits::{ParseError, ParserConfig, ReasoningParser},
}; };
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ReasoningParser};
/// Type alias for pooled parser instances. /// Type alias for pooled parser instances.
/// Uses tokio::Mutex to avoid blocking the async executor. /// Uses tokio::Mutex to avoid blocking the async executor.
...@@ -402,8 +406,10 @@ mod tests { ...@@ -402,8 +406,10 @@ mod tests {
#[tokio::test(flavor = "multi_thread", worker_threads = 8)] #[tokio::test(flavor = "multi_thread", worker_threads = 8)]
async fn test_high_concurrency_parser_access() { async fn test_high_concurrency_parser_access() {
use std::sync::atomic::{AtomicUsize, Ordering}; use std::{
use std::time::Instant; sync::atomic::{AtomicUsize, Ordering},
time::Instant,
};
let factory = ParserFactory::new(); let factory = ParserFactory::new();
let num_tasks = 100; let num_tasks = 100;
......
...@@ -2,8 +2,10 @@ ...@@ -2,8 +2,10 @@
// This parser starts with in_reasoning=true, assuming all text is reasoning // This parser starts with in_reasoning=true, assuming all text is reasoning
// until an end token is encountered. // until an end token is encountered.
use crate::reasoning_parser::parsers::BaseReasoningParser; use crate::reasoning_parser::{
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// DeepSeek-R1 reasoning parser. /// DeepSeek-R1 reasoning parser.
/// ///
......
// GLM45 specific reasoning parser. // GLM45 specific reasoning parser.
// Uses the same format as Qwen3 but has its own implementation for debugging. // Uses the same format as Qwen3 but has its own implementation for debugging.
use crate::reasoning_parser::parsers::BaseReasoningParser; use crate::reasoning_parser::{
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// GLM45 reasoning parser. /// GLM45 reasoning parser.
/// ///
......
// Kimi specific reasoning parser. // Kimi specific reasoning parser.
// This parser uses Unicode tokens and starts with in_reasoning=false. // This parser uses Unicode tokens and starts with in_reasoning=false.
use crate::reasoning_parser::parsers::BaseReasoningParser; use crate::reasoning_parser::{
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// Kimi reasoning parser. /// Kimi reasoning parser.
/// ///
......
...@@ -2,8 +2,10 @@ ...@@ -2,8 +2,10 @@
// This parser starts with in_reasoning=false, requiring an explicit // This parser starts with in_reasoning=false, requiring an explicit
// start token to enter reasoning mode. // start token to enter reasoning mode.
use crate::reasoning_parser::parsers::BaseReasoningParser; use crate::reasoning_parser::{
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// Qwen3 reasoning parser. /// Qwen3 reasoning parser.
/// ///
......
// Step3 specific reasoning parser. // Step3 specific reasoning parser.
// Uses the same format as DeepSeek-R1 but has its own implementation for debugging. // Uses the same format as DeepSeek-R1 but has its own implementation for debugging.
use crate::reasoning_parser::parsers::BaseReasoningParser; use crate::reasoning_parser::{
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser}; parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// Step3 reasoning parser. /// Step3 reasoning parser.
/// ///
......
//! Factory for creating router instances //! Factory for creating router instances
use super::grpc::pd_router::GrpcPDRouter; use std::sync::Arc;
use super::grpc::router::GrpcRouter;
use super::{ use super::{
grpc::{pd_router::GrpcPDRouter, router::GrpcRouter},
http::{pd_router::PDRouter, router::Router}, http::{pd_router::PDRouter, router::Router},
openai::OpenAIRouter, openai::OpenAIRouter,
RouterTrait, RouterTrait,
}; };
use crate::config::{ConnectionMode, PolicyConfig, RoutingMode}; use crate::{
use crate::policies::PolicyFactory; config::{ConnectionMode, PolicyConfig, RoutingMode},
use crate::server::AppContext; policies::PolicyFactory,
use std::sync::Arc; server::AppContext,
};
/// Factory for creating router instances based on configuration /// Factory for creating router instances based on configuration
pub struct RouterFactory; pub struct RouterFactory;
......
...@@ -4,20 +4,22 @@ ...@@ -4,20 +4,22 @@
//! eliminating deep parameter passing chains and providing a single source of truth //! eliminating deep parameter passing chains and providing a single source of truth
//! for request state. //! for request state.
use std::collections::HashMap; use std::{collections::HashMap, sync::Arc};
use std::sync::Arc;
use axum::http::HeaderMap; use axum::http::HeaderMap;
use serde_json::Value; use serde_json::Value;
use crate::core::Worker; use crate::{
use crate::grpc_client::{proto, SglangSchedulerClient}; core::Worker,
use crate::protocols::chat::{ChatCompletionRequest, ChatCompletionResponse}; grpc_client::{proto, SglangSchedulerClient},
use crate::protocols::generate::{GenerateRequest, GenerateResponse}; protocols::{
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory; chat::{ChatCompletionRequest, ChatCompletionResponse},
use crate::tokenizer::stop::StopSequenceDecoder; generate::{GenerateRequest, GenerateResponse},
use crate::tokenizer::traits::Tokenizer; },
use crate::tool_parser::ParserFactory as ToolParserFactory; reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::{stop::StopSequenceDecoder, traits::Tokenizer},
tool_parser::ParserFactory as ToolParserFactory,
};
// ============================================================================ // ============================================================================
// Core Context Types // Core Context Types
......
//! gRPC router implementations //! gRPC router implementations
use crate::grpc_client::proto; use crate::{grpc_client::proto, protocols::common::StringOrArray};
use crate::protocols::common::StringOrArray;
pub mod context; pub mod context;
pub mod pd_router; pub mod pd_router;
......
// PD (Prefill-Decode) gRPC Router Implementation // PD (Prefill-Decode) gRPC Router Implementation
use crate::config::types::RetryConfig; use std::sync::Arc;
use crate::core::{ConnectionMode, WorkerRegistry, WorkerType};
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::completion::CompletionRequest;
use crate::protocols::embedding::EmbeddingRequest;
use crate::protocols::generate::GenerateRequest;
use crate::protocols::rerank::RerankRequest;
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::routers::RouterTrait;
use crate::server::AppContext;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use async_trait::async_trait; use async_trait::async_trait;
use axum::{ use axum::{
body::Body, body::Body,
...@@ -21,12 +9,27 @@ use axum::{ ...@@ -21,12 +9,27 @@ use axum::{
http::{HeaderMap, StatusCode}, http::{HeaderMap, StatusCode},
response::{IntoResponse, Response}, response::{IntoResponse, Response},
}; };
use std::sync::Arc;
use tracing::debug; use tracing::debug;
use super::context::SharedComponents; use super::{context::SharedComponents, pipeline::RequestPipeline};
use super::pipeline::RequestPipeline; use crate::{
config::types::RetryConfig,
core::{ConnectionMode, WorkerRegistry, WorkerType},
policies::PolicyRegistry,
protocols::{
chat::ChatCompletionRequest,
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
routers::RouterTrait,
server::AppContext,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
/// gRPC PD (Prefill-Decode) router implementation for SGLang /// gRPC PD (Prefill-Decode) router implementation for SGLang
#[derive(Clone)] #[derive(Clone)]
......
...@@ -3,29 +3,29 @@ ...@@ -3,29 +3,29 @@
//! This module defines the core pipeline abstraction and individual processing stages //! This module defines the core pipeline abstraction and individual processing stages
//! that transform a RequestContext through its lifecycle. //! that transform a RequestContext through its lifecycle.
use std::{
sync::Arc,
time::{Instant, SystemTime, UNIX_EPOCH},
};
use async_trait::async_trait; use async_trait::async_trait;
use axum::response::{IntoResponse, Response}; use axum::response::{IntoResponse, Response};
use tracing::{debug, error, warn};
use super::context::*;
use super::processing;
use super::streaming;
use super::utils;
use crate::core::{ConnectionMode, Worker, WorkerRegistry, WorkerType};
use crate::grpc_client::proto;
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::common::InputIds;
use crate::protocols::generate::GenerateRequest;
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use proto::DisaggregatedParams; use proto::DisaggregatedParams;
use rand::Rng; use rand::Rng;
use std::sync::Arc; use tracing::{debug, error, warn};
use std::time::{Instant, SystemTime, UNIX_EPOCH};
use uuid::Uuid; use uuid::Uuid;
use super::{context::*, processing, streaming, utils};
use crate::{
core::{ConnectionMode, Worker, WorkerRegistry, WorkerType},
grpc_client::proto,
policies::PolicyRegistry,
protocols::{chat::ChatCompletionRequest, common::InputIds, generate::GenerateRequest},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
// ============================================================================ // ============================================================================
// Pipeline Trait // Pipeline Trait
// ============================================================================ // ============================================================================
......
...@@ -3,28 +3,30 @@ ...@@ -3,28 +3,30 @@
//! This module contains response processing functions that are shared between //! This module contains response processing functions that are shared between
//! the regular router and PD router, eliminating ~1,200 lines of exact duplicates. //! the regular router and PD router, eliminating ~1,200 lines of exact duplicates.
use std::sync::Arc; use std::{sync::Arc, time::Instant};
use proto::generate_complete::MatchedStop;
use serde_json::Value; use serde_json::Value;
use tracing::error; use tracing::error;
use crate::grpc_client::proto; use super::{
use crate::protocols::chat::{ context::{DispatchMetadata, ExecutionResult},
ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse, utils,
}; };
use crate::protocols::common::{ use crate::{
FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage, grpc_client::proto,
protocols::{
chat::{ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse},
common::{FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage},
generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::{
stop::{SequenceDecoderOutput, StopSequenceDecoder},
traits::Tokenizer,
},
tool_parser::ParserFactory as ToolParserFactory,
}; };
use crate::protocols::generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use proto::generate_complete::MatchedStop;
use std::time::Instant;
use super::context::{DispatchMetadata, ExecutionResult};
use super::utils;
// ============================================================================ // ============================================================================
// Response Processor - Main Entry Point // Response Processor - Main Entry Point
......
...@@ -11,23 +11,25 @@ use axum::{ ...@@ -11,23 +11,25 @@ use axum::{
}; };
use tracing::debug; use tracing::debug;
use crate::config::types::RetryConfig; use super::{context::SharedComponents, pipeline::RequestPipeline};
use crate::core::WorkerRegistry; use crate::{
use crate::policies::PolicyRegistry; config::types::RetryConfig,
use crate::protocols::chat::ChatCompletionRequest; core::WorkerRegistry,
use crate::protocols::completion::CompletionRequest; policies::PolicyRegistry,
use crate::protocols::embedding::EmbeddingRequest; protocols::{
use crate::protocols::generate::GenerateRequest; chat::ChatCompletionRequest,
use crate::protocols::rerank::RerankRequest; completion::CompletionRequest,
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest}; embedding::EmbeddingRequest,
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory; generate::GenerateRequest,
use crate::routers::RouterTrait; rerank::RerankRequest,
use crate::server::AppContext; responses::{ResponsesGetParams, ResponsesRequest},
use crate::tokenizer::traits::Tokenizer; },
use crate::tool_parser::ParserFactory as ToolParserFactory; reasoning_parser::ParserFactory as ReasoningParserFactory,
routers::RouterTrait,
use super::context::SharedComponents; server::AppContext,
use super::pipeline::RequestPipeline; tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
/// gRPC router implementation for SGLang /// gRPC router implementation for SGLang
#[derive(Clone)] #[derive(Clone)]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment