Unverified commit dc01313d, authored by Chang Su and committed by GitHub
Browse files

[router] Add rustfmt and set group imports by default (#11732)

parent 7a7f99be
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
// ============================================================================
// Default value helpers
......
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use std::collections::HashMap;
use super::common::*;
......
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use validator::Validate;
use super::common::{default_true, GenerationRequest, InputIds};
use super::sampling_params::SamplingParams;
use super::{
common::{default_true, GenerationRequest, InputIds},
sampling_params::SamplingParams,
};
use crate::protocols::validated::Normalizable;
// ============================================================================
......
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
use validator::Validate;
use super::common::{default_model, default_true, GenerationRequest, StringOrArray, UsageInfo};
......
// OpenAI Responses API types
// https://platform.openai.com/docs/api-reference/responses
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use std::collections::HashMap;
// Import shared types from common module
use super::common::{
......
......@@ -117,10 +117,11 @@ impl<T> std::ops::DerefMut for ValidatedJson<T> {
#[cfg(test)]
mod tests {
use super::*;
use serde::{Deserialize, Serialize};
use validator::Validate;
use super::*;
#[derive(Debug, Deserialize, Serialize, Validate)]
struct TestRequest {
#[validate(range(min = 0.0, max = 1.0))]
......
......@@ -2,9 +2,10 @@
//!
//! Defines the request/response structures for worker management endpoints
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Worker configuration for API requests
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct WorkerConfigRequest {
......
// Factory and registry for creating model-specific reasoning parsers.
// Now with parser pooling support for efficient reuse across requests.
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::{
collections::HashMap,
sync::{Arc, RwLock},
};
use tokio::sync::Mutex;
use crate::reasoning_parser::parsers::{
BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, Qwen3Parser,
QwenThinkingParser, Step3Parser,
use crate::reasoning_parser::{
parsers::{
BaseReasoningParser, DeepSeekR1Parser, Glm45Parser, KimiParser, Qwen3Parser,
QwenThinkingParser, Step3Parser,
},
traits::{ParseError, ParserConfig, ReasoningParser},
};
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ReasoningParser};
/// Type alias for pooled parser instances.
/// Uses tokio::Mutex to avoid blocking the async executor.
......@@ -402,8 +406,10 @@ mod tests {
#[tokio::test(flavor = "multi_thread", worker_threads = 8)]
async fn test_high_concurrency_parser_access() {
use std::sync::atomic::{AtomicUsize, Ordering};
use std::time::Instant;
use std::{
sync::atomic::{AtomicUsize, Ordering},
time::Instant,
};
let factory = ParserFactory::new();
let num_tasks = 100;
......
......@@ -2,8 +2,10 @@
// This parser starts with in_reasoning=true, assuming all text is reasoning
// until an end token is encountered.
use crate::reasoning_parser::parsers::BaseReasoningParser;
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
use crate::reasoning_parser::{
parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// DeepSeek-R1 reasoning parser.
///
......
// GLM45 specific reasoning parser.
// Uses the same format as Qwen3 but has its own implementation for debugging.
use crate::reasoning_parser::parsers::BaseReasoningParser;
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
use crate::reasoning_parser::{
parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// GLM45 reasoning parser.
///
......
// Kimi specific reasoning parser.
// This parser uses Unicode tokens and starts with in_reasoning=false.
use crate::reasoning_parser::parsers::BaseReasoningParser;
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
use crate::reasoning_parser::{
parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// Kimi reasoning parser.
///
......
......@@ -2,8 +2,10 @@
// This parser starts with in_reasoning=false, requiring an explicit
// start token to enter reasoning mode.
use crate::reasoning_parser::parsers::BaseReasoningParser;
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
use crate::reasoning_parser::{
parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// Qwen3 reasoning parser.
///
......
// Step3 specific reasoning parser.
// Uses the same format as DeepSeek-R1 but has its own implementation for debugging.
use crate::reasoning_parser::parsers::BaseReasoningParser;
use crate::reasoning_parser::traits::{ParseError, ParserConfig, ParserResult, ReasoningParser};
use crate::reasoning_parser::{
parsers::BaseReasoningParser,
traits::{ParseError, ParserConfig, ParserResult, ReasoningParser},
};
/// Step3 reasoning parser.
///
......
//! Factory for creating router instances
use super::grpc::pd_router::GrpcPDRouter;
use super::grpc::router::GrpcRouter;
use std::sync::Arc;
use super::{
grpc::{pd_router::GrpcPDRouter, router::GrpcRouter},
http::{pd_router::PDRouter, router::Router},
openai::OpenAIRouter,
RouterTrait,
};
use crate::config::{ConnectionMode, PolicyConfig, RoutingMode};
use crate::policies::PolicyFactory;
use crate::server::AppContext;
use std::sync::Arc;
use crate::{
config::{ConnectionMode, PolicyConfig, RoutingMode},
policies::PolicyFactory,
server::AppContext,
};
/// Factory for creating router instances based on configuration.
///
/// This is a unit struct: it declares no fields and therefore carries no
/// per-instance state.
// NOTE(review): presumably used purely as a namespace for associated
// constructor functions; the `impl` block is outside this excerpt — confirm.
pub struct RouterFactory;
......
......@@ -4,20 +4,22 @@
//! eliminating deep parameter passing chains and providing a single source of truth
//! for request state.
use std::collections::HashMap;
use std::sync::Arc;
use std::{collections::HashMap, sync::Arc};
use axum::http::HeaderMap;
use serde_json::Value;
use crate::core::Worker;
use crate::grpc_client::{proto, SglangSchedulerClient};
use crate::protocols::chat::{ChatCompletionRequest, ChatCompletionResponse};
use crate::protocols::generate::{GenerateRequest, GenerateResponse};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::stop::StopSequenceDecoder;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use crate::{
core::Worker,
grpc_client::{proto, SglangSchedulerClient},
protocols::{
chat::{ChatCompletionRequest, ChatCompletionResponse},
generate::{GenerateRequest, GenerateResponse},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::{stop::StopSequenceDecoder, traits::Tokenizer},
tool_parser::ParserFactory as ToolParserFactory,
};
// ============================================================================
// Core Context Types
......
//! gRPC router implementations
use crate::grpc_client::proto;
use crate::protocols::common::StringOrArray;
use crate::{grpc_client::proto, protocols::common::StringOrArray};
pub mod context;
pub mod pd_router;
......
// PD (Prefill-Decode) gRPC Router Implementation
use crate::config::types::RetryConfig;
use crate::core::{ConnectionMode, WorkerRegistry, WorkerType};
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::completion::CompletionRequest;
use crate::protocols::embedding::EmbeddingRequest;
use crate::protocols::generate::GenerateRequest;
use crate::protocols::rerank::RerankRequest;
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::routers::RouterTrait;
use crate::server::AppContext;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use std::sync::Arc;
use async_trait::async_trait;
use axum::{
body::Body,
......@@ -21,12 +9,27 @@ use axum::{
http::{HeaderMap, StatusCode},
response::{IntoResponse, Response},
};
use std::sync::Arc;
use tracing::debug;
use super::context::SharedComponents;
use super::pipeline::RequestPipeline;
use super::{context::SharedComponents, pipeline::RequestPipeline};
use crate::{
config::types::RetryConfig,
core::{ConnectionMode, WorkerRegistry, WorkerType},
policies::PolicyRegistry,
protocols::{
chat::ChatCompletionRequest,
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
routers::RouterTrait,
server::AppContext,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
/// gRPC PD (Prefill-Decode) router implementation for SGLang
#[derive(Clone)]
......
......@@ -3,29 +3,29 @@
//! This module defines the core pipeline abstraction and individual processing stages
//! that transform a RequestContext through its lifecycle.
use std::{
sync::Arc,
time::{Instant, SystemTime, UNIX_EPOCH},
};
use async_trait::async_trait;
use axum::response::{IntoResponse, Response};
use tracing::{debug, error, warn};
use super::context::*;
use super::processing;
use super::streaming;
use super::utils;
use crate::core::{ConnectionMode, Worker, WorkerRegistry, WorkerType};
use crate::grpc_client::proto;
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::common::InputIds;
use crate::protocols::generate::GenerateRequest;
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use proto::DisaggregatedParams;
use rand::Rng;
use std::sync::Arc;
use std::time::{Instant, SystemTime, UNIX_EPOCH};
use tracing::{debug, error, warn};
use uuid::Uuid;
use super::{context::*, processing, streaming, utils};
use crate::{
core::{ConnectionMode, Worker, WorkerRegistry, WorkerType},
grpc_client::proto,
policies::PolicyRegistry,
protocols::{chat::ChatCompletionRequest, common::InputIds, generate::GenerateRequest},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
// ============================================================================
// Pipeline Trait
// ============================================================================
......
......@@ -3,28 +3,30 @@
//! This module contains response processing functions that are shared between
//! the regular router and PD router, eliminating ~1,200 lines of exact duplicates.
use std::sync::Arc;
use std::{sync::Arc, time::Instant};
use proto::generate_complete::MatchedStop;
use serde_json::Value;
use tracing::error;
use crate::grpc_client::proto;
use crate::protocols::chat::{
ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse,
use super::{
context::{DispatchMetadata, ExecutionResult},
utils,
};
use crate::protocols::common::{
FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage,
use crate::{
grpc_client::proto,
protocols::{
chat::{ChatChoice, ChatCompletionMessage, ChatCompletionRequest, ChatCompletionResponse},
common::{FunctionCallResponse, ToolCall, ToolChoice, ToolChoiceValue, Usage},
generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
tokenizer::{
stop::{SequenceDecoderOutput, StopSequenceDecoder},
traits::Tokenizer,
},
tool_parser::ParserFactory as ToolParserFactory,
};
use crate::protocols::generate::{GenerateMetaInfo, GenerateRequest, GenerateResponse};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::tokenizer::stop::{SequenceDecoderOutput, StopSequenceDecoder};
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use proto::generate_complete::MatchedStop;
use std::time::Instant;
use super::context::{DispatchMetadata, ExecutionResult};
use super::utils;
// ============================================================================
// Response Processor - Main Entry Point
......
......@@ -11,23 +11,25 @@ use axum::{
};
use tracing::debug;
use crate::config::types::RetryConfig;
use crate::core::WorkerRegistry;
use crate::policies::PolicyRegistry;
use crate::protocols::chat::ChatCompletionRequest;
use crate::protocols::completion::CompletionRequest;
use crate::protocols::embedding::EmbeddingRequest;
use crate::protocols::generate::GenerateRequest;
use crate::protocols::rerank::RerankRequest;
use crate::protocols::responses::{ResponsesGetParams, ResponsesRequest};
use crate::reasoning_parser::ParserFactory as ReasoningParserFactory;
use crate::routers::RouterTrait;
use crate::server::AppContext;
use crate::tokenizer::traits::Tokenizer;
use crate::tool_parser::ParserFactory as ToolParserFactory;
use super::context::SharedComponents;
use super::pipeline::RequestPipeline;
use super::{context::SharedComponents, pipeline::RequestPipeline};
use crate::{
config::types::RetryConfig,
core::WorkerRegistry,
policies::PolicyRegistry,
protocols::{
chat::ChatCompletionRequest,
completion::CompletionRequest,
embedding::EmbeddingRequest,
generate::GenerateRequest,
rerank::RerankRequest,
responses::{ResponsesGetParams, ResponsesRequest},
},
reasoning_parser::ParserFactory as ReasoningParserFactory,
routers::RouterTrait,
server::AppContext,
tokenizer::traits::Tokenizer,
tool_parser::ParserFactory as ToolParserFactory,
};
/// gRPC router implementation for SGLang
#[derive(Clone)]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment