Unverified commit 92777135, authored by Chang Su and committed by GitHub
Browse files

[router][grpc] Consolidate parser checks for chat completions (#11439)

parent c4958331
......@@ -861,6 +861,44 @@ impl ResponseProcessingStage {
let chat_request = ctx.chat_request_arc();
let history_tool_calls_count = utils::get_history_tool_calls_count(&chat_request);
// Check parser availability once upfront (not per choice)
let reasoning_parser_available = chat_request.separate_reasoning
&& utils::check_reasoning_parser_availability(
&self.processor.reasoning_parser_factory,
self.processor.configured_reasoning_parser.as_ref(),
&chat_request.model,
);
let tool_choice_enabled = !matches!(
&chat_request.tool_choice,
Some(crate::protocols::spec::ToolChoice::Value(
crate::protocols::spec::ToolChoiceValue::None
))
);
let tool_parser_available = tool_choice_enabled
&& chat_request.tools.is_some()
&& utils::check_tool_parser_availability(
&self.processor.tool_parser_factory,
self.processor.configured_tool_parser.as_ref(),
&chat_request.model,
);
// Log once per request (not per choice)
if chat_request.separate_reasoning && !reasoning_parser_available {
debug!(
"No reasoning parser found for model '{}', skipping reasoning parsing",
chat_request.model
);
}
if chat_request.tools.is_some() && tool_choice_enabled && !tool_parser_available {
debug!(
"No tool parser found for model '{}', skipping tool call parsing",
chat_request.model
);
}
let stop_decoder = ctx
.state
.response
......@@ -878,6 +916,8 @@ impl ResponseProcessingStage {
&chat_request,
stop_decoder,
history_tool_calls_count,
reasoning_parser_available,
tool_parser_available,
)
.await
{
......
......@@ -30,8 +30,8 @@ pub struct ResponseProcessor {
pub tokenizer: Arc<dyn Tokenizer>,
pub tool_parser_factory: ToolParserFactory,
pub reasoning_parser_factory: ReasoningParserFactory,
configured_tool_parser: Option<String>,
configured_reasoning_parser: Option<String>,
pub configured_tool_parser: Option<String>,
pub configured_reasoning_parser: Option<String>,
}
impl ResponseProcessor {
......@@ -52,6 +52,7 @@ impl ResponseProcessor {
}
/// Process a single choice from GenerateComplete response (EXACT COPY from router.rs:1573-1725)
#[allow(clippy::too_many_arguments)]
pub async fn process_single_choice(
&self,
complete: &proto::GenerateComplete,
......@@ -59,6 +60,8 @@ impl ResponseProcessor {
original_request: &ChatCompletionRequest,
stop_decoder: &mut StopSequenceDecoder,
history_tool_calls_count: usize,
reasoning_parser_available: bool,
tool_parser_available: bool,
) -> Result<ChatChoice, String> {
stop_decoder.reset();
// Decode tokens
......@@ -89,8 +92,8 @@ impl ResponseProcessor {
let mut reasoning_text: Option<String> = None;
let mut processed_text = final_text;
// Check if reasoning parsing is enabled and separate_reasoning is requested
if original_request.separate_reasoning {
// Check if reasoning parsing is enabled and parser is available
if original_request.separate_reasoning && reasoning_parser_available {
let pooled_parser = utils::get_reasoning_parser(
&self.reasoning_parser_factory,
self.configured_reasoning_parser.as_ref(),
......@@ -113,8 +116,6 @@ impl ResponseProcessor {
// Step 2: Handle tool call parsing
let mut tool_calls: Option<Vec<ToolCall>> = None;
// Check if tool calls should be processed
let tool_choice_enabled = !matches!(
&original_request.tool_choice,
Some(ToolChoice::Value(ToolChoiceValue::None))
......@@ -134,7 +135,7 @@ impl ResponseProcessor {
&processed_text,
&original_request.tool_choice,
);
} else {
} else if tool_parser_available {
(tool_calls, processed_text) = self
.parse_tool_calls(
&processed_text,
......
......@@ -195,41 +195,29 @@ impl StreamingProcessor {
let system_fingerprint = dispatch.weight_version.as_deref();
// Check parser availability once upfront (log warning only once per request)
let reasoning_parser_available = if separate_reasoning {
if let Some(parser_name) = self.configured_reasoning_parser.as_ref() {
self.reasoning_parser_factory
.registry()
.has_parser(parser_name)
} else {
self.reasoning_parser_factory
.registry()
.has_parser_for_model(model)
}
} else {
false
};
let reasoning_parser_available = separate_reasoning
&& utils::check_reasoning_parser_availability(
&self.reasoning_parser_factory,
self.configured_reasoning_parser.as_ref(),
model,
);
let tool_parser_available = if tools.is_some() {
if let Some(parser_name) = self.configured_tool_parser.as_ref() {
self.tool_parser_factory.registry().has_parser(parser_name)
} else {
self.tool_parser_factory
.registry()
.has_parser_for_model(model)
}
} else {
false
};
let tool_parser_available = tools.is_some()
&& utils::check_tool_parser_availability(
&self.tool_parser_factory,
self.configured_tool_parser.as_ref(),
model,
);
if separate_reasoning && !reasoning_parser_available {
warn!(
debug!(
"No reasoning parser found for model '{}', skipping reasoning parsing",
model
);
}
if tools.is_some() && !tool_parser_available {
warn!(
debug!(
"No tool parser found for model '{}', skipping tool call parsing",
model
);
......
......@@ -675,6 +675,34 @@ pub fn generate_tool_call_id(
}
}
/// Report whether a reasoning parser can be resolved for this request.
///
/// When `configured_parser` names a parser explicitly, only that name is
/// looked up in the factory's registry; otherwise the registry is asked
/// whether it has a parser for `model`.
pub fn check_reasoning_parser_availability(
    reasoning_parser_factory: &crate::reasoning_parser::ParserFactory,
    configured_parser: Option<&String>,
    model: &str,
) -> bool {
    let registry = reasoning_parser_factory.registry();
    match configured_parser {
        // An explicit configuration takes precedence over the model lookup.
        Some(explicit_name) => registry.has_parser(explicit_name),
        None => registry.has_parser_for_model(model),
    }
}
/// Report whether a tool-call parser can be resolved for this request.
///
/// When `configured_parser` names a parser explicitly, only that name is
/// looked up in the factory's registry; otherwise the registry is asked
/// whether it has a parser for `model`.
pub fn check_tool_parser_availability(
    tool_parser_factory: &crate::tool_parser::ParserFactory,
    configured_parser: Option<&String>,
    model: &str,
) -> bool {
    let registry = tool_parser_factory.registry();
    match configured_parser {
        // An explicit configuration takes precedence over the model lookup.
        Some(explicit_name) => registry.has_parser(explicit_name),
        None => registry.has_parser_for_model(model),
    }
}
/// Get the appropriate reasoning parser for a model
///
/// If a parser name is explicitly configured, use that parser.
......
......@@ -6,7 +6,7 @@ use tokio::sync::Mutex;
use crate::tool_parser::parsers::{
DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser,
LlamaParser, MistralParser, PassthroughParser, PythonicParser, QwenParser, Step3Parser,
};
use crate::tool_parser::traits::ToolParser;
......@@ -36,7 +36,7 @@ impl ParserRegistry {
creators: Arc::new(RwLock::new(HashMap::new())),
pool: Arc::new(RwLock::new(HashMap::new())),
model_mapping: Arc::new(RwLock::new(HashMap::new())),
default_parser: Arc::new(RwLock::new("json".to_string())),
default_parser: Arc::new(RwLock::new("passthrough".to_string())),
}
}
......@@ -124,10 +124,9 @@ impl ParserRegistry {
}
}
// Check if default parser exists
let default = self.default_parser.read().unwrap().clone();
let creators = self.creators.read().unwrap();
creators.contains_key(&default)
// Return false if no specific parser found for this model
// (get_pooled will still fall back to default parser)
false
}
/// Create a fresh (non-pooled) parser instance for a specific model.
......@@ -228,6 +227,7 @@ impl ParserFactory {
let registry = ParserRegistry::new();
// Register default parsers
registry.register_parser("passthrough", || Box::new(PassthroughParser::new()));
registry.register_parser("json", || Box::new(JsonParser::new()));
registry.register_parser("mistral", || Box::new(MistralParser::new()));
registry.register_parser("qwen", || Box::new(QwenParser::new()));
......@@ -311,15 +311,15 @@ impl ParserFactory {
/// Get a pooled parser for the given model ID.
/// Returns a shared instance that can be used concurrently.
/// Falls back to JSON parser if model is not recognized.
/// Falls back to passthrough parser if model is not recognized.
pub fn get_pooled(&self, model_id: &str) -> PooledParser {
self.registry
.get_pooled_for_model(model_id)
.unwrap_or_else(|| {
// Fallback to JSON parser
// Fallback to passthrough parser (no-op, returns text unchanged)
self.registry
.get_pooled_parser("json")
.expect("JSON parser should always be registered")
.get_pooled_parser("passthrough")
.expect("Passthrough parser should always be registered")
})
}
......
......@@ -11,6 +11,7 @@ pub mod json_parser;
pub mod kimik2_parser;
pub mod llama_parser;
pub mod mistral_parser;
pub mod passthrough_parser;
pub mod pythonic_parser;
pub mod qwen_parser;
pub mod step3_parser;
......@@ -27,6 +28,7 @@ pub use json_parser::JsonParser;
pub use kimik2_parser::KimiK2Parser;
pub use llama_parser::LlamaParser;
pub use mistral_parser::MistralParser;
pub use passthrough_parser::PassthroughParser;
pub use pythonic_parser::PythonicParser;
pub use qwen_parser::QwenParser;
pub use step3_parser::Step3Parser;
//! Passthrough parser that returns text unchanged
//!
//! This parser is used as a fallback for unknown models where no specific
//! tool call parsing should be performed. It simply returns the input text
//! with no tool calls detected.
use crate::protocols::spec::Tool;
use crate::tool_parser::errors::ParserResult;
use crate::tool_parser::traits::ToolParser;
use crate::tool_parser::types::{StreamingParseResult, ToolCall, ToolCallItem};
use async_trait::async_trait;
/// Passthrough parser that returns text unchanged with no tool calls.
///
/// The type is a stateless unit struct, so besides `Default` it derives
/// `Debug`, `Clone` and `Copy`: values can be logged, embedded in
/// `#[derive(Debug)]` containers and duplicated freely.
#[derive(Debug, Clone, Copy, Default)]
pub struct PassthroughParser;

impl PassthroughParser {
    /// Create a new passthrough parser (same value as `PassthroughParser::default()`).
    pub fn new() -> Self {
        Self
    }
}
#[async_trait]
impl ToolParser for PassthroughParser {
    /// Hand the complete output back verbatim, paired with an empty tool-call list.
    async fn parse_complete(&self, output: &str) -> ParserResult<(String, Vec<ToolCall>)> {
        let untouched = output.to_owned();
        Ok((untouched, Vec::new()))
    }

    /// Forward the streamed chunk as normal text with no tool calls.
    ///
    /// The `_tools` list is ignored.
    async fn parse_incremental(
        &mut self,
        chunk: &str,
        _tools: &[Tool],
    ) -> ParserResult<StreamingParseResult> {
        let normal_text = chunk.to_owned();
        let calls = Vec::new();
        Ok(StreamingParseResult { normal_text, calls })
    }

    /// Always `false`: the passthrough parser never reports tool markers.
    fn has_tool_markers(&self, _text: &str) -> bool {
        false
    }

    /// Always `None`: this parser reports no unstreamed tool arguments.
    fn get_unstreamed_tool_args(&self) -> Option<Vec<ToolCallItem>> {
        None
    }
}
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment