Unverified Commit 25728863 authored by Simo Lin's avatar Simo Lin Committed by GitHub
Browse files

[router] add harmony tool parser base structure and interface (#11036)

parent dba751a8
use async_trait::async_trait;
use crate::tool_parser::{
errors::ToolParserResult,
state::ParseState,
traits::{TokenToolParser, ToolParser},
types::{StreamResult, ToolCall},
};
/// Placeholder for the Harmony-backed GPT-OSS parser.
///
/// The current implementation is a stub that falls back to the legacy
/// regex implementation. This struct will be fleshed out in subsequent phases to
/// reuse Harmony's tokenizer and message reconstruction logic.
#[derive(Default)]
pub struct GptOssHarmonyParser;
impl GptOssHarmonyParser {
pub fn new() -> Self {
Self
}
}
#[async_trait]
impl ToolParser for GptOssHarmonyParser {
    /// Complete-text entrypoint; currently a pass-through stub.
    async fn parse_complete(&self, output: &str) -> ToolParserResult<(String, Vec<ToolCall>)> {
        // Stub behavior: echo the input unchanged and report no tool calls.
        // Later phases will decode Harmony tokens into structured tool calls.
        let passthrough = output.to_string();
        Ok((passthrough, vec![]))
    }

    async fn parse_incremental(
        &self,
        _chunk: &str,
        _state: &mut ParseState,
    ) -> ToolParserResult<StreamResult> {
        // No streaming pipeline yet: always signal that more input is needed.
        Ok(StreamResult::Incomplete)
    }

    fn detect_format(&self, text: &str) -> bool {
        // Heuristic inherited from the legacy parser; to be replaced with
        // Harmony-specific start-token detection once fully implemented.
        text.contains("<|channel|>commentary")
    }

    fn as_token_parser(&self) -> Option<&dyn TokenToolParser> {
        // This type also implements the token-aware trait, so expose it.
        Some(self)
    }
}
#[async_trait]
impl TokenToolParser for GptOssHarmonyParser {
    /// Token-based complete parse; a no-op until Harmony integration lands.
    async fn parse_complete_tokens(
        &self,
        _tokens: &[u32],
    ) -> ToolParserResult<(String, Vec<ToolCall>)> {
        // Returning an empty text/tool-call pair keeps the parser a safe
        // no-op (rather than a runtime panic) if enabled before the full
        // implementation exists.
        Ok((String::default(), vec![]))
    }

    /// Token-based streaming parse; always reports "incomplete" for now.
    async fn parse_incremental_tokens(
        &self,
        _tokens: &[u32],
        _state: &mut ParseState,
    ) -> ToolParserResult<StreamResult> {
        Ok(StreamResult::Incomplete)
    }
}
......@@ -5,6 +5,7 @@
// Individual parser modules
pub mod deepseek_parser;
pub mod glm4_moe_parser;
pub mod gpt_oss_harmony_parser;
pub mod gpt_oss_parser;
pub mod json_parser;
pub mod kimik2_parser;
......@@ -17,6 +18,7 @@ pub mod step3_parser;
// Re-export parser types for convenience
pub use deepseek_parser::DeepSeekParser;
pub use glm4_moe_parser::Glm4MoeParser;
pub use gpt_oss_harmony_parser::GptOssHarmonyParser;
pub use gpt_oss_parser::GptOssParser;
pub use json_parser::JsonParser;
pub use kimik2_parser::KimiK2Parser;
......
use crate::tool_parser::parsers::{
DeepSeekParser, Glm4MoeParser, GptOssParser, JsonParser, KimiK2Parser, LlamaParser,
MistralParser, PythonicParser, QwenParser, Step3Parser,
DeepSeekParser, Glm4MoeParser, GptOssHarmonyParser, GptOssParser, JsonParser, KimiK2Parser,
LlamaParser, MistralParser, PythonicParser, QwenParser, Step3Parser,
};
use crate::tool_parser::traits::ToolParser;
use once_cell::sync::Lazy;
use std::collections::HashMap;
use std::sync::Arc;
use std::{collections::HashMap, env, sync::Arc};
/// Global singleton registry instance - created once and reused
pub static GLOBAL_REGISTRY: Lazy<ParserRegistry> = Lazy::new(ParserRegistry::new_internal);
......@@ -139,8 +138,18 @@ impl ParserRegistry {
// Kimi K2 parser - Token-based with indexed functions
self.register_parser("kimik2", Arc::new(KimiK2Parser::new()));
// GPT-OSS parser - Channel format
self.register_parser("gpt_oss", Arc::new(GptOssParser::new()));
// GPT-OSS parsers - register legacy and Harmony variants
let gpt_oss_legacy = Arc::new(GptOssParser::new());
let gpt_oss_harmony = Arc::new(GptOssHarmonyParser::new());
self.register_parser("gpt_oss_legacy", gpt_oss_legacy.clone());
self.register_parser("gpt_oss_harmony", gpt_oss_harmony.clone());
if use_harmony_gpt_oss() {
self.register_parser("gpt_oss", gpt_oss_harmony);
} else {
self.register_parser("gpt_oss", gpt_oss_legacy);
}
}
/// Register default model mappings
......@@ -216,6 +225,19 @@ impl ParserRegistry {
}
}
/// Returns `true` when the `ROUTER_USE_HARMONY_GPT_OSS` environment variable
/// is set to a truthy value: "1", "true", "yes", or "on", compared
/// case-insensitively with surrounding whitespace ignored.
///
/// An unset, unreadable (non-Unicode), or otherwise-valued variable disables
/// the Harmony parser (returns `false`).
fn use_harmony_gpt_oss() -> bool {
    env::var("ROUTER_USE_HARMONY_GPT_OSS")
        .map(|value| {
            let normalized = value.trim();
            // Case-insensitive comparison covers mixed casings (e.g. "tRue",
            // "oN") that the previous hand-enumerated list missed, while
            // accepting everything it accepted before.
            ["1", "true", "yes", "on"]
                .iter()
                .any(|truthy| normalized.eq_ignore_ascii_case(truthy))
        })
        .unwrap_or(false)
}
impl Default for &'static ParserRegistry {
fn default() -> Self {
ParserRegistry::new()
......
......@@ -34,6 +34,8 @@ pub struct ParseState {
pub escape_next: bool,
/// Current tool index (for streaming)
pub tool_index: usize,
/// Optional Harmony-specific streaming state (populated by token-aware parsers)
pub harmony_stream: Option<HarmonyStreamState>,
}
impl ParseState {
......@@ -49,6 +51,7 @@ impl ParseState {
in_string: false,
escape_next: false,
tool_index: 0,
harmony_stream: None,
}
}
......@@ -59,6 +62,7 @@ impl ParseState {
self.bracket_depth = 0;
self.in_string = false;
self.escape_next = false;
self.harmony_stream = None;
}
/// Process a single character for JSON parsing
......@@ -179,3 +183,20 @@ impl Default for ParseState {
Self::new()
}
}
/// Placeholder for Harmony streaming metadata captured during token-aware parsing.
#[derive(Debug, Clone, Default)]
pub struct HarmonyStreamState {
    /// Every token observed so far in the current assistant response.
    pub tokens: Vec<u32>,
    /// Count of tokens the Harmony parser has already consumed.
    pub processed_tokens: usize,
    /// How many tool calls have been emitted downstream so far.
    pub emitted_calls: usize,
    /// Analysis-channel text buffered until it can be flushed as normal output.
    pub analysis_buffer: String,
    /// True once the current call's tool name has been surfaced.
    pub emitted_name: bool,
    /// True once the current call's arguments have been surfaced.
    pub emitted_args: bool,
}
......@@ -21,6 +21,12 @@ pub trait ToolParser: Send + Sync {
/// Check if text contains tool calls in this parser's format
fn detect_format(&self, text: &str) -> bool;
/// Optionally expose a token-aware parser implementation.
///
/// Default returns `None`, meaning the parser only supports text input;
/// parsers that can consume raw token IDs override this to return `Some(self)`.
fn as_token_parser(&self) -> Option<&dyn TokenToolParser> {
    None
}
}
/// Trait for partial JSON parsing
......@@ -34,3 +40,19 @@ pub trait PartialJsonParser: Send + Sync {
/// Get the maximum parsing depth
fn max_depth(&self) -> usize;
}
/// Extension of `ToolParser` for implementations that can consume raw
/// token IDs directly instead of (or in addition to) decoded text.
#[async_trait]
pub trait TokenToolParser: ToolParser {
    /// Parse complete tool calls when provided with raw token IDs.
    ///
    /// Returns the remaining normal text alongside any structured tool calls.
    async fn parse_complete_tokens(
        &self,
        tokens: &[u32],
    ) -> ToolParserResult<(String, Vec<ToolCall>)>;

    /// Streaming parser entrypoint for token chunks.
    ///
    /// `state` carries parser progress between successive chunks.
    async fn parse_incremental_tokens(
        &self,
        tokens: &[u32],
        state: &mut ParseState,
    ) -> ToolParserResult<StreamResult>;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment