registry.rs 7.46 KB
Newer Older
1
use crate::tool_parser::parsers::{
2
3
    DeepSeekParser, Glm4MoeParser, GptOssParser, JsonParser, KimiK2Parser, LlamaParser,
    MistralParser, PythonicParser, QwenParser, Step3Parser,
4
};
5
use crate::tool_parser::traits::ToolParser;
6
use once_cell::sync::Lazy;
7
8
9
use std::collections::HashMap;
use std::sync::Arc;

10
11
12
/// Global singleton registry instance.
///
/// The registry is built by `ParserRegistry::new_internal` the first time this
/// static is dereferenced and the same instance is reused for every later access.
pub static GLOBAL_REGISTRY: Lazy<ParserRegistry> = Lazy::new(ParserRegistry::new_internal);

13
14
15
16
17
18
19
20
21
22
23
/// Registry for tool parsers and model mappings.
///
/// Holds the named parser instances together with the table that routes a
/// model name (or a pattern such as `gpt-4*`) to one of those parser names.
pub struct ParserRegistry {
    /// Map of parser name (e.g. `"json"`) to the shared parser instance.
    parsers: HashMap<String, Arc<dyn ToolParser>>,
    /// Map of model name/pattern to the name of a parser stored in `parsers`.
    model_mapping: HashMap<String, String>,
    /// Name of the parser to use when no mapping matches. It is resolved
    /// against `parsers` at lookup time, not when it is set.
    default_parser: String,
}

impl ParserRegistry {
24
25
26
27
28
29
30
31
32
33
34
35
36
    /// Get the global singleton instance
    pub fn new() -> &'static Self {
        &GLOBAL_REGISTRY
    }

    /// Build an independent, fully populated registry for tests.
    ///
    /// Unlike `new`, the returned value is owned by the caller and is not the
    /// global singleton, so tests can mutate it freely.
    #[cfg(test)]
    pub fn new_for_testing() -> Self {
        ParserRegistry::new_internal()
    }

    /// Internal constructor for creating the singleton instance
    fn new_internal() -> Self {
37
38
39
40
41
42
        let mut registry = Self {
            parsers: HashMap::new(),
            model_mapping: HashMap::new(),
            default_parser: "json".to_string(),
        };

43
44
45
        // Register default parsers
        registry.register_default_parsers();

46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
        // Register default model mappings
        registry.register_default_mappings();

        registry
    }

    /// Register `parser` under `name`.
    ///
    /// A parser already stored under the same name is replaced.
    pub fn register_parser(&mut self, name: impl Into<String>, parser: Arc<dyn ToolParser>) {
        let key: String = name.into();
        self.parsers.insert(key, parser);
    }

    /// Route a model name or pattern to the parser registered as `parser`.
    ///
    /// An existing route for the same `model` key is overwritten. The parser
    /// name is not checked against the registered parsers here.
    pub fn map_model(&mut self, model: impl Into<String>, parser: impl Into<String>) {
        let pattern: String = model.into();
        let parser_name: String = parser.into();
        self.model_mapping.insert(pattern, parser_name);
    }

    /// Get parser for a specific model
    pub fn get_parser(&self, model: &str) -> Option<Arc<dyn ToolParser>> {
        // Try exact match first
        if let Some(parser_name) = self.model_mapping.get(model) {
            if let Some(parser) = self.parsers.get(parser_name) {
                return Some(parser.clone());
            }
        }

71
72
73
74
75
76
77
78
79
80
81
        // Try prefix matching with more specific patterns first
        // Collect all matching patterns and sort by specificity (longer = more specific)
        let mut matches: Vec<(&String, &String)> = self
            .model_mapping
            .iter()
            .filter(|(pattern, _)| {
                if pattern.ends_with('*') {
                    let prefix = &pattern[..pattern.len() - 1];
                    model.starts_with(prefix)
                } else {
                    false
82
                }
83
84
85
86
87
88
89
90
91
92
            })
            .collect();

        // Sort by pattern length in descending order (longer patterns are more specific)
        matches.sort_by_key(|(pattern, _)| std::cmp::Reverse(pattern.len()));

        // Return the first matching parser
        for (_, parser_name) in matches {
            if let Some(parser) = self.parsers.get(parser_name) {
                return Some(parser.clone());
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
            }
        }

        // Fall back to default parser if it exists
        self.parsers.get(&self.default_parser).cloned()
    }

    /// Names of all registered parsers, in the map's iteration order.
    pub fn list_parsers(&self) -> Vec<&str> {
        self.parsers.keys().map(String::as_str).collect()
    }

    /// All `(model name/pattern, parser name)` routes, in the map's iteration order.
    pub fn list_mappings(&self) -> Vec<(&str, &str)> {
        let mut routes = Vec::with_capacity(self.model_mapping.len());
        for (model, parser_name) in &self.model_mapping {
            routes.push((model.as_str(), parser_name.as_str()));
        }
        routes
    }

113
114
115
116
117
    /// Register default parsers
    fn register_default_parsers(&mut self) {
        // JSON parser - most common format
        self.register_parser("json", Arc::new(JsonParser::new()));

118
119
120
121
122
        // Mistral parser - [TOOL_CALLS] [...] format
        self.register_parser("mistral", Arc::new(MistralParser::new()));

        // Qwen parser - <tool_call>...</tool_call> format
        self.register_parser("qwen", Arc::new(QwenParser::new()));
123
124
125

        // Pythonic parser - [func(arg=val)] format
        self.register_parser("pythonic", Arc::new(PythonicParser::new()));
126
127
128

        // Llama parser - <|python_tag|>{...} or plain JSON format
        self.register_parser("llama", Arc::new(LlamaParser::new()));
129
130
131

        // DeepSeek V3 parser - Unicode tokens with JSON blocks
        self.register_parser("deepseek", Arc::new(DeepSeekParser::new()));
132

133
134
135
        // GLM-4 MoE parser - XML-style key-value format
        self.register_parser("glm4_moe", Arc::new(Glm4MoeParser::new()));

136
137
        // Step3 parser - StepTML XML format
        self.register_parser("step3", Arc::new(Step3Parser::new()));
138
139
140

        // Kimi K2 parser - Token-based with indexed functions
        self.register_parser("kimik2", Arc::new(KimiK2Parser::new()));
141
142
143

        // GPT-OSS parser - Channel format
        self.register_parser("gpt_oss", Arc::new(GptOssParser::new()));
144
145
    }

146
147
148
149
150
151
152
153
154
155
    /// Register default model mappings
    fn register_default_mappings(&mut self) {
        // OpenAI models
        self.map_model("gpt-4*", "json");
        self.map_model("gpt-3.5*", "json");
        self.map_model("gpt-4o*", "json");

        // Anthropic models
        self.map_model("claude-*", "json");

156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
        // Mistral models - use Mistral parser
        self.map_model("mistral-*", "mistral");
        self.map_model("mixtral-*", "mistral");

        // Qwen models - use Qwen parser
        self.map_model("qwen*", "qwen");
        self.map_model("Qwen*", "qwen");

        // Llama models
        // Llama 4 uses pythonic format
        self.map_model("llama-4*", "pythonic");
        self.map_model("meta-llama-4*", "pythonic");
        // Llama 3.2 uses python_tag format
        self.map_model("llama-3.2*", "llama");
        self.map_model("meta-llama-3.2*", "llama");
        // Other Llama models use JSON
172
173
        self.map_model("llama-*", "json");
        self.map_model("meta-llama-*", "json");
174

175
176
177
178
179
        // DeepSeek models
        // DeepSeek V3 uses custom Unicode token format
        self.map_model("deepseek-v3*", "deepseek");
        self.map_model("deepseek-ai/DeepSeek-V3*", "deepseek");
        // DeepSeek V2 uses pythonic format
180
181
        self.map_model("deepseek-*", "pythonic");

182
        // GLM models
183
        // GLM-4.5 and GLM-4.6 uses XML-style format
184
        self.map_model("glm-4.5*", "glm4_moe");
185
        self.map_model("glm-4.6*", "glm4_moe");
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
        // Other GLM models may use JSON
        self.map_model("glm-*", "json");

        // Step3 models
        self.map_model("step3*", "step3");
        self.map_model("Step-3*", "step3");

        // Kimi models
        self.map_model("kimi-k2*", "kimik2");
        self.map_model("Kimi-K2*", "kimik2");
        self.map_model("moonshot*/Kimi-K2*", "kimik2");

        // GPT-OSS models (T4-style)
        self.map_model("gpt-oss*", "gpt_oss");
        self.map_model("t4-*", "gpt_oss");

202
203
204
        // Other models default to JSON
        self.map_model("gemini-*", "json");
        self.map_model("palm-*", "json");
205
        self.map_model("gemma-*", "json");
206
207
208
209
210
211
212
213
214
215
216
217
218
    }

    /// Change the name of the parser used when no mapping matches.
    ///
    /// The name is stored as given; it is not checked against the registered parsers.
    pub fn set_default_parser(&mut self, name: impl Into<String>) {
        let fallback: String = name.into();
        self.default_parser = fallback;
    }

    /// Check if a parser is registered
    pub fn has_parser(&self, name: &str) -> bool {
        self.parsers.contains_key(name)
    }
}

219
impl Default for &'static ParserRegistry {
220
    fn default() -> Self {
221
        ParserRegistry::new()
222
223
    }
}