parser.rs 19 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
//! Harmony response parser
//!
//! Adapter for openai_harmony::StreamableParser that handles channel-based parsing.

use openai_harmony::{chat::Role, HarmonyEncoding, StreamableParser};
use uuid::Uuid;

use super::types::{HarmonyChannelDelta, HarmonyChannelOutput};
use crate::protocols::common::{FunctionCallResponse, ToolCall};

/// Return the global Harmony encoding.
///
/// Forwards to the builder module's accessor so the parser works with the
/// same encoding the builder uses.
fn get_harmony_encoding() -> &'static HarmonyEncoding {
    super::builder::get_harmony_encoding()
}

/// Harmony parser adapter
///
/// Wraps openai_harmony::StreamableParser and provides methods for parsing
/// complete responses and streaming chunks.
pub struct HarmonyParserAdapter {
    /// Underlying Harmony parser that every output token is fed through.
    parser: StreamableParser,
    /// Recipient of the tool call most recently announced by `parse_chunk`.
    /// Compared against the parser's current recipient to detect the start of
    /// a new tool call; `None` initially and after `reset`.
    prev_recipient: Option<String>,
}

impl HarmonyParserAdapter {
    /// Build an adapter around a fresh `StreamableParser`.
    ///
    /// The parser is created with a clone of the shared Harmony encoding and
    /// the assistant role. No tool-call recipient is recorded yet.
    ///
    /// # Errors
    ///
    /// Returns a descriptive message if `StreamableParser::new` fails.
    pub fn new() -> Result<Self, String> {
        StreamableParser::new(get_harmony_encoding().clone(), Some(Role::Assistant))
            .map(|parser| Self {
                parser,
                prev_recipient: None,
            })
            .map_err(|e| format!("Failed to create StreamableParser: {}", e))
    }

    /// Concatenate the text items of a message (private helper)
    ///
    /// Walks the content array in order, keeps only the `Content::Text`
    /// entries and appends their text without any separator.
    ///
    /// # Arguments
    ///
    /// * `content` - The content array from a Harmony message
    ///
    /// # Returns
    ///
    /// A single string with all text items joined; empty if there are none
    fn extract_text_from_content(content: &[openai_harmony::chat::Content]) -> String {
        let mut joined = String::new();
        for item in content {
            // Non-text content items contribute nothing.
            if let openai_harmony::chat::Content::Text(tc) = item {
                joined.push_str(&tc.text);
            }
        }
        joined
    }

    /// Handle incomplete content from parser state (private helper)
    ///
    /// Reads the content of the message the parser is still in the middle of
    /// and, if it is non-empty, routes it by the parser's current channel:
    ///
    /// * `analysis` - appended to `analysis` (newline-separated when reasoning
    ///   was already collected, so earlier reasoning is never discarded)
    /// * `final` or no channel - appended to `final_text`
    /// * any other channel - ignored here
    ///
    /// Nothing happens when the parser cannot report its current content.
    ///
    /// # Arguments
    ///
    /// * `parser` - Reference to the StreamableParser
    /// * `analysis` - Mutable reference to analysis content
    /// * `final_text` - Mutable reference to final text content
    fn handle_incomplete_content(
        parser: &StreamableParser,
        analysis: &mut Option<String>,
        final_text: &mut String,
    ) {
        let Ok(pending) = parser.current_content() else {
            return;
        };
        if pending.is_empty() {
            return;
        }

        match parser.current_channel().as_deref() {
            // Append rather than overwrite: `analysis` may already hold
            // reasoning gathered from completed messages.
            Some("analysis") => match analysis.as_mut() {
                Some(existing) => {
                    existing.push('\n');
                    existing.push_str(&pending);
                }
                None => *analysis = Some(pending),
            },
            Some("final") | None => final_text.push_str(&pending),
            _ => {}
        }
    }

    /// Parse messages into channel outputs (private helper)
    ///
    /// Extracts analysis, commentary (tool calls), and final text from Harmony messages.
    /// This is the core parsing logic shared by both parse_complete and finalize.
    ///
    /// # Arguments
    ///
    /// * `messages` - The messages to parse from the Harmony parser
    ///
    /// # Returns
    ///
    /// Tuple of (analysis, commentary, final_text)
106
    pub fn parse_messages(
107
108
        messages: &[openai_harmony::chat::Message],
    ) -> (Option<String>, Option<Vec<ToolCall>>, String) {
109
        let mut analysis: Option<String> = None;
110
111
112
113
114
115
116
117
118
119
120
121
        let mut commentary: Option<Vec<ToolCall>> = None;
        let mut final_text = String::new();

        for msg in messages {
            // Filter: Only process assistant messages
            if msg.author.role != Role::Assistant {
                continue;
            }

            let channel = msg.channel.as_deref().unwrap_or("");
            let recipient = msg.recipient.as_deref();

122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
            // IMPORTANT: Check recipient FIRST before channel
            // The model sometimes generates tool calls with channel="analysis" + recipient="functions.*"
            // instead of channel="commentary" + recipient="functions.*"
            // We should trust the recipient field to determine if this is a tool call
            if let Some(recipient_str) = recipient {
                if recipient_str.starts_with("functions.") {
                    // This is a tool call, regardless of channel
                    let function_name = recipient_str.strip_prefix("functions.").unwrap();

                    // Process each content item separately
                    for content in &msg.content {
                        if let openai_harmony::chat::Content::Text(tc) = content {
                            let call_id = format!("call_{}", Uuid::new_v4());
                            let tool_call = ToolCall {
                                id: call_id,
                                tool_type: "function".to_string(),
                                function: FunctionCallResponse {
                                    name: function_name.to_string(),
                                    arguments: Some(tc.text.clone()),
                                },
                            };

                            match commentary.as_mut() {
                                Some(calls) => calls.push(tool_call),
                                None => commentary = Some(vec![tool_call]),
                            }
                        }
                    }
                    // Skip further channel processing for this message
                    continue;
                } else if recipient_str.starts_with("python")
                    || recipient_str.starts_with("browser")
                    || recipient_str.starts_with("container")
                {
                    // Built-in tools → treat as reasoning
                    // For Chat API, we add to analysis content
                    let text = Self::extract_text_from_content(&msg.content);

                    if !text.is_empty() {
                        // Append to analysis (built-in tools are reasoning)
                        match analysis.as_mut() {
                            Some(existing) => {
                                existing.push('\n');
                                existing.push_str(&text);
                            }
                            None => analysis = Some(text),
                        }
                    }
                    // Skip further channel processing
                    continue;
                }
            }

            // Now process by channel (only if not already handled by recipient)
176
177
178
179
180
181
182
183
184
185
186
            match channel {
                "analysis" => {
                    // Process each content item
                    // For Chat API, we join them into a single reasoning_content
                    let text = Self::extract_text_from_content(&msg.content);

                    if !text.is_empty() {
                        analysis = Some(text);
                    }
                }
                "commentary" => {
187
188
189
190
191
192
193
194
                    // If we reach here, recipient was not "functions.*" or built-in tools
                    // Commentary channel should always have a recipient
                    // This is likely a model bug - log warning and treat as reasoning
                    tracing::warn!(
                        channel = "commentary",
                        recipient = ?recipient,
                        "Commentary message without valid recipient, treating as reasoning"
                    );
195

196
                    let text = Self::extract_text_from_content(&msg.content);
197

198
199
200
201
202
                    if !text.is_empty() {
                        match analysis.as_mut() {
                            Some(existing) => {
                                existing.push('\n');
                                existing.push_str(&text);
203
                            }
204
                            None => analysis = Some(text),
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
                        }
                    }
                }
                "final" => {
                    // Process final channel content
                    let text = Self::extract_text_from_content(&msg.content);
                    final_text.push_str(&text);
                }
                _ => {
                    // Unknown channel, append to final text as fallback
                    let text = Self::extract_text_from_content(&msg.content);
                    final_text.push_str(&text);
                }
            }
        }

        (analysis, commentary, final_text)
    }

    /// Parse complete response
    ///
    /// Parses all output token IDs and returns the complete channel output
    /// containing analysis, commentary (tool calls), and final text.
    ///
    /// # Arguments
    ///
    /// * `output_ids` - The complete output token IDs from the model
    /// * `finish_reason` - The finish reason from GenerateComplete ("stop", "length", etc.)
    /// * `matched_stop` - Optional matched stop token information from GenerateComplete
    ///
    /// # Returns
    ///
    /// Complete HarmonyChannelOutput with all three channels parsed
    pub fn parse_complete(
        &mut self,
        output_ids: &[u32],
        finish_reason: String,
        matched_stop: Option<serde_json::Value>,
    ) -> Result<HarmonyChannelOutput, String> {
        // Feed all tokens to the parser
        for &token_id in output_ids {
246
247
248
            self.parser
                .process(token_id)
                .map_err(|e| format!("Failed to process token {}: {}", token_id, e))?;
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
        }

        // Extract all completed messages from the parser
        let messages = self.parser.messages();

        // Parse messages into channel outputs using shared helper
        let (mut analysis, commentary, mut final_text) = Self::parse_messages(messages);

        // Check for incomplete content in parser state
        Self::handle_incomplete_content(&self.parser, &mut analysis, &mut final_text);

        // Determine finish reason: override to "tool_calls" if commentary has tool calls
        let final_finish_reason = if commentary.is_some() {
            "tool_calls".to_string()
        } else {
264
            finish_reason.clone()
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
        };

        Ok(HarmonyChannelOutput {
            analysis,
            commentary,
            final_text,
            finish_reason: final_finish_reason,
            matched_stop,
        })
    }

    /// Return a copy of the messages the parser has completed so far
    ///
    /// The raw Harmony messages are cloned into a new vector.
    /// Used for validation checks.
    pub fn get_messages(&self) -> Vec<openai_harmony::chat::Message> {
        let completed = self.parser.messages();
        completed.to_vec()
    }

284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
    /// Turn an unfinished commentary message into a tool call
    ///
    /// When the stream ends, the parser may still hold commentary content that
    /// was never closed into a completed message. If the parser is currently
    /// in the commentary channel, its recipient is `functions.<name>` and the
    /// pending content is non-empty, that content is returned as the arguments
    /// of a single tool call with a freshly generated `call_<uuid>` id.
    ///
    /// # Returns
    ///
    /// `Some` with exactly one ToolCall if incomplete commentary is found,
    /// otherwise `None`
    pub fn extract_incomplete_commentary(&self) -> Option<Vec<ToolCall>> {
        // Only the commentary channel carries tool calls here.
        if self.parser.current_channel().as_deref() != Some("commentary") {
            return None;
        }

        // The recipient must be "functions.{name}"; anything else is not a
        // function tool call.
        let recipient = self.parser.current_recipient()?;
        let function_name = recipient.strip_prefix("functions.")?;

        // Whatever has been generated so far serves as the arguments.
        let arguments = self.parser.current_content().ok()?;
        if arguments.is_empty() {
            return None;
        }

        Some(vec![ToolCall {
            id: format!("call_{}", Uuid::new_v4()),
            tool_type: "function".to_string(),
            function: FunctionCallResponse {
                name: function_name.to_string(),
                arguments: Some(arguments),
            },
        }])
    }

329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
    /// Parse streaming chunk
    ///
    /// Feeds the chunk's token IDs to the parser one by one and collects the
    /// text each token contributes, grouped by the channel that is current
    /// right after the token was processed:
    ///
    /// * `analysis` - accumulated into `analysis_delta`
    /// * `final` or no channel - accumulated into `final_delta`
    /// * `commentary` - accumulated as tool-call argument text
    ///
    /// After the chunk, if the parser is in the commentary channel with a
    /// `functions.<name>` recipient, a tool-call delta is produced: a header
    /// (new id, name, empty arguments) when the recipient differs from the one
    /// announced previously, otherwise the accumulated argument text. The
    /// tool-call index is the number of completed commentary tool-call messages.
    ///
    /// # Arguments
    ///
    /// * `chunk_ids` - New token IDs from the current chunk
    ///
    /// # Returns
    ///
    /// Optional HarmonyChannelDelta if there's new content to emit; its
    /// `is_final` flag is set when the parser completed at least one message
    /// while processing this chunk
    ///
    /// # Errors
    ///
    /// Returns a message naming the offending token if the parser rejects one.
    pub fn parse_chunk(
        &mut self,
        chunk_ids: &[u32],
    ) -> Result<Option<HarmonyChannelDelta>, String> {
        let mut has_delta = false;
        let mut analysis_delta: Option<String> = None;
        let mut commentary_delta = None;
        let mut final_delta: Option<String> = None;

        // Track message count before processing
        let prev_message_count = self.parser.messages().len();

        // Accumulate delta text for commentary channel
        let mut accumulated_delta = String::new();

        // Process each token
        for &token_id in chunk_ids {
            self.parser
                .process(token_id)
                .map_err(|e| format!("Failed to process token {}: {}", token_id, e))?;

            // Check for content delta
            if let Ok(Some(delta_text)) = self.parser.last_content_delta() {
                has_delta = true;

                // Determine which channel this delta belongs to.
                // A chunk may carry several tokens, so every channel
                // accumulates; keeping only the latest token's text would
                // silently drop the rest of the chunk.
                let channel = self.parser.current_channel();
                match channel.as_deref() {
                    Some("analysis") => {
                        analysis_delta
                            .get_or_insert_with(String::new)
                            .push_str(&delta_text);
                    }
                    Some("final") | None => {
                        final_delta
                            .get_or_insert_with(String::new)
                            .push_str(&delta_text);
                    }
                    Some("commentary") => {
                        accumulated_delta.push_str(&delta_text);
                    }
                    _ => {}
                }
            }
        }

        // Handle commentary channel tool call deltas
        if self.parser.current_channel().as_deref() == Some("commentary") {
            if let Some(cur_recipient) = self.parser.current_recipient() {
                if let Some(tool_name) = cur_recipient.strip_prefix("functions.") {
                    has_delta = true;

                    // Count completed tool calls for index
                    let base_index = self
                        .parser
                        .messages()
                        .iter()
                        .filter(|msg| {
                            msg.channel.as_deref() == Some("commentary")
                                && msg
                                    .recipient
                                    .as_deref()
                                    .is_some_and(|r| r.starts_with("functions."))
                        })
                        .count();

                    // Check if recipient changed (new tool call)
                    let recipient_changed =
                        self.prev_recipient.as_deref() != Some(cur_recipient.as_str());

                    if recipient_changed {
                        // NEW tool call: emit name + id
                        // NOTE(review): argument text that arrived in this same
                        // chunk (`accumulated_delta`) is not emitted here —
                        // confirm whether multi-token chunks can start a tool
                        // call and carry arguments at once.
                        let call_id = format!("call_{}", Uuid::new_v4());

                        commentary_delta = Some(super::types::ToolCallDelta {
                            index: base_index,
                            id: Some(call_id),
                            function: Some(super::types::FunctionDelta {
                                name: Some(tool_name.to_string()),
                                arguments: Some(String::new()),
                            }),
                        });

                        // Remember the recipient so later chunks are treated
                        // as a continuation of this tool call.
                        self.prev_recipient = Some(cur_recipient);
                    } else if !accumulated_delta.is_empty() {
                        // CONTINUING tool call: emit arguments delta
                        commentary_delta = Some(super::types::ToolCallDelta {
                            index: base_index,
                            id: None,
                            function: Some(super::types::FunctionDelta {
                                name: None,
                                arguments: Some(accumulated_delta),
                            }),
                        });
                    }
                }
            }
        }

        // Check if new messages were completed
        let current_message_count = self.parser.messages().len();
        let is_final = current_message_count > prev_message_count;

        if has_delta {
            Ok(Some(HarmonyChannelDelta {
                analysis_delta,
                commentary_delta,
                final_delta,
                is_final,
            }))
        } else {
            Ok(None)
        }
    }

    /// Finish parsing and build the final output
    ///
    /// Called at the end of streaming. Combines the messages the parser has
    /// completed with any content it is still holding for an unfinished
    /// message.
    ///
    /// # Arguments
    ///
    /// * `finish_reason` - The finish reason from GenerateComplete ("stop", "length", etc.)
    /// * `matched_stop` - Optional matched stop token information from GenerateComplete
    ///
    /// # Returns
    ///
    /// Final HarmonyChannelOutput with complete parsed content. Its finish
    /// reason is "tool_calls" when tool calls were found, otherwise the
    /// `finish_reason` passed in.
    pub fn finalize(
        &mut self,
        finish_reason: String,
        matched_stop: Option<serde_json::Value>,
    ) -> Result<HarmonyChannelOutput, String> {
        // Completed messages first, through the shared helper ...
        let (mut analysis, commentary, mut final_text) =
            Self::parse_messages(self.parser.messages());

        // ... then whatever is left of a message that never completed.
        Self::handle_incomplete_content(&self.parser, &mut analysis, &mut final_text);

        // Tool calls take precedence over the reported finish reason.
        let finish_reason = match commentary {
            Some(_) => "tool_calls".to_string(),
            None => finish_reason,
        };

        Ok(HarmonyChannelOutput {
            analysis,
            commentary,
            final_text,
            finish_reason,
            matched_stop,
        })
    }

    /// Return the adapter to its initial state for reuse
    ///
    /// Replaces the wrapped parser with a newly constructed one and forgets
    /// the last announced tool-call recipient.
    ///
    /// # Errors
    ///
    /// Returns a message if the replacement parser cannot be created; the
    /// adapter is left unchanged in that case.
    pub fn reset(&mut self) -> Result<(), String> {
        // StreamableParser doesn't have a reset method, so build a new one.
        let fresh = StreamableParser::new(get_harmony_encoding().clone(), Some(Role::Assistant))
            .map_err(|e| format!("Failed to reset parser: {}", e))?;

        self.parser = fresh;
        self.prev_recipient = None;
        Ok(())
    }
}

impl Default for HarmonyParserAdapter {
    /// Equivalent to [`HarmonyParserAdapter::new`].
    ///
    /// # Panics
    ///
    /// Panics if the underlying parser cannot be created.
    fn default() -> Self {
        let adapter = Self::new();
        adapter.expect("Failed to create default parser")
    }
}