[router][grpc] Support mixin tool calls in Responses API (#12736)

837b08eb · Chang Su · GitHub · bb6a21cd · 837b08eb · 837b08eb
Unverified Commit 837b08eb authored Nov 05, 2025 by Chang Su Committed by GitHub Nov 05, 2025
4 changed files
--- a/sgl-router/src/protocols/responses.rs
+++ b/sgl-router/src/protocols/responses.rs
@@ -56,7 +56,7 @@ impl Default for ResponseTool {
    }
 }

-#[derive(Debug, Clone, Deserialize, Serialize)]
+#[derive(Debug, Clone, Deserialize, Serialize, PartialEq)]
 #[serde(rename_all = "snake_case")]
 pub enum ResponseToolType {
    Function,

--- a/sgl-router/src/routers/grpc/harmony/responses.rs
+++ b/sgl-router/src/routers/grpc/harmony/responses.rs
@@ -20,9 +20,9 @@
 //!
 //!     match result {
 //!         ToolCallsFound { tool_calls, .. } => {
-//!             // Execute MCP tools
-//!             // Build next request with tool results
-//!             // Continue loop
+//!             // Separate MCP tools from function tools
+//!             // Execute MCP tools, return if function tools found
+//!             // Continue loop with MCP results if only MCP tools
 //!         }
 //!         Completed { response, .. } => {
 //!             return Ok(response);
@@ -30,12 +30,6 @@
 //!     }
 //! }
 //! ```
-//!
-//! ## Design Reference
-//!
-//! See `/Users/simolin/workspace/sglang/.claude/docs/harmony_pipeline/tool_loop_design.md`
-//! for complete architecture, rationale, and implementation details.
-
 use std::{
    sync::Arc,
    time::{SystemTime, UNIX_EPOCH},
@@ -210,6 +204,18 @@ impl HarmonyResponsesContext {
    }
 }

+/// Collect the names of the request's MCP tools into a `HashSet`.
+///
+/// Only tools typed `ResponseToolType::Mcp` that carry a `function` contribute a
+/// name. The returned `&str`s borrow from `request_tools`; lookups are O(1) on average.
+fn build_mcp_tool_names_set(request_tools: &[ResponseTool]) -> std::collections::HashSet<&str> {
+    request_tools
+        .iter()
+        .filter(|t| t.r#type == ResponseToolType::Mcp)
+        .filter_map(|t| t.function.as_ref().map(|f| f.name.as_str()))
+        .collect()
+}
+
 /// Execute Harmony Responses API request with multi-turn MCP tool support
 ///
 /// This function orchestrates the multi-turn conversation flow:
@@ -354,7 +360,20 @@ async fn execute_with_mcp_loop(
                    tool_call_count = tool_calls.len(),
                    has_analysis = analysis.is_some(),
                    partial_text_len = partial_text.len(),
-                    "Tool calls found - checking limits before executing MCP tools"
+                    "Tool calls found - separating MCP and function tools"
+                );
+
+                // Separate MCP and function tool calls based on tool type
+                let request_tools = current_request.tools.as_deref().unwrap_or(&[]);
+                let mcp_tool_names = build_mcp_tool_names_set(request_tools);
+                let (mcp_tool_calls, function_tool_calls): (Vec<_>, Vec<_>) = tool_calls
+                    .into_iter()
+                    .partition(|tc| mcp_tool_names.contains(tc.function.name.as_str()));
+
+                debug!(
+                    mcp_calls = mcp_tool_calls.len(),
+                    function_calls = function_tool_calls.len(),
+                    "Tool calls separated by type"
                );

                // Check combined limit (user's max_tool_calls vs safety limit)
@@ -363,21 +382,29 @@ async fn execute_with_mcp_loop(
                    None => MAX_TOOL_ITERATIONS,
                };

-                // Check if we would exceed the limit with these new tool calls
-                let total_calls_after = mcp_tracking.total_calls() + tool_calls.len();
+                // Check if we would exceed the limit with these new MCP tool calls
+                let total_calls_after = mcp_tracking.total_calls() + mcp_tool_calls.len();
                if total_calls_after > effective_limit {
                    warn!(
                        current_calls = mcp_tracking.total_calls(),
-                        new_calls = tool_calls.len(),
+                        new_calls = mcp_tool_calls.len() + function_tool_calls.len(),
                        total_after = total_calls_after,
                        effective_limit = effective_limit,
                        user_max = ?max_tool_calls,
                        "Reached tool call limit - returning incomplete response"
                    );

-                    // Build response with incomplete status
-                    let mut response = build_function_tool_response(
-                        tool_calls,
+                    // Combine back for response
+                    let all_tool_calls: Vec<_> = mcp_tool_calls
+                        .into_iter()
+                        .chain(function_tool_calls)
+                        .collect();
+
+                    // Build response with incomplete status - no tools executed due to limit
+                    let mut response = build_tool_response(
+                        vec![],         // No MCP tools executed
+                        vec![],         // No MCP results
+                        all_tool_calls, // All tools returned as function calls (not executed)
                        analysis,
                        partial_text,
                        usage,
@@ -397,15 +424,50 @@ async fn execute_with_mcp_loop(
                    return Ok(response);
                }

-                // Execute MCP tools
-                let tool_results =
-                    execute_mcp_tools(&ctx.mcp_manager, &tool_calls, &mut mcp_tracking).await?;
+                // Execute MCP tools (if any)
+                let mcp_results = if !mcp_tool_calls.is_empty() {
+                    execute_mcp_tools(&ctx.mcp_manager, &mcp_tool_calls, &mut mcp_tracking).await?
+                } else {
+                    Vec::new()
+                };
+
+                // If there are function tools, exit MCP loop and return response
+                if !function_tool_calls.is_empty() {
+                    debug!(
+                        "Function tool calls present - exiting MCP loop and returning to caller"
+                    );
+
+                    // Build response that includes:
+                    // 1. Reasoning/message from this iteration
+                    // 2. MCP tools with output (status completed or failed) - these were executed
+                    // 3. Function tools as completed (without output) - need caller execution
+                    let mut response = build_tool_response(
+                        mcp_tool_calls,
+                        mcp_results,
+                        function_tool_calls,
+                        analysis,
+                        partial_text,
+                        usage,
+                        request_id,
+                        Arc::new(current_request),
+                    );
+
+                    // Inject MCP metadata for all executed calls
+                    if mcp_tracking.total_calls() > 0 {
+                        inject_mcp_metadata(&mut response, &mcp_tracking, &ctx.mcp_manager);
+                    }
+
+                    return Ok(response);
+                }
+
+                // Only MCP tools - continue loop with their results
+                debug!("Only MCP tools - continuing loop with results");

                // Build next request with appended history
                current_request = build_next_request_with_tools(
                    current_request,
-                    tool_calls,
-                    tool_results,
+                    mcp_tool_calls,
+                    mcp_results,
                    analysis,
                    partial_text,
                )
@@ -469,7 +531,9 @@ async fn execute_without_mcp_loop(
                "Function tool calls found - returning to caller"
            );

-            Ok(build_function_tool_response(
+            Ok(build_tool_response(
+                vec![],
+                vec![],
                tool_calls,
                analysis,
                partial_text,
@@ -602,6 +666,20 @@ async fn execute_mcp_tool_loop_streaming(
        );
    }

+    // Build HashSet of MCP tool names for O(1) lookup during streaming
+    // Clone tool names to owned strings to avoid borrowing current_request
+    let mcp_tool_names: std::collections::HashSet<String> = current_request
+        .tools
+        .as_ref()
+        .map(|tools| {
+            tools
+                .iter()
+                .filter(|t| t.r#type == ResponseToolType::Mcp)
+                .filter_map(|t| t.function.as_ref().map(|f| f.name.clone()))
+                .collect()
+        })
+        .unwrap_or_default();
+
    // Emit mcp_list_tools on first iteration
    let (output_index, item_id) = emitter.allocate_output_index(OutputItemType::McpListTools);

@@ -705,21 +783,21 @@ async fn execute_mcp_tool_loop_streaming(
            }
        };

-        // Process stream with token-level streaming (MCP path - emits mcp_call.* events)
-        let iteration_result =
-            match HarmonyStreamingProcessor::process_responses_iteration_stream_mcp(
-                execution_result,
-                emitter,
-                tx,
-            )
-            .await
-            {
-                Ok(result) => result,
-                Err(err_msg) => {
-                    emitter.emit_error(&err_msg, Some("processing_error"), tx);
-                    return;
-                }
-            };
+        // Process stream with token-level streaming (mixed tools - emits correct events per tool type)
+        let iteration_result = match HarmonyStreamingProcessor::process_responses_iteration_stream(
+            execution_result,
+            emitter,
+            tx,
+            &mcp_tool_names,
+        )
+        .await
+        {
+            Ok(result) => result,
+            Err(err_msg) => {
+                emitter.emit_error(&err_msg, Some("processing_error"), tx);
+                return;
+            }
+        };

        // Handle iteration result (tool calls or completion)
        match iteration_result {
@@ -734,7 +812,20 @@ async fn execute_mcp_tool_loop_streaming(
                    tool_call_count = tool_calls.len(),
                    has_analysis = analysis.is_some(),
                    partial_text_len = partial_text.len(),
-                    "MCP tool calls found in commentary channel - checking limits"
+                    "Tool calls found - separating MCP and function tools"
+                );
+
+                // Separate MCP and function tool calls based on tool type
+                let request_tools = current_request.tools.as_deref().unwrap_or(&[]);
+                let mcp_tool_names = build_mcp_tool_names_set(request_tools);
+                let (mcp_tool_calls, function_tool_calls): (Vec<_>, Vec<_>) = tool_calls
+                    .into_iter()
+                    .partition(|tc| mcp_tool_names.contains(tc.function.name.as_str()));
+
+                debug!(
+                    mcp_calls = mcp_tool_calls.len(),
+                    function_calls = function_tool_calls.len(),
+                    "Tool calls separated by type in streaming"
                );

                // Check combined limit (user's max_tool_calls vs safety limit)
@@ -743,12 +834,12 @@ async fn execute_mcp_tool_loop_streaming(
                    None => MAX_TOOL_ITERATIONS,
                };

-                // Check if we would exceed the limit with these new tool calls
-                let total_calls_after = mcp_tracking.total_calls() + tool_calls.len();
+                // Check if we would exceed the limit with these new MCP tool calls
+                let total_calls_after = mcp_tracking.total_calls() + mcp_tool_calls.len();
                if total_calls_after > effective_limit {
                    warn!(
                        current_calls = mcp_tracking.total_calls(),
-                        new_calls = tool_calls.len(),
+                        new_calls = mcp_tool_calls.len() + function_tool_calls.len(),
                        total_after = total_calls_after,
                        effective_limit = effective_limit,
                        user_max = ?max_tool_calls,
@@ -768,9 +859,10 @@ async fn execute_mcp_tool_loop_streaming(
                    return;
                }

-                // Execute MCP tools and continue loop
-                let tool_results =
-                    match execute_mcp_tools(&ctx.mcp_manager, &tool_calls, &mut mcp_tracking).await
+                // Execute MCP tools (if any)
+                let mcp_results = if !mcp_tool_calls.is_empty() {
+                    match execute_mcp_tools(&ctx.mcp_manager, &mcp_tool_calls, &mut mcp_tracking)
+                        .await
                    {
                        Ok(results) => results,
                        Err(err_response) => {
@@ -781,16 +873,42 @@ async fn execute_mcp_tool_loop_streaming(
                            );
                            return;
                        }
-                    };
+                    }
+                } else {
+                    Vec::new()
+                };

-                // Update mcp_call output items with execution results
-                emitter.update_mcp_call_outputs(&tool_results);
+                // Update mcp_call output items with execution results (if any MCP tools were executed)
+                if !mcp_results.is_empty() {
+                    emitter.update_mcp_call_outputs(&mcp_results);
+                }
+
+                // If there are function tools, exit MCP loop and emit completion
+                if !function_tool_calls.is_empty() {
+                    debug!(
+                        "Function tool calls present - exiting MCP loop and emitting completion"
+                    );
+
+                    // Function tool calls were already emitted during streaming processing
+                    // Just emit response.completed with usage
+                    let usage_json = json!({
+                        "input_tokens": usage.prompt_tokens,
+                        "output_tokens": usage.completion_tokens,
+                        "total_tokens": usage.total_tokens,
+                    });
+                    let event = emitter.emit_completed(Some(&usage_json));
+                    emitter.send_event_best_effort(&event, tx);
+                    return;
+                }
+
+                // Only MCP tools - continue loop with their results
+                debug!("Only MCP tools - continuing loop with results");

                // Build next request with appended history
                current_request = match build_next_request_with_tools(
                    current_request,
-                    tool_calls,
-                    tool_results,
+                    mcp_tool_calls,
+                    mcp_results,
                    analysis,
                    partial_text,
                ) {
@@ -873,20 +991,22 @@ async fn execute_without_mcp_streaming(
    };

    // Process stream (emits all output items during streaming - function tool path emits function_call_arguments.* events)
-    let iteration_result =
-        match HarmonyStreamingProcessor::process_responses_iteration_stream_function(
-            execution_result,
-            emitter,
-            tx,
-        )
-        .await
-        {
-            Ok(result) => result,
-            Err(err_msg) => {
-                emitter.emit_error(&err_msg, Some("processing_error"), tx);
-                return;
-            }
-        };
+    // Pass empty HashSet so all tools are treated as function tools (per-tool detection)
+    let empty_mcp_tools = std::collections::HashSet::new();
+    let iteration_result = match HarmonyStreamingProcessor::process_responses_iteration_stream(
+        execution_result,
+        emitter,
+        tx,
+        &empty_mcp_tools,
+    )
+    .await
+    {
+        Ok(result) => result,
+        Err(err_msg) => {
+            emitter.emit_error(&err_msg, Some("processing_error"), tx);
+            return;
+        }
+    };

    // Extract usage from iteration result
    let usage = match iteration_result {
@@ -917,17 +1037,17 @@ async fn execute_without_mcp_streaming(
    emitter.send_event_best_effort(&event, tx);
 }

-/// Build ResponsesResponse with function tool calls for caller to execute
-///
-/// When tool calls are found but no MCP client is available (function tools only),
-/// this builds a response with status=Completed and tool calls without output field.
-/// The absence of output signals the caller should execute tools and resume.
+/// Build ResponsesResponse with tool calls (MCP and/or function tools)
 ///
+/// MCP calls carry their execution output; function calls have `output: None`.
 /// TODO: Refactor to use builder pattern
-fn build_function_tool_response(
-    tool_calls: Vec<ToolCall>,
-    analysis: Option<String>,
-    partial_text: String,
+#[allow(clippy::too_many_arguments)]
+fn build_tool_response(
+    mcp_tool_calls: Vec<ToolCall>,
+    mcp_results: Vec<ToolResult>,
+    function_tool_calls: Vec<ToolCall>,
+    analysis: Option<String>, // Analysis channel content (reasoning)
+    partial_text: String,     // Final channel content (message)
    usage: Usage,
    request_id: String,
    responses_request: Arc<ResponsesRequest>,
@@ -960,20 +1080,40 @@ fn build_function_tool_response(
        });
    }

-    // Add function tool calls as completed output items (no output field = needs execution)
-    for tool_call in tool_calls {
+    // Add MCP tool calls WITH output (these were executed)
+    for (tool_call, result) in mcp_tool_calls.iter().zip(mcp_results.iter()) {
+        let output_str = to_string(&result.output).unwrap_or_else(|e| {
+            format!("{{\"error\": \"Failed to serialize tool output: {}\"}}", e)
+        });
+
        output.push(ResponseOutputItem::FunctionToolCall {
            id: tool_call.id.clone(),
            call_id: tool_call.id.clone(),
            name: tool_call.function.name.clone(),
            arguments: tool_call.function.arguments.clone().unwrap_or_default(),
-            output: None, // No output = tool needs execution by caller
+            output: Some(output_str),
+            status: if result.is_error {
+                "failed"
+            } else {
+                "completed"
+            }
+            .to_string(),
+        });
+    }
+
+    // Add function tool calls WITHOUT output (need caller execution)
+    for tool_call in function_tool_calls {
+        output.push(ResponseOutputItem::FunctionToolCall {
+            id: tool_call.id.clone(),
+            call_id: tool_call.id.clone(),
+            name: tool_call.function.name.clone(),
+            arguments: tool_call.function.arguments.clone().unwrap_or_default(),
+            output: None, // No output = needs execution
            status: "completed".to_string(),
        });
    }

    // Build ResponsesResponse with Completed status
-    // The presence of FunctionToolCall items without output signals tool execution needed
    let created_at = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap()
@@ -1023,13 +1163,6 @@ fn build_function_tool_response(
 /// Tool execution errors are returned as error results to the model
 /// (allows model to handle gracefully).
 ///
-/// # Arguments
-///
-/// * `mcp_manager` - MCP manager for tool execution
-/// * `tool_calls` - Tool calls from commentary channel
-///
-/// # Returns
-///
 /// Vector of tool results (one per tool call)
 async fn execute_mcp_tools(
    mcp_manager: &Arc<McpManager>,
@@ -1151,24 +1284,12 @@ async fn execute_mcp_tools(
 /// 1. Original input items (preserved)
 /// 2. Assistant message with analysis (reasoning) + partial_text + tool_calls
 /// 3. Tool result messages for each tool execution
-///
-/// # Arguments
-///
-/// * `request` - Current request (contains original input)
-/// * `tool_calls` - Tool calls from commentary channel
-/// * `tool_results` - Results from MCP tool execution
-/// * `analysis` - Analysis channel content (becomes reasoning content)
-/// * `partial_text` - Final channel content (becomes message content)
-///
-/// # Returns
-///
-/// New ResponsesRequest with appended history
 fn build_next_request_with_tools(
    mut request: ResponsesRequest,
    tool_calls: Vec<ToolCall>,
    tool_results: Vec<ToolResult>,
-    analysis: Option<String>,
-    partial_text: String,
+    analysis: Option<String>, // Analysis channel content (becomes reasoning content)
+    partial_text: String,     // Final channel content (becomes message content)
 ) -> Result<ResponsesRequest, Box<Response>> {
    // Get current input items (or empty vec if Text variant)
    let mut items = match request.input {

--- a/sgl-router/src/routers/grpc/harmony/streaming.rs
+++ b/sgl-router/src/routers/grpc/harmony/streaming.rs
--- a/sgl-router/src/routers/grpc/regular/responses/tool_loop.rs
+++ b/sgl-router/src/routers/grpc/regular/responses/tool_loop.rs
@@ -40,33 +40,6 @@ use crate::{
    },
 };

-/// Extract function call from a chat completion response
-/// Returns (call_id, tool_name, arguments_json_str) if found
-fn extract_function_call_from_chat(
-    response: &ChatCompletionResponse,
-) -> Option<(String, String, String)> {
-    // Check if response has choices with tool calls
-    let choice = response.choices.first()?;
-    let message = &choice.message;
-
-    // Look for tool_calls in the message
-    if let Some(tool_calls) = &message.tool_calls {
-        if let Some(tool_call) = tool_calls.first() {
-            return Some((
-                tool_call.id.clone(),
-                tool_call.function.name.clone(),
-                tool_call
-                    .function
-                    .arguments
-                    .clone()
-                    .unwrap_or_else(|| "{}".to_string()),
-            ));
-        }
-    }
-
-    None
-}
-
 /// Merge function tools from request with MCP tools and set tool_choice based on iteration
 fn prepare_chat_tools_and_choice(
    chat_request: &mut ChatCompletionRequest,
@@ -294,27 +267,61 @@ pub(super) async fn execute_tool_loop(
            )
            .await?;

-        // Check for function calls
-        if let Some((call_id, tool_name, args_json_str)) =
-            extract_function_call_from_chat(&chat_response)
-        {
+        // Check for function calls (extract all for parallel execution)
+        let tool_calls = extract_all_tool_calls_from_chat(&chat_response);
+
+        if !tool_calls.is_empty() {
            state.iteration += 1;

            debug!(
-                "Tool loop iteration {}: found call to {} (call_id: {})",
-                state.iteration, tool_name, call_id
+                "Tool loop iteration {}: found {} tool call(s)",
+                state.iteration,
+                tool_calls.len()
+            );
+
+            // Separate MCP and function tool calls
+            let mcp_tool_names: std::collections::HashSet<&str> =
+                mcp_tools.iter().map(|t| t.name.as_ref()).collect();
+            let (mcp_tool_calls, function_tool_calls): (Vec<_>, Vec<_>) = tool_calls
+                .into_iter()
+                .partition(|(_, tool_name, _)| mcp_tool_names.contains(tool_name.as_str()));
+
+            debug!(
+                "Separated tool calls: {} MCP, {} function",
+                mcp_tool_calls.len(),
+                function_tool_calls.len()
            );

-            // Check combined limit BEFORE executing
+            // If ANY tool call is a function tool, return to caller immediately
+            if !function_tool_calls.is_empty() {
+                // Convert chat response to responses format (includes all tool calls)
+                let responses_response = conversions::chat_to_responses(
+                    &chat_response,
+                    original_request,
+                    response_id.clone(),
+                )
+                .map_err(|e| {
+                    error::internal_error(format!("Failed to convert to responses format: {}", e))
+                })?;
+
+                // Return response with function tool calls to caller
+                return Ok(responses_response);
+            }
+
+            // All MCP tools - check combined limit BEFORE executing
            let effective_limit = match max_tool_calls {
                Some(user_max) => user_max.min(MAX_ITERATIONS),
                None => MAX_ITERATIONS,
            };

-            if state.total_calls >= effective_limit {
+            if state.total_calls + mcp_tool_calls.len() > effective_limit {
                warn!(
-                    "Reached tool call limit: {} (max_tool_calls={:?}, safety_limit={})",
-                    state.total_calls, max_tool_calls, MAX_ITERATIONS
+                    "Reached tool call limit: {} + {} > {} (max_tool_calls={:?}, safety_limit={})",
+                    state.total_calls,
+                    mcp_tool_calls.len(),
+                    effective_limit,
+                    max_tool_calls,
+                    MAX_ITERATIONS
                );

                // Convert chat response to responses format and mark as incomplete
@@ -334,46 +341,49 @@ pub(super) async fn execute_tool_loop(
                return Ok(responses_response);
            }

-            // Increment after check
-            state.total_calls += 1;
+            // Execute all MCP tools
+            for (call_id, tool_name, args_json_str) in mcp_tool_calls {
+                debug!(
+                    "Calling MCP tool '{}' (call_id: {}) with args: {}",
+                    tool_name, call_id, args_json_str
+                );

-            // Execute the MCP tool - manager handles parsing and type coercion
-            debug!(
-                "Calling MCP tool '{}' with args: {}",
-                tool_name, args_json_str
-            );
-            let (output_str, success, error) = match ctx
-                .mcp_manager
-                .call_tool(tool_name.as_str(), args_json_str.as_str())
-                .await
-            {
-                Ok(result) => match serde_json::to_string(&result) {
-                    Ok(output) => (output, true, None),
-                    Err(e) => {
-                        let err = format!("Failed to serialize tool result: {}", e);
-                        warn!("{}", err);
-                        let error_json = json!({ "error": &err }).to_string();
-                        (error_json, false, Some(err))
+                let (output_str, success, error) = match ctx
+                    .mcp_manager
+                    .call_tool(tool_name.as_str(), args_json_str.as_str())
+                    .await
+                {
+                    Ok(result) => match serde_json::to_string(&result) {
+                        Ok(output) => (output, true, None),
+                        Err(e) => {
+                            let err = format!("Failed to serialize tool result: {}", e);
+                            warn!("{}", err);
+                            let error_json = json!({ "error": &err }).to_string();
+                            (error_json, false, Some(err))
+                        }
+                    },
+                    Err(err) => {
+                        let err_str = format!("tool call failed: {}", err);
+                        warn!("Tool execution failed: {}", err_str);
+                        // Return error as output, let model decide how to proceed
+                        let error_json = json!({ "error": &err_str }).to_string();
+                        (error_json, false, Some(err_str))
                    }
-                },
-                Err(err) => {
-                    let err_str = format!("tool call failed: {}", err);
-                    warn!("Tool execution failed: {}", err_str);
-                    // Return error as output, let model decide how to proceed
-                    let error_json = json!({ "error": &err_str }).to_string();
-                    (error_json, false, Some(err_str))
-                }
-            };
+                };

-            // Record the call in state
-            state.record_call(
-                call_id,
-                tool_name,
-                args_json_str,
-                output_str,
-                success,
-                error,
-            );
+                // Record the call in state
+                state.record_call(
+                    call_id,
+                    tool_name,
+                    args_json_str,
+                    output_str,
+                    success,
+                    error,
+                );
+
+                // Increment total calls counter
+                state.total_calls += 1;
+            }

            // Build resume request with conversation history
            // Start with original input
@@ -687,17 +697,30 @@ async fn execute_tool_loop_streaming_internal(
                tool_calls.len()
            );

-            // Check combined limit
+            // Separate MCP and function tool calls
+            let mcp_tool_names: std::collections::HashSet<&str> =
+                mcp_tools.iter().map(|t| t.name.as_ref()).collect();
+            let (mcp_tool_calls, function_tool_calls): (Vec<_>, Vec<_>) = tool_calls
+                .into_iter()
+                .partition(|(_, tool_name, _)| mcp_tool_names.contains(tool_name.as_str()));
+
+            debug!(
+                "Separated tool calls: {} MCP, {} function",
+                mcp_tool_calls.len(),
+                function_tool_calls.len()
+            );
+
+            // Check combined limit (only count MCP tools since function tools will be returned)
            let effective_limit = match max_tool_calls {
                Some(user_max) => user_max.min(MAX_ITERATIONS),
                None => MAX_ITERATIONS,
            };

-            if state.total_calls + tool_calls.len() > effective_limit {
+            if state.total_calls + mcp_tool_calls.len() > effective_limit {
                warn!(
                    "Reached tool call limit: {} + {} > {} (max_tool_calls={:?}, safety_limit={})",
                    state.total_calls,
-                    tool_calls.len(),
+                    mcp_tool_calls.len(),
                    effective_limit,
                    max_tool_calls,
                    MAX_ITERATIONS
@@ -705,8 +728,8 @@ async fn execute_tool_loop_streaming_internal(
                break;
            }

-            // Process each tool call
-            for (call_id, tool_name, args_json_str) in tool_calls {
+            // Process each MCP tool call
+            for (call_id, tool_name, args_json_str) in mcp_tool_calls {
                state.total_calls += 1;

                debug!(
@@ -846,6 +869,70 @@ async fn execute_tool_loop_streaming_internal(
                );
            }

+            // If there are function tool calls, emit events and exit MCP loop
+            if !function_tool_calls.is_empty() {
+                debug!(
+                    "Found {} function tool call(s) - emitting events and exiting MCP loop",
+                    function_tool_calls.len()
+                );
+
+                // Emit function_tool_call events for each function tool
+                for (call_id, tool_name, args_json_str) in function_tool_calls {
+                    // Allocate output_index for this function_tool_call item
+                    let (output_index, item_id) =
+                        emitter.allocate_output_index(OutputItemType::FunctionCall);
+
+                    // Build initial function_tool_call item
+                    let item = json!({
+                        "id": item_id,
+                        "type": "function_tool_call",
+                        "call_id": call_id,
+                        "name": tool_name,
+                        "status": "in_progress",
+                        "arguments": ""
+                    });
+
+                    // Emit output_item.added
+                    let event = emitter.emit_output_item_added(output_index, &item);
+                    emitter.send_event(&event, &tx)?;
+
+                    // Emit function_call_arguments.delta
+                    let event = emitter.emit_function_call_arguments_delta(
+                        output_index,
+                        &item_id,
+                        &args_json_str,
+                    );
+                    emitter.send_event(&event, &tx)?;
+
+                    // Emit function_call_arguments.done
+                    let event = emitter.emit_function_call_arguments_done(
+                        output_index,
+                        &item_id,
+                        &args_json_str,
+                    );
+                    emitter.send_event(&event, &tx)?;
+
+                    // Build complete item
+                    let item_complete = json!({
+                        "id": item_id,
+                        "type": "function_tool_call",
+                        "call_id": call_id,
+                        "name": tool_name,
+                        "status": "completed",
+                        "arguments": args_json_str
+                    });
+
+                    // Emit output_item.done
+                    let event = emitter.emit_output_item_done(output_index, &item_complete);
+                    emitter.send_event(&event, &tx)?;
+
+                    emitter.complete_output_item(output_index);
+                }
+
+                // Break loop to return response to caller
+                break;
+            }
+
            // Build next request with conversation history
            let mut input_items = match &state.original_input {
                ResponseInput::Text(text) => vec![ResponseInputOutputItem::Message {