feat: streaming tool call and reasoning dispatch SSE events (#7114)

Signed-off-by: Matej Kosec <mkosec@nvidia.com>

Commit 5178a4a4 (unverified signature); the page also references d16862ad.
Authored by MatejKosec on Mar 11, 2026; committed by GitHub on Mar 11, 2026.
7 changed files
--- a/components/src/dynamo/frontend/frontend_args.py
+++ b/components/src/dynamo/frontend/frontend_args.py
@@ -73,6 +73,8 @@ class FrontendConfig(KvRouterConfigBase):
    enable_anthropic_api: bool
    strip_anthropic_preamble: bool
    debug_perf: bool
+    enable_streaming_tool_dispatch: bool
+    enable_streaming_reasoning_dispatch: bool
    preprocess_workers: int

    def validate(self) -> None:
@@ -355,6 +357,30 @@ class FrontendArgGroup(ArgGroup):
                "from the system prompt. Saves tokens and improves prompt caching."
            ),
        )
+        add_negatable_bool_argument(
+            g,
+            flag_name="--enable-streaming-tool-dispatch",
+            env_var="DYN_ENABLE_STREAMING_TOOL_DISPATCH",
+            default=False,
+            help=(
+                "[EXPERIMENTAL] Enable streaming tool call dispatch. Emits "
+                "'event: tool_call_dispatch' SSE events on /v1/chat/completions "
+                "for each complete tool call before finish_reason arrives. "
+                "Can be combined with --enable-streaming-reasoning-dispatch."
+            ),
+        )
+        add_negatable_bool_argument(
+            g,
+            flag_name="--enable-streaming-reasoning-dispatch",
+            env_var="DYN_ENABLE_STREAMING_REASONING_DISPATCH",
+            default=False,
+            help=(
+                "[EXPERIMENTAL] Enable streaming reasoning dispatch. Emits a "
+                "single 'event: reasoning_dispatch' SSE event on /v1/chat/completions "
+                "with the complete reasoning block once thinking ends. "
+                "Can be combined with --enable-streaming-tool-dispatch."
+            ),
+        )
        add_argument(
            g,
            flag_name="--dyn-chat-processor",

--- a/components/src/dynamo/frontend/main.py
+++ b/components/src/dynamo/frontend/main.py
@@ -264,6 +264,16 @@ async def async_main():
    else:
        os.environ.pop("DYN_STRIP_ANTHROPIC_PREAMBLE", None)

+    if config.enable_streaming_tool_dispatch:
+        os.environ["DYN_ENABLE_STREAMING_TOOL_DISPATCH"] = "1"
+    else:
+        os.environ.pop("DYN_ENABLE_STREAMING_TOOL_DISPATCH", None)
+
+    if config.enable_streaming_reasoning_dispatch:
+        os.environ["DYN_ENABLE_STREAMING_REASONING_DISPATCH"] = "1"
+    else:
+        os.environ.pop("DYN_ENABLE_STREAMING_REASONING_DISPATCH", None)
+
    if config.chat_processor == "vllm":
        assert (
            vllm_flags is not None

--- a/lib/llm/src/http/service/openai.rs
+++ b/lib/llm/src/http/service/openai.rs
--- a/lib/llm/src/http/service/service_v2.rs
+++ b/lib/llm/src/http/service/service_v2.rs
@@ -154,6 +154,26 @@ impl State {
    pub fn sse_keep_alive(&self) -> Option<Duration> {
        None
    }
+
+    /// Returns true if streaming tool call dispatch is enabled via
+    /// [`env_llm::DYN_ENABLE_STREAMING_TOOL_DISPATCH`].
+    ///
+    /// When enabled, the chat completions streaming path emits `event: tool_call_dispatch`
+    /// SSE events for each complete tool call, letting clients start processing tool calls
+    /// before `finish_reason="tool_calls"` arrives.
+    pub fn streaming_tool_dispatch_enabled(&self) -> bool {
+        env_is_truthy(env_llm::DYN_ENABLE_STREAMING_TOOL_DISPATCH)
+    }
+
+    /// Returns true if streaming reasoning dispatch is enabled via
+    /// [`env_llm::DYN_ENABLE_STREAMING_REASONING_DISPATCH`].
+    ///
+    /// When enabled, the chat completions streaming path accumulates reasoning tokens and
+    /// emits a single `event: reasoning_dispatch` SSE event with the complete reasoning
+    /// block once thinking ends (relevant to reasoning models such as DeepSeek-R1 and Qwen3).
+    pub fn streaming_reasoning_dispatch_enabled(&self) -> bool {
+        env_is_truthy(env_llm::DYN_ENABLE_STREAMING_REASONING_DISPATCH)
+    }
 }

 #[derive(Clone)]

--- a/lib/llm/src/protocols/anthropic/stream_converter.rs
+++ b/lib/llm/src/protocols/anthropic/stream_converter.rs
@@ -50,6 +50,9 @@ struct ToolCallState {
    accumulated_args: String,
    block_index: u32,
    started: bool,
+    /// Set when `content_block_stop` has already been emitted inline
+    /// (complete tool call detected mid-stream). Prevents duplicate stop in `emit_end_events()`.
+    stopped: bool,
 }

 impl AnthropicStreamConverter {
@@ -261,6 +264,7 @@ impl AnthropicStreamConverter {
                            accumulated_args: String::new(),
                            block_index,
                            started: false,
+                            stopped: false,
                        });
                    }

@@ -313,6 +317,20 @@ impl AnthropicStreamConverter {
                                },
                            };
                            events.push(make_sse_event("content_block_delta", &block_delta));
+
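+                            // Emit content_block_stop immediately when this chunk looks like a
+                            // complete tool call: it carries both the id and the name along with
+                            // the delta pushed just above. Dynamo backends emit each tool call
+                            // whole, so this fires on the same chunk — no need to wait for
+                            // finish_reason. NOTE(review): a backend that streams a call in pieces
+                            // would get its block stopped after the first piece — confirm none do.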
+                            if tc.id.is_some()
+                                && func.name.is_some()
+                                && !self.tool_call_states[tc_index].stopped
+                            {
+                                self.tool_call_states[tc_index].stopped = true;
+                                let block_stop =
+                                    AnthropicStreamEvent::ContentBlockStop { index: block_index };
+                                events.push(make_sse_event("content_block_stop", &block_stop));
+                            }
                        }
                    }
                }
@@ -350,9 +368,9 @@ impl AnthropicStreamConverter {
            events.push(make_sse_event("content_block_stop", &block_stop));
        }

-        // Close tool call blocks
+        // Close tool call blocks (skip any already stopped inline)
        for tc in &self.tool_call_states {
-            if tc.started {
+            if tc.started && !tc.stopped {
                let block_stop = AnthropicStreamEvent::ContentBlockStop {
                    index: tc.block_index,
                };
@@ -569,6 +587,7 @@ impl AnthropicStreamConverter {
                            accumulated_args: String::new(),
                            block_index,
                            started: false,
+                            stopped: false,
                        });
                    }
                    if let Some(id) = &tc.id {
@@ -611,6 +630,20 @@ impl AnthropicStreamConverter {
                                },
                            };
                            events.push(make_tagged_event("content_block_delta", &ev));
+
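+                            // Emit content_block_stop immediately when this chunk looks like a
+                            // complete tool call: it carries both the id and the name along with
+                            // the delta pushed just above. Dynamo backends emit each tool call
+                            // whole, so this fires on the same chunk — no need to wait for
+                            // finish_reason. NOTE(review): a backend that streams a call in pieces
+                            // would get its block stopped after the first piece — confirm none do.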
+                            if tc.id.is_some()
+                                && func.name.is_some()
+                                && !self.tool_call_states[tc_index].stopped
+                            {
+                                self.tool_call_states[tc_index].stopped = true;
+                                let ev =
+                                    AnthropicStreamEvent::ContentBlockStop { index: block_index };
+                                events.push(make_tagged_event("content_block_stop", &ev));
+                            }
                        }
                    }
                }
@@ -647,8 +680,9 @@ impl AnthropicStreamConverter {
            events.push(make_tagged_event("content_block_stop", &ev));
        }

+        // Skip already-stopped tool call blocks
        for tc in &self.tool_call_states {
-            if tc.started {
+            if tc.started && !tc.stopped {
                let ev = AnthropicStreamEvent::ContentBlockStop {
                    index: tc.block_index,
                };
@@ -788,9 +822,10 @@ mod tests {
            vec![
                "content_block_stop",
                "content_block_start",
-                "content_block_delta"
+                "content_block_delta",
+                "content_block_stop",
            ],
-            "text block must be closed before tool block starts"
+            "text block must be closed before tool block starts; complete tool call stopped inline"
        );

        // Verify indices: stop=0 (text), start=1 (tool)
@@ -814,17 +849,13 @@ mod tests {
            other => panic!("expected ContentBlockStart, got {other:?}"),
        }

-        // End events should NOT duplicate the text block stop
+        // End events should NOT duplicate either stop (both already emitted inline)
        let end_events = conv.emit_end_events_tagged();
        assert_eq!(
            event_types(&end_events),
-            vec!["content_block_stop", "message_delta", "message_stop"],
-            "only tool block stop in end events (text already closed)"
+            vec!["message_delta", "message_stop"],
+            "no block stops in end events (both text and tool already closed inline)"
        );
-        match &end_events[0].data {
-            AnthropicStreamEvent::ContentBlockStop { index } => assert_eq!(*index, 1),
-            other => panic!("expected tool stop at index 1, got {other:?}"),
-        }
    }

    /// Tool-only response (no preceding text): no spurious stop events.
@@ -840,13 +871,19 @@ mod tests {
        ));
        assert_eq!(
            event_types(&tool_events),
-            vec!["content_block_start", "content_block_delta"]
+            vec![
+                "content_block_start",
+                "content_block_delta",
+                "content_block_stop"
+            ],
+            "complete tool call emits stop inline"
        );

        let end_events = conv.emit_end_events_tagged();
        assert_eq!(
            event_types(&end_events),
-            vec!["content_block_stop", "message_delta", "message_stop"]
+            vec!["message_delta", "message_stop"],
+            "no block stop in end events (already stopped inline)"
        );
    }

@@ -937,7 +974,9 @@ mod tests {
            AnthropicStreamEvent::ContentBlockStart { index: 1, .. }
        ));

-        // 3. Tool call → text block closes, tool block opens at index 2
+        // 3. Tool call → text block closes, tool block opens at index 2.
+        //    Because the tool call arrives complete (id + name + args in one
+        //    chunk), inline dispatch also emits content_block_stop immediately.
        let ev = conv.process_chunk_tagged(&tool_call_chunk(
            0,
            Some("call-1"),
@@ -949,7 +988,8 @@ mod tests {
            vec![
                "content_block_stop",
                "content_block_start",
-                "content_block_delta"
+                "content_block_delta",
+                "content_block_stop"
            ]
        );
        assert!(matches!(
@@ -979,4 +1019,50 @@ mod tests {
            ]
        );
    }
+
+    /// Multiple tool calls: each gets inline content_block_stop.
+    #[test]
+    fn test_multiple_tool_calls_each_stopped_inline() {
+        let mut conv = AnthropicStreamConverter::new("test-model".into());
+
+        let events1 = conv.process_chunk_tagged(&tool_call_chunk(
+            0,
+            Some("call-1"),
+            Some("Read"),
+            Some("{\"path\":\"/tmp/a.txt\"}"),
+        ));
+        assert_eq!(
+            event_types(&events1),
+            vec![
+                "content_block_start",
+                "content_block_delta",
+                "content_block_stop"
+            ],
+            "first tool call closed inline"
+        );
+
+        let events2 = conv.process_chunk_tagged(&tool_call_chunk(
+            1,
+            Some("call-2"),
+            Some("Write"),
+            Some("{\"path\":\"/tmp/b.txt\"}"),
+        ));
+        assert_eq!(
+            event_types(&events2),
+            vec![
+                "content_block_start",
+                "content_block_delta",
+                "content_block_stop"
+            ],
+            "second tool call closed inline"
+        );
+
+        // End events: no block stops (both already closed)
+        let end_events = conv.emit_end_events_tagged();
+        assert_eq!(
+            event_types(&end_events),
+            vec!["message_delta", "message_stop"],
+            "no block stops in end events"
+        );
+    }
 }
--- a/lib/llm/src/protocols/openai/responses/stream_converter.rs
+++ b/lib/llm/src/protocols/openai/responses/stream_converter.rs
@@ -56,6 +56,9 @@ struct FunctionCallState {
    accumulated_args: String,
    output_index: u32,
    started: bool,
+    /// Set when done/item_done events have already been emitted inline
+    /// (complete tool call detected mid-stream). Prevents duplicate in `emit_end_events()`.
+    done: bool,
 }

 impl ResponseStreamConverter {
@@ -284,6 +287,7 @@ impl ResponseStreamConverter {
                            accumulated_args: String::new(),
                            output_index,
                            started: false,
+                            done: false,
                        });
                    }

@@ -323,19 +327,67 @@ impl ResponseStreamConverter {
                            self.function_call_items[tc_index]
                                .accumulated_args
                                .push_str(args);
-                            let item_id = self.function_call_items[tc_index].item_id.clone();
                            let output_index = self.function_call_items[tc_index].output_index;
+                            let is_complete = tc.id.is_some()
+                                && func.name.is_some()
+                                && !self.function_call_items[tc_index].done;
+
+                            // Copy item_id out of the state up front: args_delta takes a clone of it
+                            // and, if the call is complete, the done events consume the original.
+                            let item_id = self.function_call_items[tc_index].item_id.clone();
                            let seq = self.next_seq();
                            let args_delta =
                                ResponseStreamEvent::ResponseFunctionCallArgumentsDelta(
                                    ResponseFunctionCallArgumentsDeltaEvent {
                                        sequence_number: seq,
-                                        item_id,
+                                        item_id: item_id.clone(),
                                        output_index,
                                        delta: args.clone(),
                                    },
                                );
                            events.push(make_sse_event(&args_delta));
+
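+                            // Emit the arguments-done and output_item.done events immediately when
+                            // this chunk looks like a complete tool call: it carries both the id and
+                            // the name along with the arguments appended above. Dynamo backends emit
+                            // each tool call whole, so this fires on the same chunk — no need to wait
+                            // for finish_reason. NOTE(review): a backend that streams arguments in
+                            // pieces would be marked done after the first piece — confirm none do.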
+                            if is_complete {
+                                self.function_call_items[tc_index].done = true;
+                                // Reuse item_id from above; capture remaining values before self.next_seq()
+                                let fc_item_id = item_id;
+                                let fc_call_id = self.function_call_items[tc_index].call_id.clone();
+                                let fc_name = self.function_call_items[tc_index].name.clone();
+                                let fc_args =
+                                    self.function_call_items[tc_index].accumulated_args.clone();
+                                let fc_output_index =
+                                    self.function_call_items[tc_index].output_index;
+
+                                let args_done =
+                                    ResponseStreamEvent::ResponseFunctionCallArgumentsDone(
+                                        ResponseFunctionCallArgumentsDoneEvent {
+                                            sequence_number: self.next_seq(),
+                                            item_id: fc_item_id.clone(),
+                                            output_index: fc_output_index,
+                                            arguments: fc_args.clone(),
+                                            name: Some(fc_name.clone()),
+                                        },
+                                    );
+                                events.push(make_sse_event(&args_done));
+
+                                let item_done = ResponseStreamEvent::ResponseOutputItemDone(
+                                    ResponseOutputItemDoneEvent {
+                                        sequence_number: self.next_seq(),
+                                        output_index: fc_output_index,
+                                        item: OutputItem::FunctionCall(FunctionToolCall {
+                                            id: Some(fc_item_id),
+                                            call_id: fc_call_id,
+                                            name: fc_name,
+                                            arguments: fc_args,
+                                            status: Some(OutputStatus::Completed),
+                                        }),
+                                    },
+                                );
+                                events.push(make_sse_event(&item_done));
+                            }
                        }
                    }
                }
@@ -393,11 +445,11 @@ impl ResponseStreamConverter {
            events.push(make_sse_event(&item_done));
        }

-        // Close any function call items - collect data first to avoid borrow conflicts
+        // Close any function call items not already done inline
        let fc_data: Vec<_> = self
            .function_call_items
            .iter()
-            .filter(|fc| fc.started)
+            .filter(|fc| fc.started && !fc.done)
            .map(|fc| {
                (
                    fc.item_id.clone(),
@@ -598,3 +650,262 @@ fn get_event_type(event: &ResponseStreamEvent) -> &'static str {
        ResponseStreamEvent::ResponseError(_) => "error",
    }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use dynamo_async_openai::types::{
+        ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionMessageToolCallChunk,
+        ChatCompletionStreamResponseDelta, ChatCompletionToolType, FunctionCallStream,
+    };
+
+    fn default_params() -> ResponseParams {
+        ResponseParams {
+            model: None,
+            temperature: None,
+            top_p: None,
+            max_output_tokens: None,
+            store: None,
+            tools: None,
+            tool_choice: None,
+            instructions: None,
+            reasoning: None,
+            text: None,
+            service_tier: None,
+            include: None,
+            truncation: None,
+        }
+    }
+
+    fn tool_call_chunk(
+        tc_index: u32,
+        id: Option<&str>,
+        name: Option<&str>,
+        args: Option<&str>,
+    ) -> NvCreateChatCompletionStreamResponse {
+        #[allow(deprecated)]
+        NvCreateChatCompletionStreamResponse {
+            id: "chat-1".into(),
+            choices: vec![ChatChoiceStream {
+                index: 0,
+                delta: ChatCompletionStreamResponseDelta {
+                    content: None,
+                    function_call: None,
+                    tool_calls: Some(vec![ChatCompletionMessageToolCallChunk {
+                        index: tc_index,
+                        id: id.map(String::from),
+                        r#type: Some(ChatCompletionToolType::Function),
+                        function: Some(FunctionCallStream {
+                            name: name.map(String::from),
+                            arguments: args.map(String::from),
+                        }),
+                    }]),
+                    role: None,
+                    refusal: None,
+                    reasoning_content: None,
+                },
+                finish_reason: None,
+                stop_reason: None,
+                logprobs: None,
+            }],
+            created: 0,
+            model: "test".into(),
+            service_tier: None,
+            system_fingerprint: None,
+            object: "chat.completion.chunk".into(),
+            usage: None,
+            nvext: None,
+        }
+    }
+
+    fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
+        #[allow(deprecated)]
+        NvCreateChatCompletionStreamResponse {
+            id: "chat-1".into(),
+            choices: vec![ChatChoiceStream {
+                index: 0,
+                delta: ChatCompletionStreamResponseDelta {
+                    content: Some(ChatCompletionMessageContent::Text(text.into())),
+                    function_call: None,
+                    tool_calls: None,
+                    role: None,
+                    refusal: None,
+                    reasoning_content: None,
+                },
+                finish_reason: None,
+                stop_reason: None,
+                logprobs: None,
+            }],
+            created: 0,
+            model: "test".into(),
+            service_tier: None,
+            system_fingerprint: None,
+            object: "chat.completion.chunk".into(),
+            usage: None,
+            nvext: None,
+        }
+    }
+
+    /// Extract the SSE event type from a Result<Event, _>.
+    fn event_type(event: &Result<Event, anyhow::Error>) -> String {
+        let debug = format!("{:?}", event.as_ref().unwrap());
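+        // Scrape the event type out of the `Debug` rendering, which is assumed to show
+        // the serialized SSE buffer as `event: response.xxx` followed by an escaped
+        // newline (a literal backslash + `n`). Falls back to "unknown" if that shape changes.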
+        if let Some(start) = debug.find("event: ") {
+            let rest = &debug[start + 7..];
+            if let Some(end) = rest.find("\\n") {
+                return rest[..end].to_string();
+            }
+        }
+        "unknown".to_string()
+    }
+
+    fn event_types(events: &[Result<Event, anyhow::Error>]) -> Vec<String> {
+        events.iter().map(event_type).collect()
+    }
+
+    /// Complete tool call emits function_call_arguments.done + output_item.done inline.
+    #[test]
+    fn test_complete_tool_call_emits_done_inline() {
+        let mut conv = ResponseStreamConverter::new("test-model".into(), default_params());
+        let _ = conv.emit_start_events(); // consume start events
+
+        let events = conv.process_chunk(&tool_call_chunk(
+            0,
+            Some("call-1"),
+            Some("get_weather"),
+            Some("{\"city\":\"SF\"}"),
+        ));
+
+        let types = event_types(&events);
+        assert!(
+            types.contains(&"response.output_item.added".to_string()),
+            "should emit output_item.added: {types:?}"
+        );
+        assert!(
+            types.contains(&"response.function_call_arguments.delta".to_string()),
+            "should emit args delta: {types:?}"
+        );
+        assert!(
+            types.contains(&"response.function_call_arguments.done".to_string()),
+            "should emit args done inline: {types:?}"
+        );
+        assert!(
+            types.contains(&"response.output_item.done".to_string()),
+            "should emit output_item.done inline: {types:?}"
+        );
+
+        // End events should NOT duplicate the done events
+        let end_types = event_types(&conv.emit_end_events());
+        assert!(
+            !end_types.contains(&"response.function_call_arguments.done".to_string()),
+            "done should not be duplicated in end events: {end_types:?}"
+        );
+        assert!(
+            !end_types.contains(&"response.output_item.done".to_string())
+                || end_types
+                    .iter()
+                    .filter(|t| *t == "response.output_item.done")
+                    .count()
+                    == 0,
+            "output_item.done for the tool should not appear in end events"
+        );
+    }
+
+    /// Multiple tool calls each get their own inline done events.
+    #[test]
+    fn test_multiple_tool_calls_each_emit_done_inline() {
+        let mut conv = ResponseStreamConverter::new("test-model".into(), default_params());
+        let _ = conv.emit_start_events();
+
+        let events1 = conv.process_chunk(&tool_call_chunk(
+            0,
+            Some("call-1"),
+            Some("get_weather"),
+            Some("{\"city\":\"SF\"}"),
+        ));
+        let types1 = event_types(&events1);
+        assert!(
+            types1.contains(&"response.function_call_arguments.done".to_string()),
+            "first tool call done inline: {types1:?}"
+        );
+
+        let events2 = conv.process_chunk(&tool_call_chunk(
+            1,
+            Some("call-2"),
+            Some("get_time"),
+            Some("{\"tz\":\"PST\"}"),
+        ));
+        let types2 = event_types(&events2);
+        assert!(
+            types2.contains(&"response.function_call_arguments.done".to_string()),
+            "second tool call done inline: {types2:?}"
+        );
+
+        // End events should have no function call done events
+        let end_types = event_types(&conv.emit_end_events());
+        let fc_done_count = end_types
+            .iter()
+            .filter(|t| *t == "response.function_call_arguments.done")
+            .count();
+        assert_eq!(
+            fc_done_count, 0,
+            "no function_call_arguments.done in end events: {end_types:?}"
+        );
+    }
+
+    /// Text-only response: no tool-related events at all.
+    #[test]
+    fn test_text_only_response_no_tool_events() {
+        let mut conv = ResponseStreamConverter::new("test-model".into(), default_params());
+        let _ = conv.emit_start_events();
+
+        let events = conv.process_chunk(&text_chunk("Hello world"));
+        let types = event_types(&events);
+        assert!(
+            !types.contains(&"response.function_call_arguments.done".to_string()),
+            "no tool events in text-only: {types:?}"
+        );
+
+        let end_events = conv.emit_end_events();
+        let end_types = event_types(&end_events);
+        assert!(
+            end_types.contains(&"response.output_text.done".to_string()),
+            "text done in end events: {end_types:?}"
+        );
+        assert!(
+            end_types.contains(&"response.completed".to_string()),
+            "completed in end events: {end_types:?}"
+        );
+    }
+
+    /// Text followed by tool call: both handled correctly.
+    #[test]
+    fn test_text_then_tool_call() {
+        let mut conv = ResponseStreamConverter::new("test-model".into(), default_params());
+        let _ = conv.emit_start_events();
+
+        let text_events = conv.process_chunk(&text_chunk("Let me check that."));
+        let text_types = event_types(&text_events);
+        assert!(
+            text_types.contains(&"response.output_item.added".to_string()),
+            "text message started: {text_types:?}"
+        );
+
+        let tool_events = conv.process_chunk(&tool_call_chunk(
+            0,
+            Some("call-1"),
+            Some("search"),
+            Some("{\"q\":\"rust\"}"),
+        ));
+        let tool_types = event_types(&tool_events);
+        assert!(
+            tool_types.contains(&"response.function_call_arguments.done".to_string()),
+            "tool call done inline after text: {tool_types:?}"
+        );
+        assert!(
+            tool_types.contains(&"response.output_item.done".to_string()),
+            "output_item.done inline after text: {tool_types:?}"
+        );
+    }
+}
--- a/lib/runtime/src/config/environment_names.rs
+++ b/lib/runtime/src/config/environment_names.rs
@@ -285,6 +285,13 @@ pub mod llm {
    /// varies per session and per release, wasting tokens and breaking prompt caching.
    pub const DYN_STRIP_ANTHROPIC_PREAMBLE: &str = "DYN_STRIP_ANTHROPIC_PREAMBLE";

+    /// Enable streaming tool call dispatch (`event: tool_call_dispatch` SSE events)
+    pub const DYN_ENABLE_STREAMING_TOOL_DISPATCH: &str = "DYN_ENABLE_STREAMING_TOOL_DISPATCH";
+
+    /// Enable streaming reasoning dispatch (`event: reasoning_dispatch` SSE events)
+    pub const DYN_ENABLE_STREAMING_REASONING_DISPATCH: &str =
+        "DYN_ENABLE_STREAMING_REASONING_DISPATCH";
+
    /// Metrics configuration
    pub mod metrics {
        /// Custom metrics prefix (overrides default "dynamo_frontend")
@@ -464,6 +471,8 @@ mod tests {
            llm::DYN_LORA_PATH,
            llm::DYN_ENABLE_ANTHROPIC_API,
            llm::DYN_STRIP_ANTHROPIC_PREAMBLE,
+            llm::DYN_ENABLE_STREAMING_TOOL_DISPATCH,
+            llm::DYN_ENABLE_STREAMING_REASONING_DISPATCH,
            llm::metrics::DYN_METRICS_PREFIX,
            // Model
            model::model_express::MODEL_EXPRESS_URL,