feat: text to image vLLM Omni (#5912)

Signed-off-by: ayushag <ayushag@nvidia.com>

feat: text to image vLLM Omni (#5912)
Signed-off-by: ayushag <ayushag@nvidia.com>
9f76d060 · Ayush Agarwal · GitHub · d14d6ff4 · 9f76d060 · 9f76d060
Unverified Commit 9f76d060 authored Feb 09, 2026 by Ayush Agarwal Committed by GitHub Feb 09, 2026
9 changed files
--- a/lib/llm/src/protocols/openai/chat_completions/delta.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/delta.rs
@@ -270,7 +270,7 @@ impl DeltaGenerator {
        stop_reason: Option<dynamo_async_openai::types::StopReason>,
    ) -> NvCreateChatCompletionStreamResponse {
        let delta = dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
-            content: text,
+            content: text.map(dynamo_async_openai::types::ChatCompletionMessageContent::Text),
            function_call: None,
            tool_calls: None,
            role: if self.msg_counter == 0 {

--- a/lib/llm/src/protocols/openai/chat_completions/jail.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/jail.rs
@@ -112,7 +112,9 @@ fn create_choice_stream(
        index,
        delta: ChatCompletionStreamResponseDelta {
            role,
-            content: Some(content.to_string()),
+            content: Some(
+                dynamo_async_openai::types::ChatCompletionMessageContent::Text(content.to_string()),
+            ),
            tool_calls,
            function_call: None,
            refusal: None,
@@ -533,23 +535,32 @@ impl JailedStream {
                    // Process each choice independently using the new architecture
                    for choice in &chat_response.choices {
                        if let Some(ref content) = choice.delta.content {
-                            let starts_jailed = matches!(self.jail_mode, JailMode::Immediate { .. });
-                            let choice_state = choice_states.get_or_create_state(choice.index, starts_jailed);
-
-                            // Store metadata when any choice becomes jailed (first time only)
-                            if !choice_state.is_jailed && self.should_start_jail(content)
-                                && last_annotated_id.is_none() {
-                                    last_annotated_id = response.id.clone();
-                                    last_annotated_event = response.event.clone();
-                                    last_annotated_comment = response.comment.clone();
-                                }
+                            // Jailing only applies to text content
+                            let text_content = match content {
+                                dynamo_async_openai::types::ChatCompletionMessageContent::Text(text) => Some(text.as_str()),
+                                dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_) => None,
+                            };
+
+                            if let Some(text) = text_content {
+                                let starts_jailed = matches!(self.jail_mode, JailMode::Immediate { .. });
+                                let choice_state = choice_states.get_or_create_state(choice.index, starts_jailed);
+
+                                // Store metadata when any choice becomes jailed (first time only)
+                                if !choice_state.is_jailed && self.should_start_jail(text)
+                                    && last_annotated_id.is_none() {
+                                        last_annotated_id = response.id.clone();
+                                        last_annotated_event = response.event.clone();
+                                        last_annotated_comment = response.comment.clone();
+                                    }

-                            // Track actual stream finish reason in the choice state
-                            choice_state.stream_finish_reason = choice.finish_reason;
+                                // Track actual stream finish reason in the choice state
+                                choice_state.stream_finish_reason = choice.finish_reason;

-                            // Process this choice and get emissions
-                            let emissions = choice_state.process_content(choice, content, &self).await;
-                            all_emissions.extend(emissions);
+                                // Process this choice and get emissions
+                                let emissions = choice_state.process_content(choice, text, &self).await;
+                                all_emissions.extend(emissions);
+                            }
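+                            // Multimodal (Parts) content is never jailed. NOTE(review): no emission is added for it in this branch — confirm it is passed through downstream rather than dropped.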
                        } else {
                            // Handle choices without content (e.g., final chunks with finish_reason)
                            // Only filter out if this choice was ever jailed and lacks role

--- a/lib/llm/src/protocols/openai/responses.rs
+++ b/lib/llm/src/protocols/openai/responses.rs
@@ -222,8 +222,20 @@ impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
            .and_then(|choice| choice.message.content)
            .unwrap_or_else(|| {
                tracing::warn!("No choices in chat completion response, using empty content");
-                String::new()
+                dynamo_async_openai::types::ChatCompletionMessageContent::Text(String::new())
            });
+
+        // Extract text from content (only handle text for responses API)
+        let text_content = match content_text {
+            dynamo_async_openai::types::ChatCompletionMessageContent::Text(text) => text,
+            dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_) => {
+                tracing::warn!(
+                    "Multimodal content in responses API not yet supported, using placeholder"
+                );
+                "[multimodal content]".to_string()
+            }
+        };
+
        let message_id = format!("msg_{}", Uuid::new_v4().simple());
        let response_id = format!("resp_{}", Uuid::new_v4().simple());

@@ -232,7 +244,7 @@ impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
            role: ResponseRole::Assistant,
            status: OutputStatus::Completed,
            content: vec![Content::OutputText(OutputText {
-                text: content_text,
+                text: text_content,
                annotations: vec![],
            })],
        })];
@@ -363,7 +375,11 @@ mod tests {
            choices: vec![dynamo_async_openai::types::ChatChoice {
                index: 0,
                message: dynamo_async_openai::types::ChatCompletionResponseMessage {
-                    content: Some("This is a reply".into()),
+                    content: Some(
+                        dynamo_async_openai::types::ChatCompletionMessageContent::Text(
+                            "This is a reply".to_string(),
+                        ),
+                    ),
                    refusal: None,
                    tool_calls: None,
                    role: dynamo_async_openai::types::Role::Assistant,

--- a/lib/llm/tests/aggregators.rs
+++ b/lib/llm/tests/aggregators.rs
 // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0

+use dynamo_async_openai::types::ChatCompletionMessageContent;
 use dynamo_llm::protocols::{
    ContentProvider, DataStream,
    codec::{Message, SseCodecError, create_message_stream},
@@ -12,6 +13,13 @@ use dynamo_llm::protocols::{
 };
 use futures::StreamExt;

+fn get_text(content: &ChatCompletionMessageContent) -> &str {
+    match content {
+        ChatCompletionMessageContent::Text(text) => text.as_str(),
+        ChatCompletionMessageContent::Parts(_) => "", // non-text (multi-part) content is treated as empty text
+    }
+}
+
 const CMPL_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/completions";
 const CHAT_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/chat_completions";

@@ -35,16 +43,17 @@ async fn test_openai_chat_stream() {

    // todo: provide a cleaner way to extract the content from choices
    assert_eq!(
-        result
-            .choices
-            .first()
-            .unwrap()
-            .message
-            .content
-            .clone()
-            .expect("there to be content"),
+        get_text(
+            result
+                .choices
+                .first()
+                .unwrap()
+                .message
+                .content
+                .as_ref()
+                .expect("there to be content")
+        ),
        "Deep learning is a subfield of machine learning that involves the use of artificial"
-            .to_string()
    );
 }

@@ -59,15 +68,17 @@ async fn test_openai_chat_edge_case_multi_line_data() {
    .unwrap();

    assert_eq!(
-        result
-            .choices
-            .first()
-            .unwrap()
-            .message
-            .content
-            .clone()
-            .expect("there to be content"),
-        "Deep learning".to_string()
+        get_text(
+            result
+                .choices
+                .first()
+                .unwrap()
+                .message
+                .content
+                .as_ref()
+                .expect("there to be content")
+        ),
+        "Deep learning"
    );
 }

@@ -82,15 +93,17 @@ async fn test_openai_chat_edge_case_comments_per_response() {
    .unwrap();

    assert_eq!(
-        result
-            .choices
-            .first()
-            .unwrap()
-            .message
-            .content
-            .clone()
-            .expect("there to be content"),
-        "Deep learning".to_string()
+        get_text(
+            result
+                .choices
+                .first()
+                .unwrap()
+                .message
+                .content
+                .as_ref()
+                .expect("there to be content")
+        ),
+        "Deep learning"
    );
 }


--- a/lib/llm/tests/logprob_analysis_integration.rs
+++ b/lib/llm/tests/logprob_analysis_integration.rs
@@ -11,8 +11,8 @@ use dynamo_llm::perf::{RecordedStream, TimestampedResponse};
 use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;

 use dynamo_async_openai::types::{
-    ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
-    ChatCompletionTokenLogprob, FinishReason, Role, TopLogprobs,
+    ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionMessageContent,
+    ChatCompletionStreamResponseDelta, ChatCompletionTokenLogprob, FinishReason, Role, TopLogprobs,
 };

 // Type aliases to simplify complex test data structures
@@ -380,7 +380,7 @@ fn create_response_with_linear_probs(
    let choice = ChatChoiceStream {
        index: 0,
        delta: ChatCompletionStreamResponseDelta {
-            content: Some(_content.to_string()),
+            content: Some(ChatCompletionMessageContent::Text(_content.to_string())),
            #[expect(deprecated)]
            function_call: None,
            tool_calls: None,
@@ -460,7 +460,7 @@ fn create_multi_choice_response(
            ChatChoiceStream {
                index: choice_idx as u32,
                delta: ChatCompletionStreamResponseDelta {
-                    content: Some("test".to_string()),
+                    content: Some(ChatCompletionMessageContent::Text("test".to_string())),
                    #[expect(deprecated)]
                    function_call: None,
                    tool_calls: None,

--- a/lib/llm/tests/test_jail.rs
+++ b/lib/llm/tests/test_jail.rs
@@ -16,6 +16,15 @@ mod tests {
    // Test utilities module - shared test infrastructure
    pub(crate) mod test_utils {
        use super::*;
+        use dynamo_async_openai::types::ChatCompletionMessageContent;
+
+        /// Returns the text held by `content`, or `""` when it holds `Parts` (non-text) content.
+        pub fn extract_text(content: &ChatCompletionMessageContent) -> &str {
+            match content {
+                ChatCompletionMessageContent::Text(text) => text.as_str(),
+                ChatCompletionMessageContent::Parts(_) => "",
+            }
+        }

        /// Helper function to create a mock chat response chunk
        pub fn create_mock_response_chunk(
@@ -27,7 +36,7 @@ mod tests {
                index,
                delta: ChatCompletionStreamResponseDelta {
                    role: Some(Role::Assistant),
-                    content: Some(content),
+                    content: Some(ChatCompletionMessageContent::Text(content)),
                    tool_calls: None,
                    function_call: None,
                    refusal: None,
@@ -111,7 +120,7 @@ mod tests {
                index,
                delta: ChatCompletionStreamResponseDelta {
                    role: Some(Role::Assistant),
-                    content: Some(content),
+                    content: Some(ChatCompletionMessageContent::Text(content)),
                    tool_calls: None,
                    function_call: None,
                    refusal: None,
@@ -154,7 +163,7 @@ mod tests {
                        index,
                        delta: ChatCompletionStreamResponseDelta {
                            role: Some(Role::Assistant),
-                            content: Some(content),
+                            content: Some(ChatCompletionMessageContent::Text(content)),
                            tool_calls: None,
                            function_call: None,
                            refusal: None,
@@ -245,9 +254,11 @@ mod tests {
                .expect("Expected content in result");

            assert_eq!(
-                content, expected,
+                extract_text(content),
+                expected,
                "Content mismatch: expected '{}', got '{}'",
-                expected, content
+                expected,
+                extract_text(content)
            );
        }

@@ -301,7 +312,11 @@ mod tests {
            {
                assert!(
                    choice.delta.content.is_none()
-                        || choice.delta.content.as_ref().unwrap().is_empty(),
+                        || choice.delta.content.as_ref().is_none_or(|c| match c {
+                            dynamo_async_openai::types::ChatCompletionMessageContent::Text(t) =>
+                                t.is_empty(),
+                            _ => false,
+                        }),
                    "Expected no content but got: {:?}",
                    choice.delta.content
                );
@@ -326,7 +341,7 @@ mod tests {
                        .and_then(|d| d.choices.first())
                        .and_then(|c| c.delta.content.as_ref())
                })
-                .cloned()
+                .map(extract_text)
                .collect::<Vec<_>>()
                .join("")
        }
@@ -338,7 +353,10 @@ mod tests {
                .as_ref()
                .and_then(|d| d.choices.first())
                .and_then(|c| c.delta.content.as_ref())
-                .cloned()
+                .and_then(|content| match content {
+                    ChatCompletionMessageContent::Text(text) => Some(text.clone()),
+                    ChatCompletionMessageContent::Parts(_) => None,
+                })
                .unwrap_or_default()
        }

@@ -361,7 +379,7 @@ mod tests {
                .as_ref()
                .and_then(|d| d.choices.first())
                .and_then(|c| c.delta.content.as_ref())
-                .map(|content| !content.is_empty())
+                .map(|content| !extract_text(content).is_empty())
                .unwrap_or(false)
        }
    }
@@ -402,7 +420,8 @@ mod tests {
            results[0].data.as_ref().unwrap().choices[0]
                .delta
                .content
-                .as_deref(),
+                .as_ref()
+                .map(extract_text),
            Some("Hello ")
        );

@@ -410,9 +429,7 @@ mod tests {
        let unjailed_content = &results[1].data.as_ref().unwrap().choices[0].delta.content;
        assert!(unjailed_content.is_some());
        assert!(
-            unjailed_content
-                .as_ref()
-                .unwrap()
+            extract_text(unjailed_content.as_ref().unwrap())
                .contains("<jail>This is jailed content</jail>")
        );

@@ -421,7 +438,8 @@ mod tests {
            results[2].data.as_ref().unwrap().choices[0]
                .delta
                .content
-                .as_deref(),
+                .as_ref()
+                .map(extract_text),
            Some(" World")
        );
    }
@@ -494,7 +512,8 @@ mod tests {
            results[0].data.as_ref().unwrap().choices[0]
                .delta
                .content
-                .as_deref(),
+                .as_ref()
+                .map(extract_text),
            Some("Normal text ")
        );

@@ -504,7 +523,7 @@ mod tests {
            .content
            .as_ref()
            .expect("Expected accumulated jailed content");
-        assert!(jailed.contains("<jail><TOOLCALL>Jailed content</jail>"));
+        assert!(extract_text(jailed).contains("<jail><TOOLCALL>Jailed content</jail>"));
    }

    #[tokio::test]
@@ -1298,11 +1317,11 @@ mod tests {
        assert!(content.is_some(), "Should have accumulated content");
        let content = content.as_ref().unwrap();
        assert!(
-            content.contains("<tool_call>"),
+            test_utils::extract_text(content).contains("<tool_call>"),
            "Should contain jail start marker in accumulated content"
        );
        assert!(
-            content.contains("incomplete_call"),
+            test_utils::extract_text(content).contains("incomplete_call"),
            "Should contain accumulated incomplete content"
        );
    }
@@ -1672,7 +1691,8 @@ mod tests {
            .as_ref()
            .unwrap();
        assert_eq!(
-            content, "Hello, world!",
+            extract_text(content),
+            "Hello, world!",
            "Content chunk should have 'Hello, world!'"
        );

@@ -1860,7 +1880,10 @@ mod tests {
                .as_ref()
                .and_then(|d| d.choices.first())
                .and_then(|c| c.delta.content.as_ref())
-                .map(|content| content.contains("Need to use function get_current_weather."))
+                .map(|content| {
+                    test_utils::extract_text(content)
+                        .contains("Need to use function get_current_weather.")
+                })
                .unwrap_or(false)
        });
        assert!(has_analysis_text, "Should contain extracted analysis text");
@@ -1912,7 +1935,7 @@ mod tests {
            for choice in data.choices {
                if let Some(content) = choice.delta.content {
                    assert!(
-                        !content.contains("<｜tool▁calls▁end｜>"),
+                        !test_utils::extract_text(&content).contains("<｜tool▁calls▁end｜>"),
                        "Should not contain deepseek special tokens in content"
                    );
                }
@@ -1986,7 +2009,7 @@ mod tests {
            for choice in data.choices {
                if let Some(content) = choice.delta.content {
                    assert!(
-                        !content.contains("<｜tool▁calls▁end｜>"),
+                        !test_utils::extract_text(&content).contains("<｜tool▁calls▁end｜>"),
                        "Should not contain deepseek special tokens in content"
                    );
                }
@@ -2184,7 +2207,8 @@ mod tests {
                    .and_then(|c| c.delta.content.as_ref())
            })
            .filter(|content| {
-                content.contains("<tool_call>") || content.contains("should not jail")
+                test_utils::extract_text(content).contains("<tool_call>")
+                    || test_utils::extract_text(content).contains("should not jail")
            })
            .collect();

@@ -2202,7 +2226,10 @@ mod tests {
                    .and_then(|d| d.choices.first())
                    .and_then(|c| c.delta.content.as_ref())
            })
-            .find(|content| content.contains("[[START]]") && content.contains("jailed content"));
+            .find(|content| {
+                test_utils::extract_text(content).contains("[[START]]")
+                    && test_utils::extract_text(content).contains("jailed content")
+            });

        assert!(
            jailed_chunk.is_some(),
@@ -2320,6 +2347,7 @@ mod tests {
 mod parallel_jail_tests {
    use super::tests::test_utils;
    use super::*;
+    use dynamo_async_openai::types::ChatCompletionMessageContent;
    use futures::StreamExt;
    use futures::stream;
    use serde_json::json;
@@ -2337,7 +2365,7 @@ mod parallel_jail_tests {
                    index: i as u32,
                    delta: ChatCompletionStreamResponseDelta {
                        role: Some(Role::Assistant),
-                        content: Some(content),
+                        content: Some(ChatCompletionMessageContent::Text(content)),
                        tool_calls: None,
                        function_call: None,
                        refusal: None,
@@ -2589,10 +2617,9 @@ mod parallel_jail_tests {
        let normal_text_before = results.iter().find(|r| {
            r.data.as_ref().is_some_and(|d| {
                d.choices.iter().any(|c| {
-                    c.delta
-                        .content
-                        .as_ref()
-                        .is_some_and(|content| content.contains("I'll check the weather"))
+                    c.delta.content.as_ref().is_some_and(|content| {
+                        test_utils::extract_text(content).contains("I'll check the weather")
+                    })
                })
            })
        });
@@ -2619,10 +2646,9 @@ mod parallel_jail_tests {
        let normal_text_after = results.iter().find(|r| {
            r.data.as_ref().is_some_and(|d| {
                d.choices.iter().any(|c| {
-                    c.delta
-                        .content
-                        .as_ref()
-                        .is_some_and(|content| content.contains("Let me get that information"))
+                    c.delta.content.as_ref().is_some_and(|content| {
+                        test_utils::extract_text(content).contains("Let me get that information")
+                    })
                })
            })
        });
@@ -2982,8 +3008,8 @@ mod parallel_jail_tests {
            r.data.as_ref().is_some_and(|d| {
                d.choices.iter().any(|c| {
                    c.delta.content.as_ref().is_some_and(|content| {
-                        content.contains("I'll help you")
-                            || content.contains("don't need any tools")
+                        test_utils::extract_text(content).contains("I'll help you")
+                            || test_utils::extract_text(content).contains("don't need any tools")
                    })
                })
            })

--- a/lib/llm/tests/test_reasoning_parser.rs
+++ b/lib/llm/tests/test_reasoning_parser.rs
 // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0

-use dynamo_async_openai::types::{ChatChoiceStream, ChatCompletionStreamResponseDelta, Role};
+use dynamo_async_openai::types::{
+    ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta, Role,
+};
 use dynamo_llm::preprocessor::OpenAIPreprocessor;
 use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
 use dynamo_runtime::protocols::annotated::Annotated;
 use futures::{StreamExt, stream};

+/// Returns the text held by `content`, or `""` when it holds `Parts` (non-text) content.
+fn get_text(content: &ChatCompletionMessageContent) -> &str {
+    match content {
+        ChatCompletionMessageContent::Text(text) => text.as_str(),
+        ChatCompletionMessageContent::Parts(_) => "",
+    }
+}
+
 /// Helper function to create a mock chat response chunk
 fn create_mock_response_chunk(
    content: String,
@@ -17,7 +27,7 @@ fn create_mock_response_chunk(
        index: 0,
        delta: ChatCompletionStreamResponseDelta {
            role: Some(Role::Assistant),
-            content: Some(content),
+            content: Some(ChatCompletionMessageContent::Text(content)),
            tool_calls: None,
            function_call: None,
            refusal: None,
@@ -61,7 +71,7 @@ mod tests {
        match expected_content {
            Some(expected) => {
                assert_eq!(
-                    choice.delta.content.as_deref(),
+                    choice.delta.content.as_ref().map(get_text),
                    Some(expected),
                    "Content mismatch"
                );
@@ -69,7 +79,7 @@ mod tests {
            None => {
                assert!(
                    choice.delta.content.is_none()
-                        || choice.delta.content.as_ref().unwrap().is_empty(),
+                        || get_text(choice.delta.content.as_ref().unwrap()).is_empty(),
                    "Expected content to be None or empty, got: {:?}",
                    choice.delta.content
                );
@@ -260,7 +270,7 @@ mod tests {
            let output_choice = &output.data.as_ref().unwrap().choices[0];
            assert_choice(
                output_choice,
-                input_choice.delta.content.as_deref(),
+                input_choice.delta.content.as_ref().map(get_text),
                input_choice.delta.reasoning_content.as_deref(),
            );
        }
@@ -316,7 +326,8 @@ mod tests {
            "Should contain Mistral reasoning content"
        );
        assert!(
-            normal_content.contains("Let me think") || normal_content.contains("Here's my answer"),
+            get_text(normal_content).contains("Let me think")
+                || get_text(normal_content).contains("Here's my answer"),
            "Should contain normal content"
        );
    }
@@ -379,7 +390,7 @@ mod tests {

                    // Collect normal content
                    if let Some(ref content) = choice.delta.content {
-                        all_normal_content.push_str(content);
+                        all_normal_content.push_str(get_text(content));
                    }
                }
            }
@@ -450,8 +461,8 @@ mod tests {
            "Should contain Kimi reasoning content"
        );
        assert!(
-            normal_content.contains("Let me analyze")
-                || normal_content.contains("Here's my conclusion"),
+            get_text(normal_content).contains("Let me analyze")
+                || get_text(normal_content).contains("Here's my conclusion"),
            "Should contain normal content"
        );
    }
@@ -518,7 +529,7 @@ mod tests {

                    // Collect normal content
                    if let Some(ref content) = choice.delta.content {
-                        all_normal_content.push_str(content);
+                        all_normal_content.push_str(get_text(content));
                    }

                    // Check for tool calls
@@ -624,7 +635,7 @@ mod tests {
                        all_reasoning.push_str(reasoning);
                    }
                    if let Some(ref content) = choice.delta.content {
-                        all_normal_content.push_str(content);
+                        all_normal_content.push_str(get_text(content));
                    }
                    if let Some(ref tool_calls) = choice.delta.tool_calls
                        && !tool_calls.is_empty()

--- a/lib/llm/tests/test_streaming_tool_parsers.rs
+++ b/lib/llm/tests/test_streaming_tool_parsers.rs
@@ -26,7 +26,7 @@ across backends.

 */

-use dynamo_async_openai::types::{ChatChoiceStream, FinishReason};
+use dynamo_async_openai::types::{ChatChoiceStream, ChatCompletionMessageContent, FinishReason};
 use dynamo_llm::preprocessor::OpenAIPreprocessor;
 use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
 use dynamo_runtime::protocols::annotated::Annotated;
@@ -35,6 +35,13 @@ use std::pin::Pin;

 const DATA_ROOT_PATH: &str = "tests/data/";

+fn get_text(content: &ChatCompletionMessageContent) -> &str {
+    match content {
+        ChatCompletionMessageContent::Text(text) => text.as_str(),
+        ChatCompletionMessageContent::Parts(_) => "", // non-text (multi-part) content is treated as empty text
+    }
+}
+
 /// Test data structure containing expected results and stream data
 struct TestData {
    expected_normal_content: String,
@@ -230,7 +237,7 @@ fn aggregate_content_from_chunks(

                // Collect normal content
                if let Some(ref content) = choice.delta.content {
-                    normal_content.push_str(content);
+                    normal_content.push_str(get_text(content));
                }

                // Collect tool calls

--- a/lib/llm/tests/tool_choice.rs
+++ b/lib/llm/tests/tool_choice.rs
@@ -2,12 +2,21 @@
 // SPDX-License-Identifier: Apache-2.0

 use dynamo_async_openai::types::{
-    ChatCompletionNamedToolChoice, ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
-    ChatCompletionRequestUserMessageContent, ChatCompletionToolChoiceOption,
-    ChatCompletionToolType, CreateChatCompletionRequest, FunctionName,
+    ChatCompletionMessageContent, ChatCompletionNamedToolChoice, ChatCompletionRequestMessage,
+    ChatCompletionRequestUserMessage, ChatCompletionRequestUserMessageContent,
+    ChatCompletionToolChoiceOption, ChatCompletionToolType, CreateChatCompletionRequest,
+    FunctionName,
 };
 use dynamo_llm::protocols::common;
 use dynamo_llm::protocols::common::llm_backend::BackendOutput;
+
+/// Returns the text held by `content`, or `""` when it holds `Parts` (non-text) content.
+fn get_text(content: &ChatCompletionMessageContent) -> &str {
+    match content {
+        ChatCompletionMessageContent::Text(text) => text.as_str(),
+        ChatCompletionMessageContent::Parts(_) => "",
+    }
+}
 use dynamo_llm::protocols::openai::DeltaGeneratorExt;
 use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;

@@ -153,7 +162,7 @@ async fn test_named_tool_choice_parses_json() {
        Some(dynamo_async_openai::types::FinishReason::Stop)
    );
    let delta = &choice.delta;
-    assert!(delta.content.is_none() || delta.content.as_deref() == Some(""));
+    assert!(delta.content.is_none() || delta.content.as_ref().map(get_text) == Some(""));
    let tool_calls = delta.tool_calls.as_ref().unwrap();

    assert_eq!(tool_calls.len(), 1);
@@ -195,7 +204,7 @@ async fn test_required_tool_choice_parses_json_array() {
        Some(dynamo_async_openai::types::FinishReason::ToolCalls)
    );
    let delta = &choice.delta;
-    assert!(delta.content.is_none() || delta.content.as_deref() == Some(""));
+    assert!(delta.content.is_none() || delta.content.as_ref().map(get_text) == Some(""));
    let tool_calls = delta.tool_calls.as_ref().unwrap();

    assert_eq!(tool_calls.len(), 2);
@@ -252,7 +261,7 @@ async fn test_tool_choice_parse_failure_returns_as_content() {

    // Jail stream behavior: if parsing fails, return accumulated content as-is
    // This matches marker-based FC behavior
-    assert_eq!(delta.content.as_deref(), Some("not-json"));
+    assert_eq!(delta.content.as_ref().map(get_text), Some("not-json"));
    assert!(delta.tool_calls.is_none());
 }

@@ -434,7 +443,7 @@ fn test_no_tool_choice_outputs_normal_text() {
        .expect("normal text");

    assert_eq!(
-        response.choices[0].delta.content.as_deref(),
+        response.choices[0].delta.content.as_ref().map(get_text),
        Some("Hello world")
    );
    assert!(response.choices[0].delta.tool_calls.is_none());