Unverified commit 9f76d060, authored by Ayush Agarwal and committed by GitHub
Browse files

feat: text to image vLLM Omni (#5912)


Signed-off-by: ayushag <ayushag@nvidia.com>
parent d14d6ff4
......@@ -270,7 +270,7 @@ impl DeltaGenerator {
stop_reason: Option<dynamo_async_openai::types::StopReason>,
) -> NvCreateChatCompletionStreamResponse {
let delta = dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
content: text,
content: text.map(dynamo_async_openai::types::ChatCompletionMessageContent::Text),
function_call: None,
tool_calls: None,
role: if self.msg_counter == 0 {
......
......@@ -112,7 +112,9 @@ fn create_choice_stream(
index,
delta: ChatCompletionStreamResponseDelta {
role,
content: Some(content.to_string()),
content: Some(
dynamo_async_openai::types::ChatCompletionMessageContent::Text(content.to_string()),
),
tool_calls,
function_call: None,
refusal: None,
......@@ -533,23 +535,32 @@ impl JailedStream {
// Process each choice independently using the new architecture
for choice in &chat_response.choices {
if let Some(ref content) = choice.delta.content {
let starts_jailed = matches!(self.jail_mode, JailMode::Immediate { .. });
let choice_state = choice_states.get_or_create_state(choice.index, starts_jailed);
// Store metadata when any choice becomes jailed (first time only)
if !choice_state.is_jailed && self.should_start_jail(content)
&& last_annotated_id.is_none() {
last_annotated_id = response.id.clone();
last_annotated_event = response.event.clone();
last_annotated_comment = response.comment.clone();
}
// Jailing only applies to text content
let text_content = match content {
dynamo_async_openai::types::ChatCompletionMessageContent::Text(text) => Some(text.as_str()),
dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_) => None,
};
if let Some(text) = text_content {
let starts_jailed = matches!(self.jail_mode, JailMode::Immediate { .. });
let choice_state = choice_states.get_or_create_state(choice.index, starts_jailed);
// Store metadata when any choice becomes jailed (first time only)
if !choice_state.is_jailed && self.should_start_jail(text)
&& last_annotated_id.is_none() {
last_annotated_id = response.id.clone();
last_annotated_event = response.event.clone();
last_annotated_comment = response.comment.clone();
}
// Track actual stream finish reason in the choice state
choice_state.stream_finish_reason = choice.finish_reason;
// Track actual stream finish reason in the choice state
choice_state.stream_finish_reason = choice.finish_reason;
// Process this choice and get emissions
let emissions = choice_state.process_content(choice, content, &self).await;
all_emissions.extend(emissions);
// Process this choice and get emissions
let emissions = choice_state.process_content(choice, text, &self).await;
all_emissions.extend(emissions);
}
// For multimodal content, pass through unchanged (no jailing)
} else {
// Handle choices without content (e.g., final chunks with finish_reason)
// Only filter out if this choice was ever jailed and lacks role
......
......@@ -222,8 +222,20 @@ impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
.and_then(|choice| choice.message.content)
.unwrap_or_else(|| {
tracing::warn!("No choices in chat completion response, using empty content");
String::new()
dynamo_async_openai::types::ChatCompletionMessageContent::Text(String::new())
});
// Extract text from content (only handle text for responses API)
let text_content = match content_text {
dynamo_async_openai::types::ChatCompletionMessageContent::Text(text) => text,
dynamo_async_openai::types::ChatCompletionMessageContent::Parts(_) => {
tracing::warn!(
"Multimodal content in responses API not yet supported, using placeholder"
);
"[multimodal content]".to_string()
}
};
let message_id = format!("msg_{}", Uuid::new_v4().simple());
let response_id = format!("resp_{}", Uuid::new_v4().simple());
......@@ -232,7 +244,7 @@ impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
role: ResponseRole::Assistant,
status: OutputStatus::Completed,
content: vec![Content::OutputText(OutputText {
text: content_text,
text: text_content,
annotations: vec![],
})],
})];
......@@ -363,7 +375,11 @@ mod tests {
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
content: Some("This is a reply".into()),
content: Some(
dynamo_async_openai::types::ChatCompletionMessageContent::Text(
"This is a reply".to_string(),
),
),
refusal: None,
tool_calls: None,
role: dynamo_async_openai::types::Role::Assistant,
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use dynamo_async_openai::types::ChatCompletionMessageContent;
use dynamo_llm::protocols::{
ContentProvider, DataStream,
codec::{Message, SseCodecError, create_message_stream},
......@@ -12,6 +13,13 @@ use dynamo_llm::protocols::{
};
use futures::StreamExt;
/// Borrows the text carried by `content`.
///
/// `Text` content yields its string; `Parts` content yields an empty string.
fn get_text(content: &ChatCompletionMessageContent) -> &str {
    if let ChatCompletionMessageContent::Text(body) = content {
        body.as_str()
    } else {
        // Only the `Parts` variant reaches this branch.
        ""
    }
}
const CMPL_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/completions";
const CHAT_ROOT_PATH: &str = "tests/data/replays/meta/llama-3.1-8b-instruct/chat_completions";
......@@ -35,16 +43,17 @@ async fn test_openai_chat_stream() {
// todo: provide a cleaner way to extract the content from choices
assert_eq!(
result
.choices
.first()
.unwrap()
.message
.content
.clone()
.expect("there to be content"),
get_text(
result
.choices
.first()
.unwrap()
.message
.content
.as_ref()
.expect("there to be content")
),
"Deep learning is a subfield of machine learning that involves the use of artificial"
.to_string()
);
}
......@@ -59,15 +68,17 @@ async fn test_openai_chat_edge_case_multi_line_data() {
.unwrap();
assert_eq!(
result
.choices
.first()
.unwrap()
.message
.content
.clone()
.expect("there to be content"),
"Deep learning".to_string()
get_text(
result
.choices
.first()
.unwrap()
.message
.content
.as_ref()
.expect("there to be content")
),
"Deep learning"
);
}
......@@ -82,15 +93,17 @@ async fn test_openai_chat_edge_case_comments_per_response() {
.unwrap();
assert_eq!(
result
.choices
.first()
.unwrap()
.message
.content
.clone()
.expect("there to be content"),
"Deep learning".to_string()
get_text(
result
.choices
.first()
.unwrap()
.message
.content
.as_ref()
.expect("there to be content")
),
"Deep learning"
);
}
......
......@@ -11,8 +11,8 @@ use dynamo_llm::perf::{RecordedStream, TimestampedResponse};
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
use dynamo_async_openai::types::{
ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionStreamResponseDelta,
ChatCompletionTokenLogprob, FinishReason, Role, TopLogprobs,
ChatChoiceLogprobs, ChatChoiceStream, ChatCompletionMessageContent,
ChatCompletionStreamResponseDelta, ChatCompletionTokenLogprob, FinishReason, Role, TopLogprobs,
};
// Type aliases to simplify complex test data structures
......@@ -380,7 +380,7 @@ fn create_response_with_linear_probs(
let choice = ChatChoiceStream {
index: 0,
delta: ChatCompletionStreamResponseDelta {
content: Some(_content.to_string()),
content: Some(ChatCompletionMessageContent::Text(_content.to_string())),
#[expect(deprecated)]
function_call: None,
tool_calls: None,
......@@ -460,7 +460,7 @@ fn create_multi_choice_response(
ChatChoiceStream {
index: choice_idx as u32,
delta: ChatCompletionStreamResponseDelta {
content: Some("test".to_string()),
content: Some(ChatCompletionMessageContent::Text("test".to_string())),
#[expect(deprecated)]
function_call: None,
tool_calls: None,
......
......@@ -16,6 +16,15 @@ mod tests {
// Test utilities module - shared test infrastructure
pub(crate) mod test_utils {
use super::*;
use dynamo_async_openai::types::ChatCompletionMessageContent;
/// Borrows the text held by a `ChatCompletionMessageContent`.
///
/// Returns the inner string for `Text` content and `""` for `Parts` content.
pub fn extract_text(content: &ChatCompletionMessageContent) -> &str {
    let ChatCompletionMessageContent::Text(body) = content else {
        // `Parts` content is reported as an empty string.
        return "";
    };
    body.as_str()
}
/// Helper function to create a mock chat response chunk
pub fn create_mock_response_chunk(
......@@ -27,7 +36,7 @@ mod tests {
index,
delta: ChatCompletionStreamResponseDelta {
role: Some(Role::Assistant),
content: Some(content),
content: Some(ChatCompletionMessageContent::Text(content)),
tool_calls: None,
function_call: None,
refusal: None,
......@@ -111,7 +120,7 @@ mod tests {
index,
delta: ChatCompletionStreamResponseDelta {
role: Some(Role::Assistant),
content: Some(content),
content: Some(ChatCompletionMessageContent::Text(content)),
tool_calls: None,
function_call: None,
refusal: None,
......@@ -154,7 +163,7 @@ mod tests {
index,
delta: ChatCompletionStreamResponseDelta {
role: Some(Role::Assistant),
content: Some(content),
content: Some(ChatCompletionMessageContent::Text(content)),
tool_calls: None,
function_call: None,
refusal: None,
......@@ -245,9 +254,11 @@ mod tests {
.expect("Expected content in result");
assert_eq!(
content, expected,
extract_text(content),
expected,
"Content mismatch: expected '{}', got '{}'",
expected, content
expected,
extract_text(content)
);
}
......@@ -301,7 +312,11 @@ mod tests {
{
assert!(
choice.delta.content.is_none()
|| choice.delta.content.as_ref().unwrap().is_empty(),
|| choice.delta.content.as_ref().is_none_or(|c| match c {
dynamo_async_openai::types::ChatCompletionMessageContent::Text(t) =>
t.is_empty(),
_ => false,
}),
"Expected no content but got: {:?}",
choice.delta.content
);
......@@ -326,7 +341,7 @@ mod tests {
.and_then(|d| d.choices.first())
.and_then(|c| c.delta.content.as_ref())
})
.cloned()
.map(extract_text)
.collect::<Vec<_>>()
.join("")
}
......@@ -338,7 +353,10 @@ mod tests {
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|c| c.delta.content.as_ref())
.cloned()
.and_then(|content| match content {
ChatCompletionMessageContent::Text(text) => Some(text.clone()),
ChatCompletionMessageContent::Parts(_) => None,
})
.unwrap_or_default()
}
......@@ -361,7 +379,7 @@ mod tests {
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|c| c.delta.content.as_ref())
.map(|content| !content.is_empty())
.map(|content| !extract_text(content).is_empty())
.unwrap_or(false)
}
}
......@@ -402,7 +420,8 @@ mod tests {
results[0].data.as_ref().unwrap().choices[0]
.delta
.content
.as_deref(),
.as_ref()
.map(extract_text),
Some("Hello ")
);
......@@ -410,9 +429,7 @@ mod tests {
let unjailed_content = &results[1].data.as_ref().unwrap().choices[0].delta.content;
assert!(unjailed_content.is_some());
assert!(
unjailed_content
.as_ref()
.unwrap()
extract_text(unjailed_content.as_ref().unwrap())
.contains("<jail>This is jailed content</jail>")
);
......@@ -421,7 +438,8 @@ mod tests {
results[2].data.as_ref().unwrap().choices[0]
.delta
.content
.as_deref(),
.as_ref()
.map(extract_text),
Some(" World")
);
}
......@@ -494,7 +512,8 @@ mod tests {
results[0].data.as_ref().unwrap().choices[0]
.delta
.content
.as_deref(),
.as_ref()
.map(extract_text),
Some("Normal text ")
);
......@@ -504,7 +523,7 @@ mod tests {
.content
.as_ref()
.expect("Expected accumulated jailed content");
assert!(jailed.contains("<jail><TOOLCALL>Jailed content</jail>"));
assert!(extract_text(jailed).contains("<jail><TOOLCALL>Jailed content</jail>"));
}
#[tokio::test]
......@@ -1298,11 +1317,11 @@ mod tests {
assert!(content.is_some(), "Should have accumulated content");
let content = content.as_ref().unwrap();
assert!(
content.contains("<tool_call>"),
test_utils::extract_text(content).contains("<tool_call>"),
"Should contain jail start marker in accumulated content"
);
assert!(
content.contains("incomplete_call"),
test_utils::extract_text(content).contains("incomplete_call"),
"Should contain accumulated incomplete content"
);
}
......@@ -1672,7 +1691,8 @@ mod tests {
.as_ref()
.unwrap();
assert_eq!(
content, "Hello, world!",
extract_text(content),
"Hello, world!",
"Content chunk should have 'Hello, world!'"
);
......@@ -1860,7 +1880,10 @@ mod tests {
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|c| c.delta.content.as_ref())
.map(|content| content.contains("Need to use function get_current_weather."))
.map(|content| {
test_utils::extract_text(content)
.contains("Need to use function get_current_weather.")
})
.unwrap_or(false)
});
assert!(has_analysis_text, "Should contain extracted analysis text");
......@@ -1912,7 +1935,7 @@ mod tests {
for choice in data.choices {
if let Some(content) = choice.delta.content {
assert!(
!content.contains("<|tool▁calls▁end|>"),
!test_utils::extract_text(&content).contains("<|tool▁calls▁end|>"),
"Should not contain deepseek special tokens in content"
);
}
......@@ -1986,7 +2009,7 @@ mod tests {
for choice in data.choices {
if let Some(content) = choice.delta.content {
assert!(
!content.contains("<|tool▁calls▁end|>"),
!test_utils::extract_text(&content).contains("<|tool▁calls▁end|>"),
"Should not contain deepseek special tokens in content"
);
}
......@@ -2184,7 +2207,8 @@ mod tests {
.and_then(|c| c.delta.content.as_ref())
})
.filter(|content| {
content.contains("<tool_call>") || content.contains("should not jail")
test_utils::extract_text(content).contains("<tool_call>")
|| test_utils::extract_text(content).contains("should not jail")
})
.collect();
......@@ -2202,7 +2226,10 @@ mod tests {
.and_then(|d| d.choices.first())
.and_then(|c| c.delta.content.as_ref())
})
.find(|content| content.contains("[[START]]") && content.contains("jailed content"));
.find(|content| {
test_utils::extract_text(content).contains("[[START]]")
&& test_utils::extract_text(content).contains("jailed content")
});
assert!(
jailed_chunk.is_some(),
......@@ -2320,6 +2347,7 @@ mod tests {
mod parallel_jail_tests {
use super::tests::test_utils;
use super::*;
use dynamo_async_openai::types::ChatCompletionMessageContent;
use futures::StreamExt;
use futures::stream;
use serde_json::json;
......@@ -2337,7 +2365,7 @@ mod parallel_jail_tests {
index: i as u32,
delta: ChatCompletionStreamResponseDelta {
role: Some(Role::Assistant),
content: Some(content),
content: Some(ChatCompletionMessageContent::Text(content)),
tool_calls: None,
function_call: None,
refusal: None,
......@@ -2589,10 +2617,9 @@ mod parallel_jail_tests {
let normal_text_before = results.iter().find(|r| {
r.data.as_ref().is_some_and(|d| {
d.choices.iter().any(|c| {
c.delta
.content
.as_ref()
.is_some_and(|content| content.contains("I'll check the weather"))
c.delta.content.as_ref().is_some_and(|content| {
test_utils::extract_text(content).contains("I'll check the weather")
})
})
})
});
......@@ -2619,10 +2646,9 @@ mod parallel_jail_tests {
let normal_text_after = results.iter().find(|r| {
r.data.as_ref().is_some_and(|d| {
d.choices.iter().any(|c| {
c.delta
.content
.as_ref()
.is_some_and(|content| content.contains("Let me get that information"))
c.delta.content.as_ref().is_some_and(|content| {
test_utils::extract_text(content).contains("Let me get that information")
})
})
})
});
......@@ -2982,8 +3008,8 @@ mod parallel_jail_tests {
r.data.as_ref().is_some_and(|d| {
d.choices.iter().any(|c| {
c.delta.content.as_ref().is_some_and(|content| {
content.contains("I'll help you")
|| content.contains("don't need any tools")
test_utils::extract_text(content).contains("I'll help you")
|| test_utils::extract_text(content).contains("don't need any tools")
})
})
})
......
// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use dynamo_async_openai::types::{ChatChoiceStream, ChatCompletionStreamResponseDelta, Role};
use dynamo_async_openai::types::{
ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta, Role,
};
use dynamo_llm::preprocessor::OpenAIPreprocessor;
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
use dynamo_runtime::protocols::annotated::Annotated;
use futures::{StreamExt, stream};
/// Returns the text of a `ChatCompletionMessageContent` as a string slice.
///
/// `Text` content maps to its string; `Parts` content maps to `""`.
fn get_text(content: &ChatCompletionMessageContent) -> &str {
    let maybe_text = match content {
        ChatCompletionMessageContent::Text(body) => Some(body),
        ChatCompletionMessageContent::Parts(_) => None,
    };
    maybe_text.map_or("", String::as_str)
}
/// Helper function to create a mock chat response chunk
fn create_mock_response_chunk(
content: String,
......@@ -17,7 +27,7 @@ fn create_mock_response_chunk(
index: 0,
delta: ChatCompletionStreamResponseDelta {
role: Some(Role::Assistant),
content: Some(content),
content: Some(ChatCompletionMessageContent::Text(content)),
tool_calls: None,
function_call: None,
refusal: None,
......@@ -61,7 +71,7 @@ mod tests {
match expected_content {
Some(expected) => {
assert_eq!(
choice.delta.content.as_deref(),
choice.delta.content.as_ref().map(get_text),
Some(expected),
"Content mismatch"
);
......@@ -69,7 +79,7 @@ mod tests {
None => {
assert!(
choice.delta.content.is_none()
|| choice.delta.content.as_ref().unwrap().is_empty(),
|| get_text(choice.delta.content.as_ref().unwrap()).is_empty(),
"Expected content to be None or empty, got: {:?}",
choice.delta.content
);
......@@ -260,7 +270,7 @@ mod tests {
let output_choice = &output.data.as_ref().unwrap().choices[0];
assert_choice(
output_choice,
input_choice.delta.content.as_deref(),
input_choice.delta.content.as_ref().map(get_text),
input_choice.delta.reasoning_content.as_deref(),
);
}
......@@ -316,7 +326,8 @@ mod tests {
"Should contain Mistral reasoning content"
);
assert!(
normal_content.contains("Let me think") || normal_content.contains("Here's my answer"),
get_text(normal_content).contains("Let me think")
|| get_text(normal_content).contains("Here's my answer"),
"Should contain normal content"
);
}
......@@ -379,7 +390,7 @@ mod tests {
// Collect normal content
if let Some(ref content) = choice.delta.content {
all_normal_content.push_str(content);
all_normal_content.push_str(get_text(content));
}
}
}
......@@ -450,8 +461,8 @@ mod tests {
"Should contain Kimi reasoning content"
);
assert!(
normal_content.contains("Let me analyze")
|| normal_content.contains("Here's my conclusion"),
get_text(normal_content).contains("Let me analyze")
|| get_text(normal_content).contains("Here's my conclusion"),
"Should contain normal content"
);
}
......@@ -518,7 +529,7 @@ mod tests {
// Collect normal content
if let Some(ref content) = choice.delta.content {
all_normal_content.push_str(content);
all_normal_content.push_str(get_text(content));
}
// Check for tool calls
......@@ -624,7 +635,7 @@ mod tests {
all_reasoning.push_str(reasoning);
}
if let Some(ref content) = choice.delta.content {
all_normal_content.push_str(content);
all_normal_content.push_str(get_text(content));
}
if let Some(ref tool_calls) = choice.delta.tool_calls
&& !tool_calls.is_empty()
......
......@@ -26,7 +26,7 @@ across backends.
*/
use dynamo_async_openai::types::{ChatChoiceStream, FinishReason};
use dynamo_async_openai::types::{ChatChoiceStream, ChatCompletionMessageContent, FinishReason};
use dynamo_llm::preprocessor::OpenAIPreprocessor;
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
use dynamo_runtime::protocols::annotated::Annotated;
......@@ -35,6 +35,13 @@ use std::pin::Pin;
const DATA_ROOT_PATH: &str = "tests/data/";
/// Returns the string inside `Text` content, or `""` when the content is `Parts`.
fn get_text(content: &ChatCompletionMessageContent) -> &str {
    if let ChatCompletionMessageContent::Text(body) = content {
        return body.as_str();
    }
    // Falling through means the content was `Parts`.
    ""
}
/// Test data structure containing expected results and stream data
struct TestData {
expected_normal_content: String,
......@@ -230,7 +237,7 @@ fn aggregate_content_from_chunks(
// Collect normal content
if let Some(ref content) = choice.delta.content {
normal_content.push_str(content);
normal_content.push_str(get_text(content));
}
// Collect tool calls
......
......@@ -2,12 +2,21 @@
// SPDX-License-Identifier: Apache-2.0
use dynamo_async_openai::types::{
ChatCompletionNamedToolChoice, ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
ChatCompletionRequestUserMessageContent, ChatCompletionToolChoiceOption,
ChatCompletionToolType, CreateChatCompletionRequest, FunctionName,
ChatCompletionMessageContent, ChatCompletionNamedToolChoice, ChatCompletionRequestMessage,
ChatCompletionRequestUserMessage, ChatCompletionRequestUserMessageContent,
ChatCompletionToolChoiceOption, ChatCompletionToolType, CreateChatCompletionRequest,
FunctionName,
};
use dynamo_llm::protocols::common;
use dynamo_llm::protocols::common::llm_backend::BackendOutput;
/// Helper to extract text from ChatCompletionMessageContent
fn get_text(content: &ChatCompletionMessageContent) -> &str {
match content {
ChatCompletionMessageContent::Text(text) => text.as_str(),
ChatCompletionMessageContent::Parts(_) => "",
}
}
use dynamo_llm::protocols::openai::DeltaGeneratorExt;
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionRequest;
......@@ -153,7 +162,7 @@ async fn test_named_tool_choice_parses_json() {
Some(dynamo_async_openai::types::FinishReason::Stop)
);
let delta = &choice.delta;
assert!(delta.content.is_none() || delta.content.as_deref() == Some(""));
assert!(delta.content.is_none() || delta.content.as_ref().map(get_text) == Some(""));
let tool_calls = delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 1);
......@@ -195,7 +204,7 @@ async fn test_required_tool_choice_parses_json_array() {
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
);
let delta = &choice.delta;
assert!(delta.content.is_none() || delta.content.as_deref() == Some(""));
assert!(delta.content.is_none() || delta.content.as_ref().map(get_text) == Some(""));
let tool_calls = delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 2);
......@@ -252,7 +261,7 @@ async fn test_tool_choice_parse_failure_returns_as_content() {
// Jail stream behavior: if parsing fails, return accumulated content as-is
// This matches marker-based FC behavior
assert_eq!(delta.content.as_deref(), Some("not-json"));
assert_eq!(delta.content.as_ref().map(get_text), Some("not-json"));
assert!(delta.tool_calls.is_none());
}
......@@ -434,7 +443,7 @@ fn test_no_tool_choice_outputs_normal_text() {
.expect("normal text");
assert_eq!(
response.choices[0].delta.content.as_deref(),
response.choices[0].delta.content.as_ref().map(get_text),
Some("Hello world")
);
assert!(response.choices[0].delta.tool_calls.is_none());
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment