Commit 2887cd1c (unverified), authored by ishandhanani, committed by GitHub
Browse files

refactor(1/3): move `nvext` to `dynamo-llm` and move `anthropic` to `dynamo-async-openai` (#7564)

parent d6136f4a
......@@ -214,50 +214,9 @@ pub struct AgentHints {
pub latency_sensitivity: Option<f64>,
}
/// Anthropic-style cache control hint for prefix pinning with TTL.
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
pub struct CacheControl {
// Exposed on the wire under the key `type` (`type` is a Rust keyword, hence the rename).
#[serde(rename = "type")]
pub control_type: CacheControlType,
/// TTL as seconds (integer) or shorthand ("5m" = 300s, "1h" = 3600s). Clamped to [300, 3600].
// A missing key deserializes to `None`, and `None` is omitted from serialized output.
// The clamping is performed by `ttl_seconds()`, not during deserialization; the raw
// string is stored here unchanged.
// NOTE(review): the field is a `String`, so integer seconds presumably have to be sent
// as a string such as "600" rather than a bare number — confirm against callers.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ttl: Option<String>,
}
// Kind of cache control requested. Variant names are serialized in lowercase
// (`rename_all = "lowercase"`).
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
// Value produced by `Default::default()`.
#[default]
Ephemeral,
// Catch-all: `#[serde(other)]` makes any unrecognized value deserialize to this variant
// instead of failing.
#[serde(other)]
Unknown,
}
// Lower bound, in seconds, for any TTL returned by `ttl_seconds`; also the fallback value.
const MIN_TTL_SECONDS: u64 = 300;
// Upper bound, in seconds, for any TTL returned by `ttl_seconds`.
const MAX_TTL_SECONDS: u64 = 3600;

impl CacheControl {
    /// Resolve the optional `ttl` string into a number of seconds within [300, 3600].
    ///
    /// * No `ttl` set: returns 300.
    /// * `"5m"` / `"1h"`: the two supported shorthands, 300 and 3600 seconds.
    /// * Any other string is parsed as an integer number of seconds and clamped into
    ///   the allowed range (so `"120"` yields 300 and `"7200"` yields 3600).
    /// * A string that is neither a shorthand nor an integer logs a warning and
    ///   returns 300.
    pub fn ttl_seconds(&self) -> u64 {
        let Some(ttl) = self.ttl.as_deref() else {
            return MIN_TTL_SECONDS;
        };

        let requested = match ttl {
            "5m" => 300,
            "1h" => 3600,
            numeric => {
                let Ok(parsed) = numeric.parse::<u64>() else {
                    tracing::warn!("Unrecognized TTL '{}', defaulting to 300s", numeric);
                    return MIN_TTL_SECONDS;
                };
                parsed
            }
        };

        requested.clamp(MIN_TTL_SECONDS, MAX_TTL_SECONDS)
    }
}
// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
// alongside the Anthropic protocol types they originate from.
pub use dynamo_async_openai::types::anthropic::{CacheControl, CacheControlType};
impl Default for NvExt {
fn default() -> Self {
......
......@@ -696,8 +696,8 @@ pub fn chat_completion_to_response(
nv_resp: NvCreateChatCompletionResponse,
params: &ResponseParams,
) -> Result<NvResponse, anyhow::Error> {
let chat_resp = nv_resp;
let nvext = chat_resp.nvext.clone();
let nvext = nv_resp.nvext.clone();
let chat_resp = nv_resp.inner;
let message_id = format!("msg_{}", Uuid::new_v4().simple());
let response_id = format!("resp_{}", Uuid::new_v4().simple());
......@@ -1163,6 +1163,7 @@ mod tests {
fn test_into_nvresponse_from_chat_response() {
let now = 1_726_000_000;
let chat_resp = NvCreateChatCompletionResponse {
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
......@@ -1189,6 +1190,7 @@ mod tests {
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
},
nvext: None,
};
......@@ -1218,6 +1220,7 @@ mod tests {
fn test_response_with_tool_calls() {
let now = 1_726_000_000;
let chat_resp = NvCreateChatCompletionResponse {
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
......@@ -1247,6 +1250,7 @@ mod tests {
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
},
nvext: None,
};
......@@ -1432,6 +1436,7 @@ thinking
};
let chat_resp = NvCreateChatCompletionResponse {
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
......@@ -1440,6 +1445,7 @@ thinking
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
};
......@@ -1463,6 +1469,7 @@ thinking
};
let chat_resp = NvCreateChatCompletionResponse {
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
......@@ -1471,6 +1478,7 @@ thinking
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
};
......@@ -1489,6 +1497,7 @@ thinking
};
let chat_resp = NvCreateChatCompletionResponse {
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
......@@ -1497,6 +1506,7 @@ thinking
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
};
......@@ -1555,6 +1565,7 @@ thinking
ChatChoice, ChatCompletionMessageContent, ChatCompletionResponseMessage, FinishReason,
};
NvCreateChatCompletionResponse {
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![ChatChoice {
index: 0,
#[allow(deprecated)]
......@@ -1578,6 +1589,7 @@ thinking
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
}
}
......
......@@ -183,7 +183,7 @@ impl ResponseStreamConverter {
let mut events = Vec::new();
// Capture usage stats from the final chunk (sent when stream_options.include_usage=true)
if let Some(ref u) = chunk.usage {
if let Some(ref u) = chunk.inner.usage {
self.usage = Some(ResponseUsage {
input_tokens: u.prompt_tokens,
input_tokens_details: InputTokenDetails {
......@@ -205,7 +205,7 @@ impl ResponseStreamConverter {
});
}
for choice in &chunk.choices {
for choice in &chunk.inner.choices {
let delta = &choice.delta;
// Handle text content deltas — extract text from the enum
......@@ -685,6 +685,7 @@ mod tests {
) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "chat-1".into(),
choices: vec![ChatChoiceStream {
index: 0,
......@@ -714,6 +715,7 @@ mod tests {
system_fingerprint: None,
object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None,
}
}
......@@ -721,6 +723,7 @@ mod tests {
fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "chat-1".into(),
choices: vec![ChatChoiceStream {
index: 0,
......@@ -742,6 +745,7 @@ mod tests {
system_fingerprint: None,
object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None,
}
}
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use dynamo_async_openai::types::ChatCompletionMessageContent;
use dynamo_async_openai::types::{
ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta,
CreateChatCompletionStreamResponse, Role,
};
use dynamo_llm::protocols::{
ContentProvider, DataStream,
Annotated, ContentProvider, DataStream,
codec::{Message, SseCodecError, create_message_stream},
openai::{
ParsingOptions,
chat_completions::{NvCreateChatCompletionResponse, aggregator::ChatCompletionAggregator},
chat_completions::{
NvCreateChatCompletionResponse, NvCreateChatCompletionStreamResponse,
aggregator::ChatCompletionAggregator,
},
completions::NvCreateCompletionResponse,
},
};
......@@ -45,6 +51,7 @@ async fn test_openai_chat_stream() {
assert_eq!(
get_text(
result
.inner
.choices
.first()
.unwrap()
......@@ -70,6 +77,7 @@ async fn test_openai_chat_edge_case_multi_line_data() {
assert_eq!(
get_text(
result
.inner
.choices
.first()
.unwrap()
......@@ -95,6 +103,7 @@ async fn test_openai_chat_edge_case_comments_per_response() {
assert_eq!(
get_text(
result
.inner
.choices
.first()
.unwrap()
......@@ -138,3 +147,113 @@ async fn test_openai_cmpl_stream() {
" This is a question that is often asked by those outside of AI research and development"
);
}
// ===================================
// nvext aggregation regression tests
// ===================================
#[allow(deprecated)]
fn make_stream_delta(
content: Option<&str>,
nvext: Option<serde_json::Value>,
) -> Annotated<NvCreateChatCompletionStreamResponse> {
Annotated::from_data(NvCreateChatCompletionStreamResponse {
inner: CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: if let Some(text) = content {
vec![ChatChoiceStream {
index: 0,
delta: ChatCompletionStreamResponseDelta {
content: Some(ChatCompletionMessageContent::Text(text.to_string())),
function_call: None,
tool_calls: None,
role: Some(Role::Assistant),
refusal: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}]
} else {
vec![]
},
created: 1234567890,
model: "test-model".to_string(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext,
})
}
/// An nvext value attached to one delta in the middle of the stream must be present on the
/// aggregated response, and the text of all deltas must be concatenated in order.
#[tokio::test]
async fn test_nvext_passthrough_aggregation() {
    let expected_nvext = serde_json::json!({"custom_field": "test_value"});

    let chunks = futures::stream::iter(vec![
        make_stream_delta(Some("Hello"), None),
        make_stream_delta(Some(" world"), Some(expected_nvext.clone())),
        make_stream_delta(Some("!"), None),
    ]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(chunks, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, Some(expected_nvext));

    let first_choice = aggregated.inner.choices.first().unwrap();
    let message_content = first_choice.message.content.as_ref().unwrap();
    assert_eq!(get_text(message_content), "Hello world!");
}
/// When several deltas carry nvext, the aggregated response must hold the most recent
/// non-None value.
#[tokio::test]
async fn test_nvext_last_value_wins() {
    let stale = serde_json::json!({"version": 1});
    let newest = serde_json::json!({"version": 2});

    let chunks = futures::stream::iter(vec![
        make_stream_delta(Some("a"), Some(stale)),
        make_stream_delta(Some("b"), None),
        make_stream_delta(Some("c"), Some(newest.clone())),
    ]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(chunks, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, Some(newest));
}
/// With no nvext on any delta, the aggregated response must report `None`.
#[tokio::test]
async fn test_nvext_none_when_absent() {
    let only_chunk = make_stream_delta(Some("hello"), None);
    let chunks = futures::stream::iter(vec![only_chunk]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(chunks, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, None);
}
......@@ -397,6 +397,7 @@ fn create_response_with_linear_probs(
};
NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices: vec![choice],
created: 1234567890,
......@@ -405,6 +406,7 @@ fn create_response_with_linear_probs(
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None,
}
}
......@@ -479,6 +481,7 @@ fn create_multi_choice_response(
.collect();
NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices,
created: 1234567890,
......@@ -487,6 +490,7 @@ fn create_multi_choice_response(
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None,
}
}
......@@ -192,7 +192,7 @@ async fn postprocessor_parsing_stream_replays_interval_20_fixture() {
continue;
};
for choice in &output_data.choices {
for choice in &output_data.inner.choices {
if let Some(reasoning_content) = &choice.delta.reasoning_content {
reasoning.push_str(reasoning_content);
}
......
......@@ -48,6 +48,7 @@ mod tests {
};
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: vec![choice],
created: 1234567890,
......@@ -56,6 +57,7 @@ mod tests {
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -89,6 +91,7 @@ mod tests {
};
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: vec![choice],
created: 1234567890,
......@@ -97,6 +100,7 @@ mod tests {
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -134,6 +138,7 @@ mod tests {
};
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: vec![choice],
created: 1234567890,
......@@ -142,6 +147,7 @@ mod tests {
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -180,6 +186,7 @@ mod tests {
.collect();
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices,
created: 1234567890,
......@@ -188,6 +195,7 @@ mod tests {
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -226,6 +234,7 @@ mod tests {
.collect();
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices,
created: 1234567890,
......@@ -234,6 +243,7 @@ mod tests {
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -254,7 +264,7 @@ mod tests {
let content = result
.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref())
.expect("Expected content in result");
......@@ -276,7 +286,7 @@ mod tests {
let tool_calls = result
.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.tool_calls.as_ref())
.expect("Expected tool calls in result");
......@@ -313,7 +323,7 @@ mod tests {
#[allow(dead_code)]
pub fn assert_empty_emission(result: &Annotated<NvCreateChatCompletionStreamResponse>) {
if let Some(data) = &result.data
&& let Some(choice) = data.choices.first()
&& let Some(choice) = data.inner.choices.first()
{
assert!(
choice.delta.content.is_none()
......@@ -343,7 +353,7 @@ mod tests {
.filter_map(|r| {
r.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref())
})
.map(extract_text)
......@@ -356,7 +366,7 @@ mod tests {
result
.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref())
.and_then(|content| match content {
ChatCompletionMessageContent::Text(text) => Some(text.clone()),
......@@ -370,7 +380,7 @@ mod tests {
result
.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.tool_calls.as_ref())
.map(|tc| !tc.is_empty())
.unwrap_or(false)
......@@ -382,7 +392,7 @@ mod tests {
result
.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref())
.map(|content| !extract_text(content).is_empty())
.unwrap_or(false)
......@@ -422,7 +432,7 @@ mod tests {
// First chunk should pass through
assert_eq!(
results[0].data.as_ref().unwrap().choices[0]
results[0].data.as_ref().unwrap().inner.choices[0]
.delta
.content
.as_ref()
......@@ -431,7 +441,9 @@ mod tests {
);
// When jail ends, accumulated content should be released
let unjailed_content = &results[1].data.as_ref().unwrap().choices[0].delta.content;
let unjailed_content = &results[1].data.as_ref().unwrap().inner.choices[0]
.delta
.content;
assert!(unjailed_content.is_some());
assert!(
extract_text(unjailed_content.as_ref().unwrap())
......@@ -440,7 +452,7 @@ mod tests {
// Last chunk should pass through normally
assert_eq!(
results[2].data.as_ref().unwrap().choices[0]
results[2].data.as_ref().unwrap().inner.choices[0]
.delta
.content
.as_ref()
......@@ -476,7 +488,7 @@ mod tests {
// Check if tool calls were parsed
if let Some(last_result) = results.last()
&& let Some(ref response_data) = last_result.data
&& let Some(ref tool_calls) = response_data.choices[0].delta.tool_calls
&& let Some(ref tool_calls) = response_data.inner.choices[0].delta.tool_calls
{
assert!(!tool_calls.as_slice().is_empty());
assert_eq!(
......@@ -514,7 +526,7 @@ mod tests {
// First chunk should pass through
assert_eq!(
results[0].data.as_ref().unwrap().choices[0]
results[0].data.as_ref().unwrap().inner.choices[0]
.delta
.content
.as_ref()
......@@ -523,7 +535,7 @@ mod tests {
);
// Second chunk should contain the accumulated jailed content
let jailed = results[1].data.as_ref().unwrap().choices[0]
let jailed = results[1].data.as_ref().unwrap().inner.choices[0]
.delta
.content
.as_ref()
......@@ -1226,7 +1238,7 @@ mod tests {
.find(|r| {
r.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.map(|c| c.delta.tool_calls.is_some())
.unwrap_or(false)
})
......@@ -1247,7 +1259,7 @@ mod tests {
);
// Verify tool call was parsed correctly
let tool_calls = &tool_call_chunk.data.as_ref().unwrap().choices[0]
let tool_calls = &tool_call_chunk.data.as_ref().unwrap().inner.choices[0]
.delta
.tool_calls;
assert!(tool_calls.is_some(), "Should have tool calls");
......@@ -1318,20 +1330,20 @@ mod tests {
// Verify inner response metadata carries forward real stream values (not placeholders)
let inner = accumulated_chunk.data.as_ref().unwrap();
assert_eq!(
inner.id, "test-id",
inner.inner.id, "test-id",
"Inner response id should carry forward from real stream chunks, not be 'stream-end'"
);
assert_eq!(
inner.model, "test-model",
inner.inner.model, "test-model",
"Inner response model should carry forward from real stream chunks, not be 'unknown'"
);
assert_eq!(
inner.created, 1234567890,
inner.inner.created, 1234567890,
"Inner response created should carry forward from real stream chunks, not be 0"
);
// Verify accumulated content is returned
let content = &inner.choices[0].delta.content;
let content = &inner.inner.choices[0].delta.content;
assert!(content.is_some(), "Should have accumulated content");
let content = content.as_ref().unwrap();
assert!(
......@@ -1379,7 +1391,7 @@ mod tests {
.find(|r| {
r.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.map(|c| c.delta.tool_calls.is_some())
.unwrap_or(false)
})
......@@ -1544,7 +1556,7 @@ mod tests {
let choice_1_chunks: Vec<_> = results
.iter()
.filter_map(|r| r.data.as_ref())
.flat_map(|d| &d.choices)
.flat_map(|d| &d.inner.choices)
.filter(|c| c.index == 1 && c.delta.content.is_some())
.collect();
......@@ -1558,7 +1570,7 @@ mod tests {
let choice_0_tool_calls: Vec<_> = results
.iter()
.filter_map(|r| r.data.as_ref())
.flat_map(|d| &d.choices)
.flat_map(|d| &d.inner.choices)
.filter(|c| c.index == 0 && c.finish_reason == Some(FinishReason::ToolCalls))
.collect();
......@@ -1571,7 +1583,7 @@ mod tests {
let choice_2_tool_calls: Vec<_> = results
.iter()
.filter_map(|r| r.data.as_ref())
.flat_map(|d| &d.choices)
.flat_map(|d| &d.inner.choices)
.filter(|c| c.index == 2 && c.finish_reason == Some(FinishReason::ToolCalls))
.collect();
......@@ -1614,7 +1626,7 @@ mod tests {
let mut tool_call_responses: Vec<_> = results
.iter()
.filter_map(|r| r.data.as_ref())
.flat_map(|d| &d.choices)
.flat_map(|d| &d.inner.choices)
.filter(|c| c.finish_reason == Some(FinishReason::ToolCalls))
.collect();
......@@ -1659,7 +1671,7 @@ mod tests {
let run_responses: Vec<_> = run_results
.iter()
.filter_map(|r| r.data.as_ref())
.flat_map(|d| &d.choices)
.flat_map(|d| &d.inner.choices)
.filter(|c| c.finish_reason == Some(FinishReason::ToolCalls))
.collect();
......@@ -1683,8 +1695,8 @@ mod tests {
// Modify the inner data to be a usage-only chunk
if let Some(ref mut data) = usage_chunk.data {
data.choices.clear();
data.usage = Some(CompletionUsage {
data.inner.choices.clear();
data.inner.usage = Some(CompletionUsage {
prompt_tokens: 11,
completion_tokens: 3,
total_tokens: 14,
......@@ -1703,7 +1715,7 @@ mod tests {
assert_eq!(results.len(), 2, "Should have exactly 2 chunks");
// First chunk should be content chunk
let content = results[0].data.as_ref().unwrap().choices[0]
let content = results[0].data.as_ref().unwrap().inner.choices[0]
.delta
.content
.as_ref()
......@@ -1716,10 +1728,17 @@ mod tests {
// Second chunk should be usage-only chunk
assert!(
results[1].data.as_ref().unwrap().choices.is_empty(),
results[1].data.as_ref().unwrap().inner.choices.is_empty(),
"Usage chunk should have no choices"
);
let usage = results[1].data.as_ref().unwrap().usage.as_ref().unwrap();
let usage = results[1]
.data
.as_ref()
.unwrap()
.inner
.usage
.as_ref()
.unwrap();
assert_eq!(usage.prompt_tokens, 11);
assert_eq!(usage.completion_tokens, 3);
assert_eq!(usage.total_tokens, 14);
......@@ -1896,7 +1915,7 @@ mod tests {
let has_analysis_text = results.iter().any(|r| {
r.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref())
.map(|content| {
test_utils::extract_text(content)
......@@ -1950,7 +1969,7 @@ mod tests {
let Some(data) = result.data else {
continue;
};
for choice in data.choices {
for choice in data.inner.choices {
if let Some(content) = choice.delta.content {
assert!(
!test_utils::extract_text(&content).contains("<|tool▁calls▁end|>"),
......@@ -2024,7 +2043,7 @@ mod tests {
let Some(data) = result.data else {
continue;
};
for choice in data.choices {
for choice in data.inner.choices {
if let Some(content) = choice.delta.content {
assert!(
!test_utils::extract_text(&content).contains("<|tool▁calls▁end|>"),
......@@ -2221,7 +2240,7 @@ mod tests {
.filter_map(|r| {
r.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref())
})
.filter(|content| {
......@@ -2241,7 +2260,7 @@ mod tests {
.filter_map(|r| {
r.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref())
})
.find(|content| {
......@@ -2264,7 +2283,7 @@ mod tests {
.filter(|r| {
r.data
.as_ref()
.and_then(|d| d.choices.first())
.and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.tool_calls.as_ref())
.map(|tc| !tc.is_empty())
.unwrap_or(false)
......@@ -2397,6 +2416,7 @@ mod parallel_jail_tests {
.collect();
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices,
created: 1234567890,
......@@ -2405,6 +2425,7 @@ mod parallel_jail_tests {
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -2428,7 +2449,7 @@ mod parallel_jail_tests {
.filter(|r| {
r.data
.as_ref()
.is_some_and(|d| d.choices.iter().any(|c| c.delta.tool_calls.is_some()))
.is_some_and(|d| d.inner.choices.iter().any(|c| c.delta.tool_calls.is_some()))
})
.collect();
......@@ -2441,7 +2462,7 @@ mod parallel_jail_tests {
let mut all_tool_calls = Vec::new();
for result in &tool_call_results {
if let Some(ref data) = result.data {
for choice in &data.choices {
for choice in &data.inner.choices {
if let Some(ref tool_calls) = choice.delta.tool_calls {
all_tool_calls.extend(tool_calls.iter());
}
......@@ -2635,7 +2656,7 @@ mod parallel_jail_tests {
// Should have normal text before tool calls
let normal_text_before = results.iter().find(|r| {
r.data.as_ref().is_some_and(|d| {
d.choices.iter().any(|c| {
d.inner.choices.iter().any(|c| {
c.delta.content.as_ref().is_some_and(|content| {
test_utils::extract_text(content).contains("I'll check the weather")
})
......@@ -2664,7 +2685,7 @@ mod parallel_jail_tests {
// Should have normal text after tool calls
let normal_text_after = results.iter().find(|r| {
r.data.as_ref().is_some_and(|d| {
d.choices.iter().any(|c| {
d.inner.choices.iter().any(|c| {
c.delta.content.as_ref().is_some_and(|content| {
test_utils::extract_text(content).contains("Let me get that information")
})
......@@ -2705,7 +2726,8 @@ mod parallel_jail_tests {
.iter()
.map(|r| {
r.data.as_ref().map_or(0, |d| {
d.choices
d.inner
.choices
.iter()
.map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()))
.sum::<usize>()
......@@ -2795,7 +2817,8 @@ mod parallel_jail_tests {
.iter()
.map(|r| {
r.data.as_ref().map_or(0, |d| {
d.choices
d.inner
.choices
.iter()
.map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()))
.sum::<usize>()
......@@ -2865,7 +2888,8 @@ mod parallel_jail_tests {
.iter()
.map(|r| {
r.data.as_ref().map_or(0, |d| {
d.choices
d.inner
.choices
.iter()
.map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()))
.sum::<usize>()
......@@ -2881,14 +2905,14 @@ mod parallel_jail_tests {
.filter(|r| {
r.data
.as_ref()
.is_some_and(|d| d.choices.iter().any(|c| c.delta.tool_calls.is_some()))
.is_some_and(|d| d.inner.choices.iter().any(|c| c.delta.tool_calls.is_some()))
})
.collect();
if let Some(result) = tool_call_results.first()
&& let Some(ref data) = result.data
{
for choice in &data.choices {
for choice in &data.inner.choices {
if let Some(ref tool_calls) = choice.delta.tool_calls {
for tool_call in tool_calls {
if let Some(ref function) = tool_call.function
......@@ -2943,7 +2967,8 @@ mod parallel_jail_tests {
.iter()
.map(|r| {
r.data.as_ref().map_or(0, |d| {
d.choices
d.inner
.choices
.iter()
.map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()))
.sum::<usize>()
......@@ -2990,7 +3015,8 @@ mod parallel_jail_tests {
// Should try to parse whatever content was accumulated
let has_some_content = results.iter().any(|r| {
r.data.as_ref().is_some_and(|d| {
d.choices
d.inner
.choices
.iter()
.any(|c| c.delta.content.is_some() || c.delta.tool_calls.is_some())
})
......@@ -3025,7 +3051,7 @@ mod parallel_jail_tests {
// Should have normal text content but no tool calls
let has_normal_text = results.iter().any(|r| {
r.data.as_ref().is_some_and(|d| {
d.choices.iter().any(|c| {
d.inner.choices.iter().any(|c| {
c.delta.content.as_ref().is_some_and(|content| {
test_utils::extract_text(content).contains("I'll help you")
|| test_utils::extract_text(content).contains("don't need any tools")
......@@ -3040,7 +3066,8 @@ mod parallel_jail_tests {
.iter()
.map(|r| {
r.data.as_ref().map_or(0, |d| {
d.choices
d.inner
.choices
.iter()
.map(|c| c.delta.tool_calls.as_ref().map_or(0, |tc| tc.len()))
.sum::<usize>()
......
......@@ -39,6 +39,7 @@ fn create_mock_response_chunk(
};
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: vec![choice],
created: 1234567890,
......@@ -47,6 +48,7 @@ fn create_mock_response_chunk(
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -125,7 +127,7 @@ mod tests {
let mut all_content = String::new();
while let Some(item) = output_stream.next().await {
if let Some(ref data) = item.data {
for choice in &data.choices {
for choice in &data.inner.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
......@@ -177,15 +179,15 @@ mod tests {
assert_eq!(output_chunks.len(), 3);
// Chunk 0: "<think>This"
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_0, None, Some("This"));
// Chunk 1: " is reasoning content"
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().choices[0];
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_1, None, Some(" is reasoning content"));
// Chunk 2: "</think> Here's my answer."
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().choices[0];
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_2, Some(" Here's my answer."), None);
}
......@@ -223,15 +225,15 @@ mod tests {
assert_eq!(output_chunks.len(), 3);
// Chunk 0: "<think>Only"
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_0, None, Some("Only"));
// Chunk 1: " reasoning"
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().choices[0];
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_1, None, Some(" reasoning"));
// Chunk 2: " here</think>"
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().choices[0];
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_2, None, Some(" here"));
}
......@@ -266,7 +268,7 @@ mod tests {
// Verify that only normal content is present
assert_eq!(output_chunks.len(), 1);
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(
output_choice,
Some("Just normal text without reasoning tags."),
......@@ -304,8 +306,8 @@ mod tests {
assert_eq!(output_chunks.len(), input_chunks.len());
for (input, output) in input_chunks.iter().zip(output_chunks.iter()) {
let input_choice = &input.data.as_ref().unwrap().choices[0];
let output_choice = &output.data.as_ref().unwrap().choices[0];
let input_choice = &input.data.as_ref().unwrap().inner.choices[0];
let output_choice = &output.data.as_ref().unwrap().inner.choices[0];
assert_choice(
output_choice,
input_choice.delta.content.as_ref().map(get_text),
......@@ -345,7 +347,7 @@ mod tests {
// Verify that Mistral-style reasoning is parsed correctly
assert_eq!(output_chunks.len(), 1);
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert!(
output_choice.delta.reasoning_content.is_some(),
......@@ -422,7 +424,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
// Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning);
......@@ -574,7 +576,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
// Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning);
......@@ -685,7 +687,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref data) = chunk.data {
for choice in &data.choices {
for choice in &data.inner.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
......@@ -782,7 +784,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning);
}
......
......@@ -107,6 +107,7 @@ fn load_test_data(file_path: &str) -> TestData {
.expect("Failed to parse choices");
let response = NvCreateChatCompletionStreamResponse {
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: id.clone(),
choices,
created: 1234567890,
......@@ -115,6 +116,7 @@ fn load_test_data(file_path: &str) -> TestData {
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -231,7 +233,7 @@ fn aggregate_content_from_chunks(
for chunk in chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
// Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content {
reasoning_content.push_str(reasoning);
......@@ -279,7 +281,7 @@ fn validate_finish_reason(
// Count finish_reason occurrences and track position
for (idx, chunk) in chunks.iter().enumerate() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
if let Some(reason) = choice.finish_reason {
finish_reason_count += 1;
last_chunk_index = Some(idx);
......
......@@ -241,12 +241,12 @@ async fn test_streaming_without_usage() {
for (i, chunk) in content_chunks.iter().enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_none(),
response.inner.usage.is_none(),
"Chunk {} should have usage: None when stream_options not set",
i
);
assert!(
!response.choices.is_empty(),
!response.inner.choices.is_empty(),
"Chunk {} should have choices",
i
);
......@@ -286,12 +286,12 @@ async fn test_streaming_with_usage_compliance() {
for (i, chunk) in chunks.iter().take(3).enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_none(),
response.inner.usage.is_none(),
"Content chunk {} should have usage: None",
i
);
assert!(
!response.choices.is_empty(),
!response.inner.choices.is_empty(),
"Content chunk {} should have choices",
i
);
......@@ -301,15 +301,15 @@ async fn test_streaming_with_usage_compliance() {
// Verify the final chunk is the usage-only chunk
if let Some(final_response) = &chunks[3].data {
assert!(
final_response.choices.is_empty(),
final_response.inner.choices.is_empty(),
"Final usage chunk should have empty choices array"
);
assert!(
final_response.usage.is_some(),
final_response.inner.usage.is_some(),
"Final usage chunk should have usage statistics"
);
let usage = final_response.usage.as_ref().unwrap();
let usage = final_response.inner.usage.as_ref().unwrap();
assert_eq!(
usage.completion_tokens, 3,
"Should have 3 completion tokens"
......@@ -359,18 +359,18 @@ async fn test_streaming_with_continuous_usage() {
for (i, chunk) in chunks.iter().take(3).enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_some(),
response.inner.usage.is_some(),
"Content chunk {} should have usage: Some",
i
);
assert!(
!response.choices.is_empty(),
!response.inner.choices.is_empty(),
"Content chunk {} should have choices",
i
);
// Verify usage counts are properly accumulated for each chunk
let usage = response.usage.as_ref().unwrap();
let usage = response.inner.usage.as_ref().unwrap();
assert_eq!(
usage.completion_tokens,
i as u32 + 1,
......@@ -392,15 +392,15 @@ async fn test_streaming_with_continuous_usage() {
// Verify the final chunk is the usage-only chunk
if let Some(final_response) = &chunks[3].data {
assert!(
final_response.choices.is_empty(),
final_response.inner.choices.is_empty(),
"Final usage chunk should have empty choices array"
);
assert!(
final_response.usage.is_some(),
final_response.inner.usage.is_some(),
"Final usage chunk should have usage statistics"
);
let usage = final_response.usage.as_ref().unwrap();
let usage = final_response.inner.usage.as_ref().unwrap();
assert_eq!(
usage.completion_tokens, 3,
"Should have 3 completion tokens"
......@@ -464,7 +464,7 @@ async fn test_streaming_with_usage_false() {
for (i, chunk) in content_chunks.iter().enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_none(),
response.inner.usage.is_none(),
"Chunk {} should have usage: None when include_usage is false",
i
);
......@@ -560,7 +560,7 @@ async fn test_nonstreaming_has_usage_field() {
// Aggregate the streaming chunks into a single non-streaming response
// This simulates what the HTTP service does for non-streaming requests
let result = dynamo_async_openai::types::CreateChatCompletionResponse::from_annotated_stream(
let result = dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionResponse::from_annotated_stream(
transformed_stream,
ParsingOptions::default(),
)
......@@ -570,12 +570,12 @@ async fn test_nonstreaming_has_usage_field() {
let response = result.unwrap();
assert!(
response.usage.is_some(),
response.inner.usage.is_some(),
"Non-streaming chat completion response MUST have a usage field populated. \
This is required for OpenAI API compliance."
);
let usage = response.usage.unwrap();
let usage = response.inner.usage.unwrap();
// Verify usage contains valid token counts
// In our mock, we generated 3 tokens (from the 3 backend outputs)
......@@ -725,7 +725,11 @@ async fn test_chat_streaming_with_cached_tokens_propagation() {
assert_eq!(chunks.len(), 4, "Should have 3 content + 1 usage chunk");
if let Some(final_resp) = &chunks[3].data {
let usage = final_resp.usage.as_ref().expect("Usage must be present");
let usage = final_resp
.inner
.usage
.as_ref()
.expect("Usage must be present");
let cached = usage
.prompt_tokens_details
.as_ref()
......
......@@ -157,7 +157,7 @@ async fn test_named_tool_choice_parses_json() {
.expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await;
let choice = &response.choices[0];
let choice = &response.inner.choices[0];
assert_eq!(
choice.finish_reason,
......@@ -199,7 +199,7 @@ async fn test_required_tool_choice_parses_json_array() {
.expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await;
let choice = &response.choices[0];
let choice = &response.inner.choices[0];
assert_eq!(
choice.finish_reason,
......@@ -259,7 +259,7 @@ async fn test_tool_choice_parse_failure_returns_as_content() {
.expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await;
let delta = &response.choices[0].delta;
let delta = &response.inner.choices[0].delta;
// Jail stream behavior: if parsing fails, return accumulated content as-is
// This matches marker-based FC behavior
......@@ -317,11 +317,11 @@ async fn test_streaming_named_tool_buffers_until_finish() {
let response = &all_responses[0];
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Stop)
);
let tool_calls = response.choices[0].delta.tool_calls.as_ref().unwrap();
let tool_calls = response.inner.choices[0].delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(
tool_calls[0].function.as_ref().unwrap().name.as_deref(),
......@@ -384,11 +384,11 @@ async fn test_streaming_required_tool_parallel() {
let response = &all_responses[0];
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
);
let tool_calls = response.choices[0].delta.tool_calls.as_ref().unwrap();
let tool_calls = response.inner.choices[0].delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 2);
assert_eq!(
......@@ -445,8 +445,12 @@ fn test_no_tool_choice_outputs_normal_text() {
.expect("normal text");
assert_eq!(
response.choices[0].delta.content.as_ref().map(get_text),
response.inner.choices[0]
.delta
.content
.as_ref()
.map(get_text),
Some("Hello world")
);
assert!(response.choices[0].delta.tool_calls.is_none());
assert!(response.inner.choices[0].delta.tool_calls.is_none());
}
......@@ -116,7 +116,7 @@ async fn test_named_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with Stop
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Length),
"Length finish reason must be preserved for tool_choice=named"
);
......@@ -139,7 +139,7 @@ fn test_required_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with ToolCalls
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Length),
"Length finish reason must be preserved for tool_choice=required"
);
......@@ -169,7 +169,7 @@ fn test_named_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ContentFilter),
"ContentFilter finish reason must be preserved for tool_choice=named"
);
......@@ -192,7 +192,7 @@ fn test_required_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ContentFilter),
"ContentFilter finish reason must be preserved for tool_choice=required"
);
......@@ -222,7 +222,7 @@ fn test_named_tool_choice_normal_stop_becomes_stop() {
// Normal completion: Stop should remain Stop for named tool choice
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Stop),
);
}
......@@ -247,7 +247,7 @@ async fn test_required_tool_choice_normal_stop_becomes_tool_calls() {
// Normal completion: Stop should become ToolCalls for required tool choice
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ToolCalls),
);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment