Unverified Commit 2887cd1c authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

refactor(1/3): move `nvext` to `dynamo-llm` and move `anthropic` to `dynamo-async-openai` (#7564)

parent d6136f4a
......@@ -214,50 +214,9 @@ pub struct AgentHints {
pub latency_sensitivity: Option<f64>,
}
/// Anthropic-style cache control hint for prefix pinning with TTL.
///
/// The TTL is kept as the raw string supplied by the client; use
/// `CacheControl::ttl_seconds` to obtain the effective value in seconds.
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
pub struct CacheControl {
/// Kind of cache control requested. (De)serialized under the key `type`.
#[serde(rename = "type")]
pub control_type: CacheControlType,
/// TTL as seconds (integer) or shorthand ("5m" = 300s, "1h" = 3600s). Clamped to [300, 3600]
/// when read through `ttl_seconds`.
///
/// Optional on the wire: a missing field deserializes to `None`, and `None` is omitted
/// when serializing.
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ttl: Option<String>,
}
/// Kind of cache control carried by a `CacheControl` hint.
///
/// Variant names are (de)serialized in lowercase (e.g. `"ephemeral"`).
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
/// The default variant, used when a value is built via `Default`.
#[default]
Ephemeral,
/// Catch-all: any type string that does not match another variant deserializes to
/// this variant instead of producing a deserialization error.
#[serde(other)]
Unknown,
}
/// Lower clamp bound for `CacheControl::ttl_seconds`; also the value returned when the
/// TTL is absent or cannot be parsed.
const MIN_TTL_SECONDS: u64 = 300;
/// Upper clamp bound for `CacheControl::ttl_seconds`.
const MAX_TTL_SECONDS: u64 = 3600;
impl CacheControl {
    /// Resolve the configured TTL to a number of seconds within [300, 3600].
    ///
    /// Resolution rules:
    /// - no TTL set: 300s;
    /// - the shorthands `"5m"` and `"1h"`: 300s and 3600s respectively;
    /// - any other string is parsed as an unsigned integer number of seconds
    ///   (e.g. `"120"`, `"600"`) and clamped into [300, 3600];
    /// - a string that is neither a known shorthand nor an integer: a warning is
    ///   logged and 300s is returned.
    pub fn ttl_seconds(&self) -> u64 {
        // Guard: nothing configured means the minimum TTL applies.
        let Some(ttl) = self.ttl.as_deref() else {
            return MIN_TTL_SECONDS;
        };

        let seconds = match ttl {
            "5m" => 300,
            "1h" => 3600,
            numeric => {
                let Ok(parsed) = numeric.parse::<u64>() else {
                    tracing::warn!("Unrecognized TTL '{}', defaulting to 300s", numeric);
                    return MIN_TTL_SECONDS;
                };
                parsed
            }
        };

        seconds.clamp(MIN_TTL_SECONDS, MAX_TTL_SECONDS)
    }
}
// Re-export CacheControl types from dynamo-async-openai where they are canonically defined
// alongside the Anthropic protocol types they originate from.
pub use dynamo_async_openai::types::anthropic::{CacheControl, CacheControlType};
impl Default for NvExt {
fn default() -> Self {
......
......@@ -696,8 +696,8 @@ pub fn chat_completion_to_response(
nv_resp: NvCreateChatCompletionResponse,
params: &ResponseParams,
) -> Result<NvResponse, anyhow::Error> {
let chat_resp = nv_resp;
let nvext = chat_resp.nvext.clone();
let nvext = nv_resp.nvext.clone();
let chat_resp = nv_resp.inner;
let message_id = format!("msg_{}", Uuid::new_v4().simple());
let response_id = format!("resp_{}", Uuid::new_v4().simple());
......@@ -1163,32 +1163,34 @@ mod tests {
fn test_into_nvresponse_from_chat_response() {
let now = 1_726_000_000;
let chat_resp = NvCreateChatCompletionResponse {
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
content: Some(
dynamo_async_openai::types::ChatCompletionMessageContent::Text(
"This is a reply".to_string(),
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
content: Some(
dynamo_async_openai::types::ChatCompletionMessageContent::Text(
"This is a reply".to_string(),
),
),
),
refusal: None,
tool_calls: None,
role: dynamo_async_openai::types::Role::Assistant,
function_call: None,
audio: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: now,
model: "llama-3.1-8b-instruct".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
refusal: None,
tool_calls: None,
role: dynamo_async_openai::types::Role::Assistant,
function_call: None,
audio: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: now,
model: "llama-3.1-8b-instruct".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
},
nvext: None,
};
......@@ -1218,35 +1220,37 @@ mod tests {
fn test_response_with_tool_calls() {
let now = 1_726_000_000;
let chat_resp = NvCreateChatCompletionResponse {
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
content: None,
refusal: None,
tool_calls: Some(vec![ChatCompletionMessageToolCall {
id: "call_abc".into(),
r#type: ChatCompletionToolType::Function,
function: dynamo_async_openai::types::FunctionCall {
name: "get_weather".into(),
arguments: r#"{"location":"SF"}"#.into(),
},
}]),
role: dynamo_async_openai::types::Role::Assistant,
function_call: None,
audio: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: now,
model: "test-model".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
id: "chatcmpl-xyz".into(),
choices: vec![dynamo_async_openai::types::ChatChoice {
index: 0,
message: dynamo_async_openai::types::ChatCompletionResponseMessage {
content: None,
refusal: None,
tool_calls: Some(vec![ChatCompletionMessageToolCall {
id: "call_abc".into(),
r#type: ChatCompletionToolType::Function,
function: dynamo_async_openai::types::FunctionCall {
name: "get_weather".into(),
arguments: r#"{"location":"SF"}"#.into(),
},
}]),
role: dynamo_async_openai::types::Role::Assistant,
function_call: None,
audio: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: now,
model: "test-model".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".to_string(),
usage: None,
},
nvext: None,
};
......@@ -1432,14 +1436,16 @@ thinking
};
let chat_resp = NvCreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
};
......@@ -1463,14 +1469,16 @@ thinking
};
let chat_resp = NvCreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
};
......@@ -1489,14 +1497,16 @@ thinking
};
let chat_resp = NvCreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
};
......@@ -1555,29 +1565,31 @@ thinking
ChatChoice, ChatCompletionMessageContent, ChatCompletionResponseMessage, FinishReason,
};
NvCreateChatCompletionResponse {
choices: vec![ChatChoice {
index: 0,
#[allow(deprecated)]
message: ChatCompletionResponseMessage {
content: Some(ChatCompletionMessageContent::Text(text.into())),
role: dynamo_async_openai::types::Role::Assistant,
tool_calls: None,
refusal: None,
reasoning_content: None,
function_call: None,
audio: None,
},
finish_reason: Some(FinishReason::Stop),
stop_reason: None,
logprobs: None,
}],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![ChatChoice {
index: 0,
#[allow(deprecated)]
message: ChatCompletionResponseMessage {
content: Some(ChatCompletionMessageContent::Text(text.into())),
role: dynamo_async_openai::types::Role::Assistant,
tool_calls: None,
refusal: None,
reasoning_content: None,
function_call: None,
audio: None,
},
finish_reason: Some(FinishReason::Stop),
stop_reason: None,
logprobs: None,
}],
created: 0,
id: "test".into(),
model: "m".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion".into(),
usage: None,
},
nvext: None,
}
}
......
......@@ -183,7 +183,7 @@ impl ResponseStreamConverter {
let mut events = Vec::new();
// Capture usage stats from the final chunk (sent when stream_options.include_usage=true)
if let Some(ref u) = chunk.usage {
if let Some(ref u) = chunk.inner.usage {
self.usage = Some(ResponseUsage {
input_tokens: u.prompt_tokens,
input_tokens_details: InputTokenDetails {
......@@ -205,7 +205,7 @@ impl ResponseStreamConverter {
});
}
for choice in &chunk.choices {
for choice in &chunk.inner.choices {
let delta = &choice.delta;
// Handle text content deltas — extract text from the enum
......@@ -685,35 +685,37 @@ mod tests {
) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse {
id: "chat-1".into(),
choices: vec![ChatChoiceStream {
index: 0,
delta: ChatCompletionStreamResponseDelta {
content: None,
function_call: None,
tool_calls: Some(vec![ChatCompletionMessageToolCallChunk {
index: tc_index,
id: id.map(String::from),
r#type: Some(ChatCompletionToolType::Function),
function: Some(FunctionCallStream {
name: name.map(String::from),
arguments: args.map(String::from),
}),
}]),
role: None,
refusal: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: 0,
model: "test".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".into(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "chat-1".into(),
choices: vec![ChatChoiceStream {
index: 0,
delta: ChatCompletionStreamResponseDelta {
content: None,
function_call: None,
tool_calls: Some(vec![ChatCompletionMessageToolCallChunk {
index: tc_index,
id: id.map(String::from),
r#type: Some(ChatCompletionToolType::Function),
function: Some(FunctionCallStream {
name: name.map(String::from),
arguments: args.map(String::from),
}),
}]),
role: None,
refusal: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: 0,
model: "test".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None,
}
}
......@@ -721,27 +723,29 @@ mod tests {
fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)]
NvCreateChatCompletionStreamResponse {
id: "chat-1".into(),
choices: vec![ChatChoiceStream {
index: 0,
delta: ChatCompletionStreamResponseDelta {
content: Some(ChatCompletionMessageContent::Text(text.into())),
function_call: None,
tool_calls: None,
role: None,
refusal: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: 0,
model: "test".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".into(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "chat-1".into(),
choices: vec![ChatChoiceStream {
index: 0,
delta: ChatCompletionStreamResponseDelta {
content: Some(ChatCompletionMessageContent::Text(text.into())),
function_call: None,
tool_calls: None,
role: None,
refusal: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}],
created: 0,
model: "test".into(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None,
}
}
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use dynamo_async_openai::types::ChatCompletionMessageContent;
use dynamo_async_openai::types::{
ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta,
CreateChatCompletionStreamResponse, Role,
};
use dynamo_llm::protocols::{
ContentProvider, DataStream,
Annotated, ContentProvider, DataStream,
codec::{Message, SseCodecError, create_message_stream},
openai::{
ParsingOptions,
chat_completions::{NvCreateChatCompletionResponse, aggregator::ChatCompletionAggregator},
chat_completions::{
NvCreateChatCompletionResponse, NvCreateChatCompletionStreamResponse,
aggregator::ChatCompletionAggregator,
},
completions::NvCreateCompletionResponse,
},
};
......@@ -45,6 +51,7 @@ async fn test_openai_chat_stream() {
assert_eq!(
get_text(
result
.inner
.choices
.first()
.unwrap()
......@@ -70,6 +77,7 @@ async fn test_openai_chat_edge_case_multi_line_data() {
assert_eq!(
get_text(
result
.inner
.choices
.first()
.unwrap()
......@@ -95,6 +103,7 @@ async fn test_openai_chat_edge_case_comments_per_response() {
assert_eq!(
get_text(
result
.inner
.choices
.first()
.unwrap()
......@@ -138,3 +147,113 @@ async fn test_openai_cmpl_stream() {
" This is a question that is often asked by those outside of AI research and development"
);
}
// ===================================
// nvext aggregation regression tests
// ===================================
/// Build one annotated streaming chunk for the nvext aggregation tests.
///
/// * `content` - when `Some`, the chunk holds a single choice (index 0, assistant role)
///   whose delta carries that text; when `None`, the chunk has no choices.
/// * `nvext` - stored as-is in the chunk's `nvext` field.
// NOTE(review): the allow presumably covers the deprecated `function_call` field — confirm.
#[allow(deprecated)]
fn make_stream_delta(
    content: Option<&str>,
    nvext: Option<serde_json::Value>,
) -> Annotated<NvCreateChatCompletionStreamResponse> {
    // An `Option` iterates over zero or one item, which yields an empty or
    // single-element `choices` vector.
    let choices = content
        .into_iter()
        .map(|text| ChatChoiceStream {
            index: 0,
            delta: ChatCompletionStreamResponseDelta {
                content: Some(ChatCompletionMessageContent::Text(text.to_owned())),
                function_call: None,
                tool_calls: None,
                role: Some(Role::Assistant),
                refusal: None,
                reasoning_content: None,
            },
            finish_reason: None,
            stop_reason: None,
            logprobs: None,
        })
        .collect();

    let inner = CreateChatCompletionStreamResponse {
        id: String::from("test-id"),
        choices,
        created: 1234567890,
        model: String::from("test-model"),
        service_tier: None,
        system_fingerprint: None,
        object: String::from("chat.completion.chunk"),
        usage: None,
    };

    Annotated::from_data(NvCreateChatCompletionStreamResponse { inner, nvext })
}
/// An nvext value attached to one stream delta must still be present on the
/// aggregated response, and the text of all deltas must be concatenated.
#[tokio::test]
async fn test_nvext_passthrough_aggregation() {
    let expected_nvext = serde_json::json!({"custom_field": "test_value"});

    // Only the middle delta carries nvext.
    let stream = futures::stream::iter(vec![
        make_stream_delta(Some("Hello"), None),
        make_stream_delta(Some(" world"), Some(expected_nvext.clone())),
        make_stream_delta(Some("!"), None),
    ]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(stream, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, Some(expected_nvext));

    let first_choice = aggregated.inner.choices.first().unwrap();
    let message_content = first_choice.message.content.as_ref().unwrap();
    assert_eq!(get_text(message_content), "Hello world!");
}
/// When several deltas carry nvext, the aggregated response must hold the value
/// from the last delta that had one.
#[tokio::test]
async fn test_nvext_last_value_wins() {
    let earlier = serde_json::json!({"version": 1});
    let latest = serde_json::json!({"version": 2});

    // nvext appears on the first and third deltas; the second has none.
    let stream = futures::stream::iter(vec![
        make_stream_delta(Some("a"), Some(earlier)),
        make_stream_delta(Some("b"), None),
        make_stream_delta(Some("c"), Some(latest.clone())),
    ]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(stream, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, Some(latest));
}
/// If no delta in the stream carries nvext, the aggregated response's nvext
/// must be `None`.
#[tokio::test]
async fn test_nvext_none_when_absent() {
    let only_delta = make_stream_delta(Some("hello"), None);
    let stream = futures::stream::iter(vec![only_delta]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(stream, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, None);
}
......@@ -397,14 +397,16 @@ fn create_response_with_linear_probs(
};
NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices: vec![choice],
created: 1234567890,
model: "test-model".to_string(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices: vec![choice],
created: 1234567890,
model: "test-model".to_string(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None,
}
}
......@@ -479,14 +481,16 @@ fn create_multi_choice_response(
.collect();
NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices,
created: 1234567890,
model: "test-model".to_string(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test_id".to_string(),
choices,
created: 1234567890,
model: "test-model".to_string(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None,
}
}
......@@ -192,7 +192,7 @@ async fn postprocessor_parsing_stream_replays_interval_20_fixture() {
continue;
};
for choice in &output_data.choices {
for choice in &output_data.inner.choices {
if let Some(reasoning_content) = &choice.delta.reasoning_content {
reasoning.push_str(reasoning_content);
}
......
This diff is collapsed.
......@@ -39,14 +39,16 @@ fn create_mock_response_chunk(
};
let response = NvCreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: vec![choice],
created: 1234567890,
model: "test-model".to_string(),
system_fingerprint: Some("test-fingerprint".to_string()),
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: vec![choice],
created: 1234567890,
model: "test-model".to_string(),
system_fingerprint: Some("test-fingerprint".to_string()),
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -125,7 +127,7 @@ mod tests {
let mut all_content = String::new();
while let Some(item) = output_stream.next().await {
if let Some(ref data) = item.data {
for choice in &data.choices {
for choice in &data.inner.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
......@@ -177,15 +179,15 @@ mod tests {
assert_eq!(output_chunks.len(), 3);
// Chunk 0: "<think>This"
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_0, None, Some("This"));
// Chunk 1: " is reasoning content"
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().choices[0];
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_1, None, Some(" is reasoning content"));
// Chunk 2: "</think> Here's my answer."
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().choices[0];
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_2, Some(" Here's my answer."), None);
}
......@@ -223,15 +225,15 @@ mod tests {
assert_eq!(output_chunks.len(), 3);
// Chunk 0: "<think>Only"
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_0, None, Some("Only"));
// Chunk 1: " reasoning"
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().choices[0];
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_1, None, Some(" reasoning"));
// Chunk 2: " here</think>"
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().choices[0];
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_2, None, Some(" here"));
}
......@@ -266,7 +268,7 @@ mod tests {
// Verify that only normal content is present
assert_eq!(output_chunks.len(), 1);
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(
output_choice,
Some("Just normal text without reasoning tags."),
......@@ -304,8 +306,8 @@ mod tests {
assert_eq!(output_chunks.len(), input_chunks.len());
for (input, output) in input_chunks.iter().zip(output_chunks.iter()) {
let input_choice = &input.data.as_ref().unwrap().choices[0];
let output_choice = &output.data.as_ref().unwrap().choices[0];
let input_choice = &input.data.as_ref().unwrap().inner.choices[0];
let output_choice = &output.data.as_ref().unwrap().inner.choices[0];
assert_choice(
output_choice,
input_choice.delta.content.as_ref().map(get_text),
......@@ -345,7 +347,7 @@ mod tests {
// Verify that Mistral-style reasoning is parsed correctly
assert_eq!(output_chunks.len(), 1);
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0];
let output_choice = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert!(
output_choice.delta.reasoning_content.is_some(),
......@@ -422,7 +424,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
// Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning);
......@@ -574,7 +576,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
// Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning);
......@@ -685,7 +687,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref data) = chunk.data {
for choice in &data.choices {
for choice in &data.inner.choices {
if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r);
}
......@@ -782,7 +784,7 @@ mod tests {
for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning);
}
......
......@@ -107,14 +107,16 @@ fn load_test_data(file_path: &str) -> TestData {
.expect("Failed to parse choices");
let response = NvCreateChatCompletionStreamResponse {
id: id.clone(),
choices,
created: 1234567890,
model: "test-model".to_string(),
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
id: id.clone(),
choices,
created: 1234567890,
model: "test-model".to_string(),
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
service_tier: None,
},
nvext: None,
};
......@@ -231,7 +233,7 @@ fn aggregate_content_from_chunks(
for chunk in chunks.iter() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
// Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content {
reasoning_content.push_str(reasoning);
......@@ -279,7 +281,7 @@ fn validate_finish_reason(
// Count finish_reason occurrences and track position
for (idx, chunk) in chunks.iter().enumerate() {
if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices {
for choice in &response_data.inner.choices {
if let Some(reason) = choice.finish_reason {
finish_reason_count += 1;
last_chunk_index = Some(idx);
......
......@@ -241,12 +241,12 @@ async fn test_streaming_without_usage() {
for (i, chunk) in content_chunks.iter().enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_none(),
response.inner.usage.is_none(),
"Chunk {} should have usage: None when stream_options not set",
i
);
assert!(
!response.choices.is_empty(),
!response.inner.choices.is_empty(),
"Chunk {} should have choices",
i
);
......@@ -286,12 +286,12 @@ async fn test_streaming_with_usage_compliance() {
for (i, chunk) in chunks.iter().take(3).enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_none(),
response.inner.usage.is_none(),
"Content chunk {} should have usage: None",
i
);
assert!(
!response.choices.is_empty(),
!response.inner.choices.is_empty(),
"Content chunk {} should have choices",
i
);
......@@ -301,15 +301,15 @@ async fn test_streaming_with_usage_compliance() {
// Verify the final chunk is the usage-only chunk
if let Some(final_response) = &chunks[3].data {
assert!(
final_response.choices.is_empty(),
final_response.inner.choices.is_empty(),
"Final usage chunk should have empty choices array"
);
assert!(
final_response.usage.is_some(),
final_response.inner.usage.is_some(),
"Final usage chunk should have usage statistics"
);
let usage = final_response.usage.as_ref().unwrap();
let usage = final_response.inner.usage.as_ref().unwrap();
assert_eq!(
usage.completion_tokens, 3,
"Should have 3 completion tokens"
......@@ -359,18 +359,18 @@ async fn test_streaming_with_continuous_usage() {
for (i, chunk) in chunks.iter().take(3).enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_some(),
response.inner.usage.is_some(),
"Content chunk {} should have usage: Some",
i
);
assert!(
!response.choices.is_empty(),
!response.inner.choices.is_empty(),
"Content chunk {} should have choices",
i
);
// Verify usage counts are properly accumulated for each chunk
let usage = response.usage.as_ref().unwrap();
let usage = response.inner.usage.as_ref().unwrap();
assert_eq!(
usage.completion_tokens,
i as u32 + 1,
......@@ -392,15 +392,15 @@ async fn test_streaming_with_continuous_usage() {
// Verify the final chunk is the usage-only chunk
if let Some(final_response) = &chunks[3].data {
assert!(
final_response.choices.is_empty(),
final_response.inner.choices.is_empty(),
"Final usage chunk should have empty choices array"
);
assert!(
final_response.usage.is_some(),
final_response.inner.usage.is_some(),
"Final usage chunk should have usage statistics"
);
let usage = final_response.usage.as_ref().unwrap();
let usage = final_response.inner.usage.as_ref().unwrap();
assert_eq!(
usage.completion_tokens, 3,
"Should have 3 completion tokens"
......@@ -464,7 +464,7 @@ async fn test_streaming_with_usage_false() {
for (i, chunk) in content_chunks.iter().enumerate() {
if let Some(response) = &chunk.data {
assert!(
response.usage.is_none(),
response.inner.usage.is_none(),
"Chunk {} should have usage: None when include_usage is false",
i
);
......@@ -560,7 +560,7 @@ async fn test_nonstreaming_has_usage_field() {
// Aggregate the streaming chunks into a single non-streaming response
// This simulates what the HTTP service does for non-streaming requests
let result = dynamo_async_openai::types::CreateChatCompletionResponse::from_annotated_stream(
let result = dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionResponse::from_annotated_stream(
transformed_stream,
ParsingOptions::default(),
)
......@@ -570,12 +570,12 @@ async fn test_nonstreaming_has_usage_field() {
let response = result.unwrap();
assert!(
response.usage.is_some(),
response.inner.usage.is_some(),
"Non-streaming chat completion response MUST have a usage field populated. \
This is required for OpenAI API compliance."
);
let usage = response.usage.unwrap();
let usage = response.inner.usage.unwrap();
// Verify usage contains valid token counts
// In our mock, we generated 3 tokens (from the 3 backend outputs)
......@@ -725,7 +725,11 @@ async fn test_chat_streaming_with_cached_tokens_propagation() {
assert_eq!(chunks.len(), 4, "Should have 3 content + 1 usage chunk");
if let Some(final_resp) = &chunks[3].data {
let usage = final_resp.usage.as_ref().expect("Usage must be present");
let usage = final_resp
.inner
.usage
.as_ref()
.expect("Usage must be present");
let cached = usage
.prompt_tokens_details
.as_ref()
......
......@@ -157,7 +157,7 @@ async fn test_named_tool_choice_parses_json() {
.expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await;
let choice = &response.choices[0];
let choice = &response.inner.choices[0];
assert_eq!(
choice.finish_reason,
......@@ -199,7 +199,7 @@ async fn test_required_tool_choice_parses_json_array() {
.expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await;
let choice = &response.choices[0];
let choice = &response.inner.choices[0];
assert_eq!(
choice.finish_reason,
......@@ -259,7 +259,7 @@ async fn test_tool_choice_parse_failure_returns_as_content() {
.expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await;
let delta = &response.choices[0].delta;
let delta = &response.inner.choices[0].delta;
// Jail stream behavior: if parsing fails, return accumulated content as-is
// This matches marker-based FC behavior
......@@ -317,11 +317,11 @@ async fn test_streaming_named_tool_buffers_until_finish() {
let response = &all_responses[0];
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Stop)
);
let tool_calls = response.choices[0].delta.tool_calls.as_ref().unwrap();
let tool_calls = response.inner.choices[0].delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 1);
assert_eq!(
tool_calls[0].function.as_ref().unwrap().name.as_deref(),
......@@ -384,11 +384,11 @@ async fn test_streaming_required_tool_parallel() {
let response = &all_responses[0];
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ToolCalls)
);
let tool_calls = response.choices[0].delta.tool_calls.as_ref().unwrap();
let tool_calls = response.inner.choices[0].delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 2);
assert_eq!(
......@@ -445,8 +445,12 @@ fn test_no_tool_choice_outputs_normal_text() {
.expect("normal text");
assert_eq!(
response.choices[0].delta.content.as_ref().map(get_text),
response.inner.choices[0]
.delta
.content
.as_ref()
.map(get_text),
Some("Hello world")
);
assert!(response.choices[0].delta.tool_calls.is_none());
assert!(response.inner.choices[0].delta.tool_calls.is_none());
}
......@@ -116,7 +116,7 @@ async fn test_named_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with Stop
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Length),
"Length finish reason must be preserved for tool_choice=named"
);
......@@ -139,7 +139,7 @@ fn test_required_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with ToolCalls
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Length),
"Length finish reason must be preserved for tool_choice=required"
);
......@@ -169,7 +169,7 @@ fn test_named_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ContentFilter),
"ContentFilter finish reason must be preserved for tool_choice=named"
);
......@@ -192,7 +192,7 @@ fn test_required_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ContentFilter),
"ContentFilter finish reason must be preserved for tool_choice=required"
);
......@@ -222,7 +222,7 @@ fn test_named_tool_choice_normal_stop_becomes_stop() {
// Normal completion: Stop should remain Stop for named tool choice
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Stop),
);
}
......@@ -247,7 +247,7 @@ async fn test_required_tool_choice_normal_stop_becomes_tool_calls() {
// Normal completion: Stop should become ToolCalls for required tool choice
assert_eq!(
response.choices[0].finish_reason,
response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ToolCalls),
);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment