"vscode:/vscode.git/clone" did not exist on "2a5eb7e7785814f5b9b6051f790233e1b1c28207"
Unverified Commit 2887cd1c authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

refactor(1/3): move `nvext` to `dynamo-llm` and move `anthropic` to `dynamo-async-openai` (#7564)

parent d6136f4a
...@@ -214,50 +214,9 @@ pub struct AgentHints { ...@@ -214,50 +214,9 @@ pub struct AgentHints {
pub latency_sensitivity: Option<f64>, pub latency_sensitivity: Option<f64>,
} }
/// Anthropic-style cache control hint for prefix pinning with TTL. // Re-export CacheControl types from dynamo-async-openai where they are canonically defined
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)] // alongside the Anthropic protocol types they originate from.
pub struct CacheControl { pub use dynamo_async_openai::types::anthropic::{CacheControl, CacheControlType};
#[serde(rename = "type")]
pub control_type: CacheControlType,
/// TTL as seconds (integer) or shorthand ("5m" = 300s, "1h" = 3600s). Clamped to [300, 3600].
#[serde(default, skip_serializing_if = "Option::is_none")]
pub ttl: Option<String>,
}
// Kind of cache-control hint carried in the `type` field of `CacheControl`.
// Variant names are (de)serialized in lowercase ("ephemeral", "unknown").
#[derive(ToSchema, Serialize, Deserialize, Debug, Clone, Default, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum CacheControlType {
// Value produced by `Default::default()`.
#[default]
Ephemeral,
// Catch-all: any unrecognized `type` string deserializes to this variant
// instead of failing deserialization.
#[serde(other)]
Unknown,
}
/// Lower clamp bound (seconds) for `CacheControl::ttl_seconds`; also the value
/// it returns when the TTL is missing or cannot be parsed.
const MIN_TTL_SECONDS: u64 = 300;
/// Upper clamp bound (seconds) for `CacheControl::ttl_seconds`.
const MAX_TTL_SECONDS: u64 = 3600;
impl CacheControl {
    /// Resolve the optional `ttl` string into a number of seconds.
    ///
    /// Recognized inputs:
    /// - the shorthands `"5m"` (300 s) and `"1h"` (3600 s);
    /// - any string that parses as a `u64`, e.g. `"120"` or `"600"`.
    ///
    /// The result is always within `[MIN_TTL_SECONDS, MAX_TTL_SECONDS]`
    /// (300..=3600): parsed values outside the range are clamped to the
    /// nearest bound. A missing TTL yields `MIN_TTL_SECONDS`; a string that
    /// is neither a shorthand nor an integer logs a warning and also yields
    /// `MIN_TTL_SECONDS`.
    pub fn ttl_seconds(&self) -> u64 {
        // No TTL supplied: fall back to the minimum without logging.
        let Some(ttl) = self.ttl.as_deref() else {
            return MIN_TTL_SECONDS;
        };

        let seconds = match ttl {
            "5m" => 300,
            "1h" => 3600,
            numeric => {
                let Ok(parsed) = numeric.parse::<u64>() else {
                    tracing::warn!("Unrecognized TTL '{}', defaulting to 300s", numeric);
                    return MIN_TTL_SECONDS;
                };
                parsed
            }
        };

        seconds.clamp(MIN_TTL_SECONDS, MAX_TTL_SECONDS)
    }
}
impl Default for NvExt { impl Default for NvExt {
fn default() -> Self { fn default() -> Self {
......
...@@ -696,8 +696,8 @@ pub fn chat_completion_to_response( ...@@ -696,8 +696,8 @@ pub fn chat_completion_to_response(
nv_resp: NvCreateChatCompletionResponse, nv_resp: NvCreateChatCompletionResponse,
params: &ResponseParams, params: &ResponseParams,
) -> Result<NvResponse, anyhow::Error> { ) -> Result<NvResponse, anyhow::Error> {
let chat_resp = nv_resp; let nvext = nv_resp.nvext.clone();
let nvext = chat_resp.nvext.clone(); let chat_resp = nv_resp.inner;
let message_id = format!("msg_{}", Uuid::new_v4().simple()); let message_id = format!("msg_{}", Uuid::new_v4().simple());
let response_id = format!("resp_{}", Uuid::new_v4().simple()); let response_id = format!("resp_{}", Uuid::new_v4().simple());
...@@ -1163,32 +1163,34 @@ mod tests { ...@@ -1163,32 +1163,34 @@ mod tests {
fn test_into_nvresponse_from_chat_response() { fn test_into_nvresponse_from_chat_response() {
let now = 1_726_000_000; let now = 1_726_000_000;
let chat_resp = NvCreateChatCompletionResponse { let chat_resp = NvCreateChatCompletionResponse {
id: "chatcmpl-xyz".into(), inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![dynamo_async_openai::types::ChatChoice { id: "chatcmpl-xyz".into(),
index: 0, choices: vec![dynamo_async_openai::types::ChatChoice {
message: dynamo_async_openai::types::ChatCompletionResponseMessage { index: 0,
content: Some( message: dynamo_async_openai::types::ChatCompletionResponseMessage {
dynamo_async_openai::types::ChatCompletionMessageContent::Text( content: Some(
"This is a reply".to_string(), dynamo_async_openai::types::ChatCompletionMessageContent::Text(
"This is a reply".to_string(),
),
), ),
), refusal: None,
refusal: None, tool_calls: None,
tool_calls: None, role: dynamo_async_openai::types::Role::Assistant,
role: dynamo_async_openai::types::Role::Assistant, function_call: None,
function_call: None, audio: None,
audio: None, reasoning_content: None,
reasoning_content: None, },
}, finish_reason: None,
finish_reason: None, stop_reason: None,
stop_reason: None, logprobs: None,
logprobs: None, }],
}], created: now,
created: now, model: "llama-3.1-8b-instruct".into(),
model: "llama-3.1-8b-instruct".into(), service_tier: None,
service_tier: None, system_fingerprint: None,
system_fingerprint: None, object: "chat.completion".to_string(),
object: "chat.completion".to_string(), usage: None,
usage: None, },
nvext: None, nvext: None,
}; };
...@@ -1218,35 +1220,37 @@ mod tests { ...@@ -1218,35 +1220,37 @@ mod tests {
fn test_response_with_tool_calls() { fn test_response_with_tool_calls() {
let now = 1_726_000_000; let now = 1_726_000_000;
let chat_resp = NvCreateChatCompletionResponse { let chat_resp = NvCreateChatCompletionResponse {
id: "chatcmpl-xyz".into(), inner: dynamo_async_openai::types::CreateChatCompletionResponse {
choices: vec![dynamo_async_openai::types::ChatChoice { id: "chatcmpl-xyz".into(),
index: 0, choices: vec![dynamo_async_openai::types::ChatChoice {
message: dynamo_async_openai::types::ChatCompletionResponseMessage { index: 0,
content: None, message: dynamo_async_openai::types::ChatCompletionResponseMessage {
refusal: None, content: None,
tool_calls: Some(vec![ChatCompletionMessageToolCall { refusal: None,
id: "call_abc".into(), tool_calls: Some(vec![ChatCompletionMessageToolCall {
r#type: ChatCompletionToolType::Function, id: "call_abc".into(),
function: dynamo_async_openai::types::FunctionCall { r#type: ChatCompletionToolType::Function,
name: "get_weather".into(), function: dynamo_async_openai::types::FunctionCall {
arguments: r#"{"location":"SF"}"#.into(), name: "get_weather".into(),
}, arguments: r#"{"location":"SF"}"#.into(),
}]), },
role: dynamo_async_openai::types::Role::Assistant, }]),
function_call: None, role: dynamo_async_openai::types::Role::Assistant,
audio: None, function_call: None,
reasoning_content: None, audio: None,
}, reasoning_content: None,
finish_reason: None, },
stop_reason: None, finish_reason: None,
logprobs: None, stop_reason: None,
}], logprobs: None,
created: now, }],
model: "test-model".into(), created: now,
service_tier: None, model: "test-model".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion".to_string(),
usage: None,
},
nvext: None, nvext: None,
}; };
...@@ -1432,14 +1436,16 @@ thinking ...@@ -1432,14 +1436,16 @@ thinking
}; };
let chat_resp = NvCreateChatCompletionResponse { let chat_resp = NvCreateChatCompletionResponse {
choices: vec![], inner: dynamo_async_openai::types::CreateChatCompletionResponse {
created: 0, choices: vec![],
id: "test".into(), created: 0,
model: "m".into(), id: "test".into(),
service_tier: None, model: "m".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion".into(), system_fingerprint: None,
usage: None, object: "chat.completion".into(),
usage: None,
},
nvext: None, nvext: None,
}; };
...@@ -1463,14 +1469,16 @@ thinking ...@@ -1463,14 +1469,16 @@ thinking
}; };
let chat_resp = NvCreateChatCompletionResponse { let chat_resp = NvCreateChatCompletionResponse {
choices: vec![], inner: dynamo_async_openai::types::CreateChatCompletionResponse {
created: 0, choices: vec![],
id: "test".into(), created: 0,
model: "m".into(), id: "test".into(),
service_tier: None, model: "m".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion".into(), system_fingerprint: None,
usage: None, object: "chat.completion".into(),
usage: None,
},
nvext: None, nvext: None,
}; };
...@@ -1489,14 +1497,16 @@ thinking ...@@ -1489,14 +1497,16 @@ thinking
}; };
let chat_resp = NvCreateChatCompletionResponse { let chat_resp = NvCreateChatCompletionResponse {
choices: vec![], inner: dynamo_async_openai::types::CreateChatCompletionResponse {
created: 0, choices: vec![],
id: "test".into(), created: 0,
model: "m".into(), id: "test".into(),
service_tier: None, model: "m".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion".into(), system_fingerprint: None,
usage: None, object: "chat.completion".into(),
usage: None,
},
nvext: None, nvext: None,
}; };
...@@ -1555,29 +1565,31 @@ thinking ...@@ -1555,29 +1565,31 @@ thinking
ChatChoice, ChatCompletionMessageContent, ChatCompletionResponseMessage, FinishReason, ChatChoice, ChatCompletionMessageContent, ChatCompletionResponseMessage, FinishReason,
}; };
NvCreateChatCompletionResponse { NvCreateChatCompletionResponse {
choices: vec![ChatChoice { inner: dynamo_async_openai::types::CreateChatCompletionResponse {
index: 0, choices: vec![ChatChoice {
#[allow(deprecated)] index: 0,
message: ChatCompletionResponseMessage { #[allow(deprecated)]
content: Some(ChatCompletionMessageContent::Text(text.into())), message: ChatCompletionResponseMessage {
role: dynamo_async_openai::types::Role::Assistant, content: Some(ChatCompletionMessageContent::Text(text.into())),
tool_calls: None, role: dynamo_async_openai::types::Role::Assistant,
refusal: None, tool_calls: None,
reasoning_content: None, refusal: None,
function_call: None, reasoning_content: None,
audio: None, function_call: None,
}, audio: None,
finish_reason: Some(FinishReason::Stop), },
stop_reason: None, finish_reason: Some(FinishReason::Stop),
logprobs: None, stop_reason: None,
}], logprobs: None,
created: 0, }],
id: "test".into(), created: 0,
model: "m".into(), id: "test".into(),
service_tier: None, model: "m".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion".into(), system_fingerprint: None,
usage: None, object: "chat.completion".into(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
......
...@@ -183,7 +183,7 @@ impl ResponseStreamConverter { ...@@ -183,7 +183,7 @@ impl ResponseStreamConverter {
let mut events = Vec::new(); let mut events = Vec::new();
// Capture usage stats from the final chunk (sent when stream_options.include_usage=true) // Capture usage stats from the final chunk (sent when stream_options.include_usage=true)
if let Some(ref u) = chunk.usage { if let Some(ref u) = chunk.inner.usage {
self.usage = Some(ResponseUsage { self.usage = Some(ResponseUsage {
input_tokens: u.prompt_tokens, input_tokens: u.prompt_tokens,
input_tokens_details: InputTokenDetails { input_tokens_details: InputTokenDetails {
...@@ -205,7 +205,7 @@ impl ResponseStreamConverter { ...@@ -205,7 +205,7 @@ impl ResponseStreamConverter {
}); });
} }
for choice in &chunk.choices { for choice in &chunk.inner.choices {
let delta = &choice.delta; let delta = &choice.delta;
// Handle text content deltas — extract text from the enum // Handle text content deltas — extract text from the enum
...@@ -685,35 +685,37 @@ mod tests { ...@@ -685,35 +685,37 @@ mod tests {
) -> NvCreateChatCompletionStreamResponse { ) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)] #[allow(deprecated)]
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "chat-1".into(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![ChatChoiceStream { id: "chat-1".into(),
index: 0, choices: vec![ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
content: None, delta: ChatCompletionStreamResponseDelta {
function_call: None, content: None,
tool_calls: Some(vec![ChatCompletionMessageToolCallChunk { function_call: None,
index: tc_index, tool_calls: Some(vec![ChatCompletionMessageToolCallChunk {
id: id.map(String::from), index: tc_index,
r#type: Some(ChatCompletionToolType::Function), id: id.map(String::from),
function: Some(FunctionCallStream { r#type: Some(ChatCompletionToolType::Function),
name: name.map(String::from), function: Some(FunctionCallStream {
arguments: args.map(String::from), name: name.map(String::from),
}), arguments: args.map(String::from),
}]), }),
role: None, }]),
refusal: None, role: None,
reasoning_content: None, refusal: None,
}, reasoning_content: None,
finish_reason: None, },
stop_reason: None, finish_reason: None,
logprobs: None, stop_reason: None,
}], logprobs: None,
created: 0, }],
model: "test".into(), created: 0,
service_tier: None, model: "test".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".into(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
...@@ -721,27 +723,29 @@ mod tests { ...@@ -721,27 +723,29 @@ mod tests {
fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse { fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)] #[allow(deprecated)]
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "chat-1".into(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![ChatChoiceStream { id: "chat-1".into(),
index: 0, choices: vec![ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
content: Some(ChatCompletionMessageContent::Text(text.into())), delta: ChatCompletionStreamResponseDelta {
function_call: None, content: Some(ChatCompletionMessageContent::Text(text.into())),
tool_calls: None, function_call: None,
role: None, tool_calls: None,
refusal: None, role: None,
reasoning_content: None, refusal: None,
}, reasoning_content: None,
finish_reason: None, },
stop_reason: None, finish_reason: None,
logprobs: None, stop_reason: None,
}], logprobs: None,
created: 0, }],
model: "test".into(), created: 0,
service_tier: None, model: "test".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".into(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use dynamo_async_openai::types::ChatCompletionMessageContent; use dynamo_async_openai::types::{
ChatChoiceStream, ChatCompletionMessageContent, ChatCompletionStreamResponseDelta,
CreateChatCompletionStreamResponse, Role,
};
use dynamo_llm::protocols::{ use dynamo_llm::protocols::{
ContentProvider, DataStream, Annotated, ContentProvider, DataStream,
codec::{Message, SseCodecError, create_message_stream}, codec::{Message, SseCodecError, create_message_stream},
openai::{ openai::{
ParsingOptions, ParsingOptions,
chat_completions::{NvCreateChatCompletionResponse, aggregator::ChatCompletionAggregator}, chat_completions::{
NvCreateChatCompletionResponse, NvCreateChatCompletionStreamResponse,
aggregator::ChatCompletionAggregator,
},
completions::NvCreateCompletionResponse, completions::NvCreateCompletionResponse,
}, },
}; };
...@@ -45,6 +51,7 @@ async fn test_openai_chat_stream() { ...@@ -45,6 +51,7 @@ async fn test_openai_chat_stream() {
assert_eq!( assert_eq!(
get_text( get_text(
result result
.inner
.choices .choices
.first() .first()
.unwrap() .unwrap()
...@@ -70,6 +77,7 @@ async fn test_openai_chat_edge_case_multi_line_data() { ...@@ -70,6 +77,7 @@ async fn test_openai_chat_edge_case_multi_line_data() {
assert_eq!( assert_eq!(
get_text( get_text(
result result
.inner
.choices .choices
.first() .first()
.unwrap() .unwrap()
...@@ -95,6 +103,7 @@ async fn test_openai_chat_edge_case_comments_per_response() { ...@@ -95,6 +103,7 @@ async fn test_openai_chat_edge_case_comments_per_response() {
assert_eq!( assert_eq!(
get_text( get_text(
result result
.inner
.choices .choices
.first() .first()
.unwrap() .unwrap()
...@@ -138,3 +147,113 @@ async fn test_openai_cmpl_stream() { ...@@ -138,3 +147,113 @@ async fn test_openai_cmpl_stream() {
" This is a question that is often asked by those outside of AI research and development" " This is a question that is often asked by those outside of AI research and development"
); );
} }
// ===================================
// nvext aggregation regression tests
// ===================================
#[allow(deprecated)]
fn make_stream_delta(
content: Option<&str>,
nvext: Option<serde_json::Value>,
) -> Annotated<NvCreateChatCompletionStreamResponse> {
Annotated::from_data(NvCreateChatCompletionStreamResponse {
inner: CreateChatCompletionStreamResponse {
id: "test-id".to_string(),
choices: if let Some(text) = content {
vec![ChatChoiceStream {
index: 0,
delta: ChatCompletionStreamResponseDelta {
content: Some(ChatCompletionMessageContent::Text(text.to_string())),
function_call: None,
tool_calls: None,
role: Some(Role::Assistant),
refusal: None,
reasoning_content: None,
},
finish_reason: None,
stop_reason: None,
logprobs: None,
}]
} else {
vec![]
},
created: 1234567890,
model: "test-model".to_string(),
service_tier: None,
system_fingerprint: None,
object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext,
})
}
/// An `nvext` payload attached to one chunk in the middle of a stream must be
/// present on the aggregated response, and the text of all chunks must be
/// concatenated in order.
#[tokio::test]
async fn test_nvext_passthrough_aggregation() {
    let expected_nvext = serde_json::json!({"custom_field": "test_value"});

    // Only the second of three chunks carries nvext.
    let chunks = futures::stream::iter(vec![
        make_stream_delta(Some("Hello"), None),
        make_stream_delta(Some(" world"), Some(expected_nvext.clone())),
        make_stream_delta(Some("!"), None),
    ]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(chunks, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, Some(expected_nvext));

    let first_choice = aggregated.inner.choices.first().unwrap();
    let message_content = first_choice.message.content.as_ref().unwrap();
    assert_eq!(get_text(message_content), "Hello world!");
}
/// When several chunks carry `nvext`, the aggregated response must hold the
/// value from the latest chunk that had one; chunks without `nvext` in between
/// do not reset it.
#[tokio::test]
async fn test_nvext_last_value_wins() {
    let earlier = serde_json::json!({"version": 1});
    let latest = serde_json::json!({"version": 2});

    let chunks = futures::stream::iter(vec![
        make_stream_delta(Some("a"), Some(earlier)),
        make_stream_delta(Some("b"), None),
        make_stream_delta(Some("c"), Some(latest.clone())),
    ]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(chunks, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, Some(latest));
}
/// A stream in which no chunk carries `nvext` must aggregate to a response
/// whose `nvext` is `None`.
#[tokio::test]
async fn test_nvext_none_when_absent() {
    let chunks = futures::stream::iter(vec![make_stream_delta(Some("hello"), None)]);

    let aggregated =
        NvCreateChatCompletionResponse::from_annotated_stream(chunks, ParsingOptions::default())
            .await
            .unwrap();

    assert_eq!(aggregated.nvext, None);
}
...@@ -397,14 +397,16 @@ fn create_response_with_linear_probs( ...@@ -397,14 +397,16 @@ fn create_response_with_linear_probs(
}; };
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![choice], id: "test_id".to_string(),
created: 1234567890, choices: vec![choice],
model: "test-model".to_string(), created: 1234567890,
service_tier: None, model: "test-model".to_string(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
...@@ -479,14 +481,16 @@ fn create_multi_choice_response( ...@@ -479,14 +481,16 @@ fn create_multi_choice_response(
.collect(); .collect();
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices, id: "test_id".to_string(),
created: 1234567890, choices,
model: "test-model".to_string(), created: 1234567890,
service_tier: None, model: "test-model".to_string(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
...@@ -192,7 +192,7 @@ async fn postprocessor_parsing_stream_replays_interval_20_fixture() { ...@@ -192,7 +192,7 @@ async fn postprocessor_parsing_stream_replays_interval_20_fixture() {
continue; continue;
}; };
for choice in &output_data.choices { for choice in &output_data.inner.choices {
if let Some(reasoning_content) = &choice.delta.reasoning_content { if let Some(reasoning_content) = &choice.delta.reasoning_content {
reasoning.push_str(reasoning_content); reasoning.push_str(reasoning_content);
} }
......
This diff is collapsed.
...@@ -39,14 +39,16 @@ fn create_mock_response_chunk( ...@@ -39,14 +39,16 @@ fn create_mock_response_chunk(
}; };
let response = NvCreateChatCompletionStreamResponse { let response = NvCreateChatCompletionStreamResponse {
id: "test-id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![choice], id: "test-id".to_string(),
created: 1234567890, choices: vec![choice],
model: "test-model".to_string(), created: 1234567890,
system_fingerprint: Some("test-fingerprint".to_string()), model: "test-model".to_string(),
object: "chat.completion.chunk".to_string(), system_fingerprint: Some("test-fingerprint".to_string()),
usage: None, object: "chat.completion.chunk".to_string(),
service_tier: None, usage: None,
service_tier: None,
},
nvext: None, nvext: None,
}; };
...@@ -125,7 +127,7 @@ mod tests { ...@@ -125,7 +127,7 @@ mod tests {
let mut all_content = String::new(); let mut all_content = String::new();
while let Some(item) = output_stream.next().await { while let Some(item) = output_stream.next().await {
if let Some(ref data) = item.data { if let Some(ref data) = item.data {
for choice in &data.choices { for choice in &data.inner.choices {
if let Some(ref r) = choice.delta.reasoning_content { if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r); all_reasoning.push_str(r);
} }
...@@ -177,15 +179,15 @@ mod tests { ...@@ -177,15 +179,15 @@ mod tests {
assert_eq!(output_chunks.len(), 3); assert_eq!(output_chunks.len(), 3);
// Chunk 0: "<think>This" // Chunk 0: "<think>This"
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().choices[0]; let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_0, None, Some("This")); assert_choice(output_choice_0, None, Some("This"));
// Chunk 1: " is reasoning content" // Chunk 1: " is reasoning content"
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().choices[0]; let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_1, None, Some(" is reasoning content")); assert_choice(output_choice_1, None, Some(" is reasoning content"));
// Chunk 2: "</think> Here's my answer." // Chunk 2: "</think> Here's my answer."
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().choices[0]; let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_2, Some(" Here's my answer."), None); assert_choice(output_choice_2, Some(" Here's my answer."), None);
} }
...@@ -223,15 +225,15 @@ mod tests { ...@@ -223,15 +225,15 @@ mod tests {
assert_eq!(output_chunks.len(), 3); assert_eq!(output_chunks.len(), 3);
// Chunk 0: "<think>Only" // Chunk 0: "<think>Only"
let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().choices[0]; let output_choice_0 = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_0, None, Some("Only")); assert_choice(output_choice_0, None, Some("Only"));
// Chunk 1: " reasoning" // Chunk 1: " reasoning"
let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().choices[0]; let output_choice_1 = &output_chunks[1].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_1, None, Some(" reasoning")); assert_choice(output_choice_1, None, Some(" reasoning"));
// Chunk 2: " here</think>" // Chunk 2: " here</think>"
let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().choices[0]; let output_choice_2 = &output_chunks[2].data.as_ref().unwrap().inner.choices[0];
assert_choice(output_choice_2, None, Some(" here")); assert_choice(output_choice_2, None, Some(" here"));
} }
...@@ -266,7 +268,7 @@ mod tests { ...@@ -266,7 +268,7 @@ mod tests {
// Verify that only normal content is present // Verify that only normal content is present
assert_eq!(output_chunks.len(), 1); assert_eq!(output_chunks.len(), 1);
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0]; let output_choice = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert_choice( assert_choice(
output_choice, output_choice,
Some("Just normal text without reasoning tags."), Some("Just normal text without reasoning tags."),
...@@ -304,8 +306,8 @@ mod tests { ...@@ -304,8 +306,8 @@ mod tests {
assert_eq!(output_chunks.len(), input_chunks.len()); assert_eq!(output_chunks.len(), input_chunks.len());
for (input, output) in input_chunks.iter().zip(output_chunks.iter()) { for (input, output) in input_chunks.iter().zip(output_chunks.iter()) {
let input_choice = &input.data.as_ref().unwrap().choices[0]; let input_choice = &input.data.as_ref().unwrap().inner.choices[0];
let output_choice = &output.data.as_ref().unwrap().choices[0]; let output_choice = &output.data.as_ref().unwrap().inner.choices[0];
assert_choice( assert_choice(
output_choice, output_choice,
input_choice.delta.content.as_ref().map(get_text), input_choice.delta.content.as_ref().map(get_text),
...@@ -345,7 +347,7 @@ mod tests { ...@@ -345,7 +347,7 @@ mod tests {
// Verify that Mistral-style reasoning is parsed correctly // Verify that Mistral-style reasoning is parsed correctly
assert_eq!(output_chunks.len(), 1); assert_eq!(output_chunks.len(), 1);
let output_choice = &output_chunks[0].data.as_ref().unwrap().choices[0]; let output_choice = &output_chunks[0].data.as_ref().unwrap().inner.choices[0];
assert!( assert!(
output_choice.delta.reasoning_content.is_some(), output_choice.delta.reasoning_content.is_some(),
...@@ -422,7 +424,7 @@ mod tests { ...@@ -422,7 +424,7 @@ mod tests {
for chunk in output_chunks.iter() { for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data { if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices { for choice in &response_data.inner.choices {
// Collect reasoning content // Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content { if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning); all_reasoning.push_str(reasoning);
...@@ -574,7 +576,7 @@ mod tests { ...@@ -574,7 +576,7 @@ mod tests {
for chunk in output_chunks.iter() { for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data { if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices { for choice in &response_data.inner.choices {
// Collect reasoning content // Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content { if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning); all_reasoning.push_str(reasoning);
...@@ -685,7 +687,7 @@ mod tests { ...@@ -685,7 +687,7 @@ mod tests {
for chunk in output_chunks.iter() { for chunk in output_chunks.iter() {
if let Some(ref data) = chunk.data { if let Some(ref data) = chunk.data {
for choice in &data.choices { for choice in &data.inner.choices {
if let Some(ref r) = choice.delta.reasoning_content { if let Some(ref r) = choice.delta.reasoning_content {
all_reasoning.push_str(r); all_reasoning.push_str(r);
} }
...@@ -782,7 +784,7 @@ mod tests { ...@@ -782,7 +784,7 @@ mod tests {
for chunk in output_chunks.iter() { for chunk in output_chunks.iter() {
if let Some(ref response_data) = chunk.data { if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices { for choice in &response_data.inner.choices {
if let Some(ref reasoning) = choice.delta.reasoning_content { if let Some(ref reasoning) = choice.delta.reasoning_content {
all_reasoning.push_str(reasoning); all_reasoning.push_str(reasoning);
} }
......
...@@ -107,14 +107,16 @@ fn load_test_data(file_path: &str) -> TestData { ...@@ -107,14 +107,16 @@ fn load_test_data(file_path: &str) -> TestData {
.expect("Failed to parse choices"); .expect("Failed to parse choices");
let response = NvCreateChatCompletionStreamResponse { let response = NvCreateChatCompletionStreamResponse {
id: id.clone(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices, id: id.clone(),
created: 1234567890, choices,
model: "test-model".to_string(), created: 1234567890,
system_fingerprint: None, model: "test-model".to_string(),
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".to_string(),
service_tier: None, usage: None,
service_tier: None,
},
nvext: None, nvext: None,
}; };
...@@ -231,7 +233,7 @@ fn aggregate_content_from_chunks( ...@@ -231,7 +233,7 @@ fn aggregate_content_from_chunks(
for chunk in chunks.iter() { for chunk in chunks.iter() {
if let Some(ref response_data) = chunk.data { if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices { for choice in &response_data.inner.choices {
// Collect reasoning content // Collect reasoning content
if let Some(ref reasoning) = choice.delta.reasoning_content { if let Some(ref reasoning) = choice.delta.reasoning_content {
reasoning_content.push_str(reasoning); reasoning_content.push_str(reasoning);
...@@ -279,7 +281,7 @@ fn validate_finish_reason( ...@@ -279,7 +281,7 @@ fn validate_finish_reason(
// Count finish_reason occurrences and track position // Count finish_reason occurrences and track position
for (idx, chunk) in chunks.iter().enumerate() { for (idx, chunk) in chunks.iter().enumerate() {
if let Some(ref response_data) = chunk.data { if let Some(ref response_data) = chunk.data {
for choice in &response_data.choices { for choice in &response_data.inner.choices {
if let Some(reason) = choice.finish_reason { if let Some(reason) = choice.finish_reason {
finish_reason_count += 1; finish_reason_count += 1;
last_chunk_index = Some(idx); last_chunk_index = Some(idx);
......
...@@ -241,12 +241,12 @@ async fn test_streaming_without_usage() { ...@@ -241,12 +241,12 @@ async fn test_streaming_without_usage() {
for (i, chunk) in content_chunks.iter().enumerate() { for (i, chunk) in content_chunks.iter().enumerate() {
if let Some(response) = &chunk.data { if let Some(response) = &chunk.data {
assert!( assert!(
response.usage.is_none(), response.inner.usage.is_none(),
"Chunk {} should have usage: None when stream_options not set", "Chunk {} should have usage: None when stream_options not set",
i i
); );
assert!( assert!(
!response.choices.is_empty(), !response.inner.choices.is_empty(),
"Chunk {} should have choices", "Chunk {} should have choices",
i i
); );
...@@ -286,12 +286,12 @@ async fn test_streaming_with_usage_compliance() { ...@@ -286,12 +286,12 @@ async fn test_streaming_with_usage_compliance() {
for (i, chunk) in chunks.iter().take(3).enumerate() { for (i, chunk) in chunks.iter().take(3).enumerate() {
if let Some(response) = &chunk.data { if let Some(response) = &chunk.data {
assert!( assert!(
response.usage.is_none(), response.inner.usage.is_none(),
"Content chunk {} should have usage: None", "Content chunk {} should have usage: None",
i i
); );
assert!( assert!(
!response.choices.is_empty(), !response.inner.choices.is_empty(),
"Content chunk {} should have choices", "Content chunk {} should have choices",
i i
); );
...@@ -301,15 +301,15 @@ async fn test_streaming_with_usage_compliance() { ...@@ -301,15 +301,15 @@ async fn test_streaming_with_usage_compliance() {
// Verify the final chunk is the usage-only chunk // Verify the final chunk is the usage-only chunk
if let Some(final_response) = &chunks[3].data { if let Some(final_response) = &chunks[3].data {
assert!( assert!(
final_response.choices.is_empty(), final_response.inner.choices.is_empty(),
"Final usage chunk should have empty choices array" "Final usage chunk should have empty choices array"
); );
assert!( assert!(
final_response.usage.is_some(), final_response.inner.usage.is_some(),
"Final usage chunk should have usage statistics" "Final usage chunk should have usage statistics"
); );
let usage = final_response.usage.as_ref().unwrap(); let usage = final_response.inner.usage.as_ref().unwrap();
assert_eq!( assert_eq!(
usage.completion_tokens, 3, usage.completion_tokens, 3,
"Should have 3 completion tokens" "Should have 3 completion tokens"
...@@ -359,18 +359,18 @@ async fn test_streaming_with_continuous_usage() { ...@@ -359,18 +359,18 @@ async fn test_streaming_with_continuous_usage() {
for (i, chunk) in chunks.iter().take(3).enumerate() { for (i, chunk) in chunks.iter().take(3).enumerate() {
if let Some(response) = &chunk.data { if let Some(response) = &chunk.data {
assert!( assert!(
response.usage.is_some(), response.inner.usage.is_some(),
"Content chunk {} should have usage: Some", "Content chunk {} should have usage: Some",
i i
); );
assert!( assert!(
!response.choices.is_empty(), !response.inner.choices.is_empty(),
"Content chunk {} should have choices", "Content chunk {} should have choices",
i i
); );
// Verify usage counts are properly accumulated for each chunk // Verify usage counts are properly accumulated for each chunk
let usage = response.usage.as_ref().unwrap(); let usage = response.inner.usage.as_ref().unwrap();
assert_eq!( assert_eq!(
usage.completion_tokens, usage.completion_tokens,
i as u32 + 1, i as u32 + 1,
...@@ -392,15 +392,15 @@ async fn test_streaming_with_continuous_usage() { ...@@ -392,15 +392,15 @@ async fn test_streaming_with_continuous_usage() {
// Verify the final chunk is the usage-only chunk // Verify the final chunk is the usage-only chunk
if let Some(final_response) = &chunks[3].data { if let Some(final_response) = &chunks[3].data {
assert!( assert!(
final_response.choices.is_empty(), final_response.inner.choices.is_empty(),
"Final usage chunk should have empty choices array" "Final usage chunk should have empty choices array"
); );
assert!( assert!(
final_response.usage.is_some(), final_response.inner.usage.is_some(),
"Final usage chunk should have usage statistics" "Final usage chunk should have usage statistics"
); );
let usage = final_response.usage.as_ref().unwrap(); let usage = final_response.inner.usage.as_ref().unwrap();
assert_eq!( assert_eq!(
usage.completion_tokens, 3, usage.completion_tokens, 3,
"Should have 3 completion tokens" "Should have 3 completion tokens"
...@@ -464,7 +464,7 @@ async fn test_streaming_with_usage_false() { ...@@ -464,7 +464,7 @@ async fn test_streaming_with_usage_false() {
for (i, chunk) in content_chunks.iter().enumerate() { for (i, chunk) in content_chunks.iter().enumerate() {
if let Some(response) = &chunk.data { if let Some(response) = &chunk.data {
assert!( assert!(
response.usage.is_none(), response.inner.usage.is_none(),
"Chunk {} should have usage: None when include_usage is false", "Chunk {} should have usage: None when include_usage is false",
i i
); );
...@@ -560,7 +560,7 @@ async fn test_nonstreaming_has_usage_field() { ...@@ -560,7 +560,7 @@ async fn test_nonstreaming_has_usage_field() {
// Aggregate the streaming chunks into a single non-streaming response // Aggregate the streaming chunks into a single non-streaming response
// This simulates what the HTTP service does for non-streaming requests // This simulates what the HTTP service does for non-streaming requests
let result = dynamo_async_openai::types::CreateChatCompletionResponse::from_annotated_stream( let result = dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionResponse::from_annotated_stream(
transformed_stream, transformed_stream,
ParsingOptions::default(), ParsingOptions::default(),
) )
...@@ -570,12 +570,12 @@ async fn test_nonstreaming_has_usage_field() { ...@@ -570,12 +570,12 @@ async fn test_nonstreaming_has_usage_field() {
let response = result.unwrap(); let response = result.unwrap();
assert!( assert!(
response.usage.is_some(), response.inner.usage.is_some(),
"Non-streaming chat completion response MUST have a usage field populated. \ "Non-streaming chat completion response MUST have a usage field populated. \
This is required for OpenAI API compliance." This is required for OpenAI API compliance."
); );
let usage = response.usage.unwrap(); let usage = response.inner.usage.unwrap();
// Verify usage contains valid token counts // Verify usage contains valid token counts
// In our mock, we generated 3 tokens (from the 3 backend outputs) // In our mock, we generated 3 tokens (from the 3 backend outputs)
...@@ -725,7 +725,11 @@ async fn test_chat_streaming_with_cached_tokens_propagation() { ...@@ -725,7 +725,11 @@ async fn test_chat_streaming_with_cached_tokens_propagation() {
assert_eq!(chunks.len(), 4, "Should have 3 content + 1 usage chunk"); assert_eq!(chunks.len(), 4, "Should have 3 content + 1 usage chunk");
if let Some(final_resp) = &chunks[3].data { if let Some(final_resp) = &chunks[3].data {
let usage = final_resp.usage.as_ref().expect("Usage must be present"); let usage = final_resp
.inner
.usage
.as_ref()
.expect("Usage must be present");
let cached = usage let cached = usage
.prompt_tokens_details .prompt_tokens_details
.as_ref() .as_ref()
......
...@@ -157,7 +157,7 @@ async fn test_named_tool_choice_parses_json() { ...@@ -157,7 +157,7 @@ async fn test_named_tool_choice_parses_json() {
.expect("choice generation"); .expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await; let response = apply_jail_transformation(raw_response, tool_choice).await;
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
assert_eq!( assert_eq!(
choice.finish_reason, choice.finish_reason,
...@@ -199,7 +199,7 @@ async fn test_required_tool_choice_parses_json_array() { ...@@ -199,7 +199,7 @@ async fn test_required_tool_choice_parses_json_array() {
.expect("choice generation"); .expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await; let response = apply_jail_transformation(raw_response, tool_choice).await;
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
assert_eq!( assert_eq!(
choice.finish_reason, choice.finish_reason,
...@@ -259,7 +259,7 @@ async fn test_tool_choice_parse_failure_returns_as_content() { ...@@ -259,7 +259,7 @@ async fn test_tool_choice_parse_failure_returns_as_content() {
.expect("choice generation"); .expect("choice generation");
let response = apply_jail_transformation(raw_response, tool_choice).await; let response = apply_jail_transformation(raw_response, tool_choice).await;
let delta = &response.choices[0].delta; let delta = &response.inner.choices[0].delta;
// Jail stream behavior: if parsing fails, return accumulated content as-is // Jail stream behavior: if parsing fails, return accumulated content as-is
// This matches marker-based FC behavior // This matches marker-based FC behavior
...@@ -317,11 +317,11 @@ async fn test_streaming_named_tool_buffers_until_finish() { ...@@ -317,11 +317,11 @@ async fn test_streaming_named_tool_buffers_until_finish() {
let response = &all_responses[0]; let response = &all_responses[0];
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Stop) Some(dynamo_async_openai::types::FinishReason::Stop)
); );
let tool_calls = response.choices[0].delta.tool_calls.as_ref().unwrap(); let tool_calls = response.inner.choices[0].delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 1); assert_eq!(tool_calls.len(), 1);
assert_eq!( assert_eq!(
tool_calls[0].function.as_ref().unwrap().name.as_deref(), tool_calls[0].function.as_ref().unwrap().name.as_deref(),
...@@ -384,11 +384,11 @@ async fn test_streaming_required_tool_parallel() { ...@@ -384,11 +384,11 @@ async fn test_streaming_required_tool_parallel() {
let response = &all_responses[0]; let response = &all_responses[0];
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ToolCalls) Some(dynamo_async_openai::types::FinishReason::ToolCalls)
); );
let tool_calls = response.choices[0].delta.tool_calls.as_ref().unwrap(); let tool_calls = response.inner.choices[0].delta.tool_calls.as_ref().unwrap();
assert_eq!(tool_calls.len(), 2); assert_eq!(tool_calls.len(), 2);
assert_eq!( assert_eq!(
...@@ -445,8 +445,12 @@ fn test_no_tool_choice_outputs_normal_text() { ...@@ -445,8 +445,12 @@ fn test_no_tool_choice_outputs_normal_text() {
.expect("normal text"); .expect("normal text");
assert_eq!( assert_eq!(
response.choices[0].delta.content.as_ref().map(get_text), response.inner.choices[0]
.delta
.content
.as_ref()
.map(get_text),
Some("Hello world") Some("Hello world")
); );
assert!(response.choices[0].delta.tool_calls.is_none()); assert!(response.inner.choices[0].delta.tool_calls.is_none());
} }
...@@ -116,7 +116,7 @@ async fn test_named_tool_choice_preserves_length_finish_reason() { ...@@ -116,7 +116,7 @@ async fn test_named_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with Stop // Critical: Length finish reason should be preserved, NOT replaced with Stop
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Length), Some(dynamo_async_openai::types::FinishReason::Length),
"Length finish reason must be preserved for tool_choice=named" "Length finish reason must be preserved for tool_choice=named"
); );
...@@ -139,7 +139,7 @@ fn test_required_tool_choice_preserves_length_finish_reason() { ...@@ -139,7 +139,7 @@ fn test_required_tool_choice_preserves_length_finish_reason() {
// Critical: Length finish reason should be preserved, NOT replaced with ToolCalls // Critical: Length finish reason should be preserved, NOT replaced with ToolCalls
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Length), Some(dynamo_async_openai::types::FinishReason::Length),
"Length finish reason must be preserved for tool_choice=required" "Length finish reason must be preserved for tool_choice=required"
); );
...@@ -169,7 +169,7 @@ fn test_named_tool_choice_preserves_content_filter() { ...@@ -169,7 +169,7 @@ fn test_named_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved // Critical: ContentFilter finish reason should be preserved
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ContentFilter), Some(dynamo_async_openai::types::FinishReason::ContentFilter),
"ContentFilter finish reason must be preserved for tool_choice=named" "ContentFilter finish reason must be preserved for tool_choice=named"
); );
...@@ -192,7 +192,7 @@ fn test_required_tool_choice_preserves_content_filter() { ...@@ -192,7 +192,7 @@ fn test_required_tool_choice_preserves_content_filter() {
// Critical: ContentFilter finish reason should be preserved // Critical: ContentFilter finish reason should be preserved
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ContentFilter), Some(dynamo_async_openai::types::FinishReason::ContentFilter),
"ContentFilter finish reason must be preserved for tool_choice=required" "ContentFilter finish reason must be preserved for tool_choice=required"
); );
...@@ -222,7 +222,7 @@ fn test_named_tool_choice_normal_stop_becomes_stop() { ...@@ -222,7 +222,7 @@ fn test_named_tool_choice_normal_stop_becomes_stop() {
// Normal completion: Stop should remain Stop for named tool choice // Normal completion: Stop should remain Stop for named tool choice
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::Stop), Some(dynamo_async_openai::types::FinishReason::Stop),
); );
} }
...@@ -247,7 +247,7 @@ async fn test_required_tool_choice_normal_stop_becomes_tool_calls() { ...@@ -247,7 +247,7 @@ async fn test_required_tool_choice_normal_stop_becomes_tool_calls() {
// Normal completion: Stop should become ToolCalls for required tool choice // Normal completion: Stop should become ToolCalls for required tool choice
assert_eq!( assert_eq!(
response.choices[0].finish_reason, response.inner.choices[0].finish_reason,
Some(dynamo_async_openai::types::FinishReason::ToolCalls), Some(dynamo_async_openai::types::FinishReason::ToolCalls),
); );
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment