Unverified Commit 2887cd1c authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

refactor(1/3): move `nvext` to `dynamo-llm` and move `anthropic` to `dynamo-async-openai` (#7564)

parent d6136f4a
This diff is collapsed.
...@@ -1182,10 +1182,6 @@ pub struct CreateChatCompletionResponse { ...@@ -1182,10 +1182,6 @@ pub struct CreateChatCompletionResponse {
/// The object type, which is always `chat.completion`. /// The object type, which is always `chat.completion`.
pub object: String, pub object: String,
pub usage: Option<CompletionUsage>, pub usage: Option<CompletionUsage>,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<serde_json::Value>,
} }
/// Parsed server side events stream until an \[DONE\] is received from server. /// Parsed server side events stream until an \[DONE\] is received from server.
...@@ -1281,10 +1277,6 @@ pub struct CreateChatCompletionStreamResponse { ...@@ -1281,10 +1277,6 @@ pub struct CreateChatCompletionStreamResponse {
/// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request. /// An optional field that will only be present when you set `stream_options: {"include_usage": true}` in your request.
/// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request. /// When present, it contains a null value except for the last chunk which contains the token usage statistics for the entire request.
pub usage: Option<CompletionUsage>, pub usage: Option<CompletionUsage>,
/// NVIDIA extension field for response metadata
#[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<serde_json::Value>,
} }
#[cfg(test)] #[cfg(test)]
......
...@@ -224,10 +224,6 @@ pub struct CreateCompletionResponse { ...@@ -224,10 +224,6 @@ pub struct CreateCompletionResponse {
/// The object type, which is always "text_completion" /// The object type, which is always "text_completion"
pub object: String, pub object: String,
pub usage: Option<CompletionUsage>, pub usage: Option<CompletionUsage>,
/// NVIDIA extension field for response metadata (worker IDs, etc.)
#[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<serde_json::Value>,
} }
/// Parsed server side events stream until an \[DONE\] is received from server. /// Parsed server side events stream until an \[DONE\] is received from server.
......
...@@ -10,6 +10,7 @@ ...@@ -10,6 +10,7 @@
//! Types used in OpenAI API requests and responses. //! Types used in OpenAI API requests and responses.
//! These types are created from component schemas in the [OpenAPI spec](https://github.com/openai/openai-openapi) //! These types are created from component schemas in the [OpenAPI spec](https://github.com/openai/openai-openapi)
pub mod anthropic;
mod assistant; mod assistant;
mod assistant_impls; mod assistant_impls;
mod assistant_stream; mod assistant_stream;
......
...@@ -90,14 +90,16 @@ where ...@@ -90,14 +90,16 @@ where
tracing::warn!("audit: aggregation future canceled/failed"); tracing::warn!("audit: aggregation future canceled/failed");
// Return minimal response if aggregation failed // Return minimal response if aggregation failed
NvCreateChatCompletionResponse { NvCreateChatCompletionResponse {
id: String::new(), inner: dynamo_async_openai::types::CreateChatCompletionResponse {
created: 0, id: String::new(),
usage: None, created: 0,
model: String::new(), usage: None,
object: "chat.completion".to_string(), model: String::new(),
system_fingerprint: None, object: "chat.completion".to_string(),
choices: vec![], system_fingerprint: None,
service_tier: None, choices: vec![],
service_tier: None,
},
nvext: None, nvext: None,
} }
}) })
...@@ -125,14 +127,16 @@ where ...@@ -125,14 +127,16 @@ where
Err(e) => { Err(e) => {
tracing::warn!("fold aggregation failed: {e}"); tracing::warn!("fold aggregation failed: {e}");
let fallback = NvCreateChatCompletionResponse { let fallback = NvCreateChatCompletionResponse {
id: String::new(), inner: dynamo_async_openai::types::CreateChatCompletionResponse {
created: 0, id: String::new(),
usage: None, created: 0,
model: String::new(), usage: None,
object: "chat.completion".to_string(), model: String::new(),
system_fingerprint: None, object: "chat.completion".to_string(),
choices: vec![], system_fingerprint: None,
service_tier: None, choices: vec![],
service_tier: None,
},
nvext: None, nvext: None,
}; };
let _ = tx.send(fallback.clone()); let _ = tx.send(fallback.clone());
...@@ -145,14 +149,16 @@ where ...@@ -145,14 +149,16 @@ where
rx.await.unwrap_or_else(|_| { rx.await.unwrap_or_else(|_| {
tracing::warn!("fold aggregation future canceled"); tracing::warn!("fold aggregation future canceled");
NvCreateChatCompletionResponse { NvCreateChatCompletionResponse {
id: String::new(), inner: dynamo_async_openai::types::CreateChatCompletionResponse {
created: 0, id: String::new(),
usage: None, created: 0,
model: String::new(), usage: None,
object: "chat.completion".to_string(), model: String::new(),
system_fingerprint: None, object: "chat.completion".to_string(),
choices: vec![], system_fingerprint: None,
service_tier: None, choices: vec![],
service_tier: None,
},
nvext: None, nvext: None,
} }
}) })
...@@ -171,8 +177,8 @@ pub fn final_response_to_one_chunk_stream( ...@@ -171,8 +177,8 @@ pub fn final_response_to_one_chunk_stream(
) -> std::pin::Pin< ) -> std::pin::Pin<
Box<dyn futures::Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>> + Send>, Box<dyn futures::Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>> + Send>,
> { > {
let mut choices: Vec<ChatChoiceStream> = Vec::with_capacity(resp.choices.len()); let mut choices: Vec<ChatChoiceStream> = Vec::with_capacity(resp.inner.choices.len());
for (idx, ch) in resp.choices.iter().enumerate() { for (idx, ch) in resp.inner.choices.iter().enumerate() {
// Convert FunctionCall to FunctionCallStream if present // Convert FunctionCall to FunctionCallStream if present
#[allow(deprecated)] #[allow(deprecated)]
let function_call = ch.message.function_call.as_ref().map(|fc| { let function_call = ch.message.function_call.as_ref().map(|fc| {
...@@ -222,14 +228,16 @@ pub fn final_response_to_one_chunk_stream( ...@@ -222,14 +228,16 @@ pub fn final_response_to_one_chunk_stream(
} }
let chunk = NvCreateChatCompletionStreamResponse { let chunk = NvCreateChatCompletionStreamResponse {
id: resp.id.clone(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
object: "chat.completion.chunk".to_string(), id: resp.inner.id.clone(),
created: resp.created, object: "chat.completion.chunk".to_string(),
model: resp.model.clone(), created: resp.inner.created,
system_fingerprint: resp.system_fingerprint.clone(), model: resp.inner.model.clone(),
service_tier: resp.service_tier.clone(), system_fingerprint: resp.inner.system_fingerprint.clone(),
choices, service_tier: resp.inner.service_tier.clone(),
usage: resp.usage.clone(), choices,
usage: resp.inner.usage.clone(),
},
nvext: resp.nvext.clone(), nvext: resp.nvext.clone(),
}; };
...@@ -275,14 +283,16 @@ mod tests { ...@@ -275,14 +283,16 @@ mod tests {
}; };
let response = NvCreateChatCompletionStreamResponse { let response = NvCreateChatCompletionStreamResponse {
id: "test-id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![choice], id: "test-id".to_string(),
created: 1234567890, choices: vec![choice],
model: "test-model".to_string(), created: 1234567890,
system_fingerprint: Some("test-fingerprint".to_string()), model: "test-model".to_string(),
object: "chat.completion.chunk".to_string(), system_fingerprint: Some("test-fingerprint".to_string()),
usage: None, object: "chat.completion.chunk".to_string(),
service_tier: None, usage: None,
service_tier: None,
},
nvext: None, nvext: None,
}; };
...@@ -314,14 +324,16 @@ mod tests { ...@@ -314,14 +324,16 @@ mod tests {
}; };
let response = NvCreateChatCompletionStreamResponse { let response = NvCreateChatCompletionStreamResponse {
id: "test-id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![choice], id: "test-id".to_string(),
created: 1234567890, choices: vec![choice],
model: "test-model".to_string(), created: 1234567890,
system_fingerprint: Some("test-fingerprint".to_string()), model: "test-model".to_string(),
object: "chat.completion.chunk".to_string(), system_fingerprint: Some("test-fingerprint".to_string()),
usage: None, object: "chat.completion.chunk".to_string(),
service_tier: None, usage: None,
service_tier: None,
},
nvext: None, nvext: None,
}; };
...@@ -339,7 +351,7 @@ mod tests { ...@@ -339,7 +351,7 @@ mod tests {
chunk chunk
.data .data
.as_ref() .as_ref()
.and_then(|d| d.choices.first()) .and_then(|d| d.inner.choices.first())
.and_then(|c| c.delta.content.as_ref()) .and_then(|c| c.delta.content.as_ref())
.and_then(|content| match content { .and_then(|content| match content {
ChatCompletionMessageContent::Text(text) => Some(text.clone()), ChatCompletionMessageContent::Text(text) => Some(text.clone()),
...@@ -396,7 +408,7 @@ mod tests { ...@@ -396,7 +408,7 @@ mod tests {
assert_eq!(results.len(), 0, "Empty stream should produce no chunks"); assert_eq!(results.len(), 0, "Empty stream should produce no chunks");
// Verify fallback response (aggregation will fail on empty stream) // Verify fallback response (aggregation will fail on empty stream)
assert_eq!(final_resp.object, "chat.completion"); assert_eq!(final_resp.inner.object, "chat.completion");
// Should get fallback response, not panic // Should get fallback response, not panic
} }
...@@ -415,7 +427,7 @@ mod tests { ...@@ -415,7 +427,7 @@ mod tests {
assert_eq!(extract_content(&results[0]), "Single chunk"); assert_eq!(extract_content(&results[0]), "Single chunk");
// Verify aggregation // Verify aggregation
assert_eq!(final_resp.object, "chat.completion"); assert_eq!(final_resp.inner.object, "chat.completion");
} }
#[tokio::test] #[tokio::test]
...@@ -423,32 +435,34 @@ mod tests { ...@@ -423,32 +435,34 @@ mod tests {
// Test that metadata (id, event, comment) is preserved through passthrough // Test that metadata (id, event, comment) is preserved through passthrough
let chunk_with_metadata = Annotated { let chunk_with_metadata = Annotated {
data: Some(NvCreateChatCompletionStreamResponse { data: Some(NvCreateChatCompletionStreamResponse {
id: "test-id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![{ id: "test-id".to_string(),
#[allow(deprecated)] choices: vec![{
ChatChoiceStream { #[allow(deprecated)]
index: 0, ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
role: Some(Role::Assistant), delta: ChatCompletionStreamResponseDelta {
content: Some(ChatCompletionMessageContent::Text( role: Some(Role::Assistant),
"Content".to_string(), content: Some(ChatCompletionMessageContent::Text(
)), "Content".to_string(),
tool_calls: None, )),
function_call: None, tool_calls: None,
refusal: None, function_call: None,
reasoning_content: None, refusal: None,
}, reasoning_content: None,
finish_reason: None, },
stop_reason: None, finish_reason: None,
logprobs: None, stop_reason: None,
} logprobs: None,
}], }
created: 1234567890, }],
model: "test-model".to_string(), created: 1234567890,
system_fingerprint: None, model: "test-model".to_string(),
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".to_string(),
service_tier: None, usage: None,
service_tier: None,
},
nvext: None, nvext: None,
}), }),
id: Some("correlation-123".to_string()), id: Some("correlation-123".to_string()),
...@@ -481,7 +495,7 @@ mod tests { ...@@ -481,7 +495,7 @@ mod tests {
let (resp1, resp2) = tokio::join!(future1, future2); let (resp1, resp2) = tokio::join!(future1, future2);
// Both should complete successfully // Both should complete successfully
assert_eq!(resp1.object, "chat.completion"); assert_eq!(resp1.inner.object, "chat.completion");
assert_eq!(resp2.object, "chat.completion"); assert_eq!(resp2.inner.object, "chat.completion");
} }
} }
...@@ -238,8 +238,9 @@ async fn evaluate( ...@@ -238,8 +238,9 @@ async fn evaluate(
match (item.data.as_ref(), item.event.as_deref()) { match (item.data.as_ref(), item.event.as_deref()) {
(Some(data), _) => { (Some(data), _) => {
// Normal case // Normal case
let choice = data.choices.first(); let Some(chat_comp) = data.inner.choices.first() else {
let chat_comp = choice.as_ref().unwrap(); continue;
};
if let Some(c) = &chat_comp.delta.content { if let Some(c) = &chat_comp.delta.content {
match c { match c {
ChatCompletionMessageContent::Text(text) => { ChatCompletionMessageContent::Text(text) => {
......
...@@ -138,8 +138,9 @@ async fn main_loop( ...@@ -138,8 +138,9 @@ async fn main_loop(
match (item.data.as_ref(), item.event.as_deref()) { match (item.data.as_ref(), item.event.as_deref()) {
(Some(data), _) => { (Some(data), _) => {
// Normal case // Normal case
let entry = data.choices.first(); let Some(chat_comp) = data.inner.choices.first() else {
let chat_comp = entry.as_ref().unwrap(); continue;
};
if let Some(c) = &chat_comp.delta.content { if let Some(c) = &chat_comp.delta.content {
match c { match c {
ChatCompletionMessageContent::Text(text) => { ChatCompletionMessageContent::Text(text) => {
......
...@@ -991,7 +991,7 @@ fn streaming_tool_dispatch_events( ...@@ -991,7 +991,7 @@ fn streaming_tool_dispatch_events(
}; };
let mut events = vec![]; let mut events = vec![];
for choice in &data.choices { for choice in &data.inner.choices {
let Some(tool_calls) = &choice.delta.tool_calls else { let Some(tool_calls) = &choice.delta.tool_calls else {
continue; continue;
}; };
...@@ -1034,7 +1034,7 @@ fn accumulate_reasoning_dispatch( ...@@ -1034,7 +1034,7 @@ fn accumulate_reasoning_dispatch(
}; };
let mut events = vec![]; let mut events = vec![];
for choice in &data.choices { for choice in &data.inner.choices {
let buffer = buffers.entry(choice.index).or_default(); let buffer = buffers.entry(choice.index).or_default();
let has_reasoning = choice let has_reasoning = choice
.delta .delta
...@@ -2892,15 +2892,17 @@ mod tests { ...@@ -2892,15 +2892,17 @@ mod tests {
// Create a normal data event // Create a normal data event
let normal_event = Annotated::<NvCreateChatCompletionStreamResponse> { let normal_event = Annotated::<NvCreateChatCompletionStreamResponse> {
data: Some(CreateChatCompletionStreamResponse { data: Some(NvCreateChatCompletionStreamResponse {
id: "test-id".to_string(), inner: CreateChatCompletionStreamResponse {
choices: vec![], id: "test-id".to_string(),
created: 0, choices: vec![],
model: "test-model".to_string(), created: 0,
system_fingerprint: None, model: "test-model".to_string(),
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
service_tier: None, object: "chat.completion.chunk".to_string(),
usage: None, service_tier: None,
usage: None,
},
nvext: None, nvext: None,
}), }),
id: Some("msg-1".to_string()), id: Some("msg-1".to_string()),
...@@ -3162,15 +3164,17 @@ mod tests { ...@@ -3162,15 +3164,17 @@ mod tests {
fn make_stream_response( fn make_stream_response(
choices: Vec<ChatChoiceStream>, choices: Vec<ChatChoiceStream>,
) -> Annotated<NvCreateChatCompletionStreamResponse> { ) -> Annotated<NvCreateChatCompletionStreamResponse> {
let response = CreateChatCompletionStreamResponse { let response = NvCreateChatCompletionStreamResponse {
id: "test-id".to_string(), inner: CreateChatCompletionStreamResponse {
choices, id: "test-id".to_string(),
created: 0, choices,
model: "test-model".to_string(), created: 0,
system_fingerprint: None, model: "test-model".to_string(),
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".to_string(),
service_tier: None, usage: None,
service_tier: None,
},
nvext: None, nvext: None,
}; };
Annotated { Annotated {
......
...@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse { ...@@ -128,7 +128,7 @@ impl LogprobExtractor for NvCreateChatCompletionStreamResponse {
fn extract_logprobs_by_choice(&self) -> HashMap<u32, Vec<TokenLogProbs>> { fn extract_logprobs_by_choice(&self) -> HashMap<u32, Vec<TokenLogProbs>> {
let mut result = HashMap::new(); let mut result = HashMap::new();
for choice in &self.choices { for choice in &self.inner.choices {
let choice_index = choice.index; let choice_index = choice.index;
let choice_logprobs = choice let choice_logprobs = choice
...@@ -949,34 +949,36 @@ mod tests { ...@@ -949,34 +949,36 @@ mod tests {
) -> NvCreateChatCompletionStreamResponse { ) -> NvCreateChatCompletionStreamResponse {
#[expect(deprecated)] #[expect(deprecated)]
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![ChatChoiceStream { id: "test_id".to_string(),
index: 0, choices: vec![ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
content: Some( delta: ChatCompletionStreamResponseDelta {
dynamo_async_openai::types::ChatCompletionMessageContent::Text( content: Some(
"test".to_string(), dynamo_async_openai::types::ChatCompletionMessageContent::Text(
"test".to_string(),
),
), ),
), function_call: None,
function_call: None, tool_calls: None,
tool_calls: None, role: Some(Role::Assistant),
role: Some(Role::Assistant), refusal: None,
refusal: None, reasoning_content: None,
reasoning_content: None, },
}, finish_reason: Some(FinishReason::Stop),
finish_reason: Some(FinishReason::Stop), stop_reason: None,
stop_reason: None, logprobs: Some(ChatChoiceLogprobs {
logprobs: Some(ChatChoiceLogprobs { content: Some(token_logprobs),
content: Some(token_logprobs), refusal: None,
refusal: None, }),
}), }],
}], created: 1234567890,
created: 1234567890, model: "test-model".to_string(),
model: "test-model".to_string(), service_tier: None,
service_tier: None, system_fingerprint: None,
system_fingerprint: None, object: "chat.completion.chunk".to_string(),
object: "chat.completion.chunk".to_string(), usage: None,
usage: None, },
nvext: None, nvext: None,
} }
} }
...@@ -1012,14 +1014,16 @@ mod tests { ...@@ -1012,14 +1014,16 @@ mod tests {
.collect(); .collect();
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices, id: "test_id".to_string(),
created: 1234567890, choices,
model: "test-model".to_string(), created: 1234567890,
service_tier: None, model: "test-model".to_string(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
...@@ -1341,31 +1345,33 @@ mod tests { ...@@ -1341,31 +1345,33 @@ mod tests {
// Test with choice that has no logprobs // Test with choice that has no logprobs
#[expect(deprecated)] #[expect(deprecated)]
let response = NvCreateChatCompletionStreamResponse { let response = NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![ChatChoiceStream { id: "test_id".to_string(),
index: 0, choices: vec![ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
content: Some( delta: ChatCompletionStreamResponseDelta {
dynamo_async_openai::types::ChatCompletionMessageContent::Text( content: Some(
"test".to_string(), dynamo_async_openai::types::ChatCompletionMessageContent::Text(
"test".to_string(),
),
), ),
), function_call: None,
function_call: None, tool_calls: None,
tool_calls: None, role: Some(Role::Assistant),
role: Some(Role::Assistant), refusal: None,
refusal: None, reasoning_content: None,
reasoning_content: None, },
}, finish_reason: Some(FinishReason::Stop),
finish_reason: Some(FinishReason::Stop), stop_reason: None,
stop_reason: None, logprobs: None, // No logprobs
logprobs: None, // No logprobs }],
}], created: 1234567890,
created: 1234567890, model: "test-model".to_string(),
model: "test-model".to_string(), service_tier: None,
service_tier: None, system_fingerprint: None,
system_fingerprint: None, object: "chat.completion.chunk".to_string(),
object: "chat.completion.chunk".to_string(), usage: None,
usage: None, },
nvext: None, nvext: None,
}; };
...@@ -1573,14 +1579,16 @@ mod tests { ...@@ -1573,14 +1579,16 @@ mod tests {
// In practice, this would have real logprobs data // In practice, this would have real logprobs data
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![], id: "test_id".to_string(),
created: 1234567890, choices: vec![],
model: "test-model".to_string(), created: 1234567890,
service_tier: None, model: "test-model".to_string(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".to_string(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".to_string(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
......
...@@ -1217,7 +1217,7 @@ impl OpenAIPreprocessor { ...@@ -1217,7 +1217,7 @@ impl OpenAIPreprocessor {
let processed_response = if let Some(ref mut parser) = state.reasoning_parser { let processed_response = if let Some(ref mut parser) = state.reasoning_parser {
response.map_data(|mut data| { response.map_data(|mut data| {
// Process all choices, not just the first one // Process all choices, not just the first one
for choice in data.choices.iter_mut() { for choice in data.inner.choices.iter_mut() {
// Reasoning parsing only applies to text content // Reasoning parsing only applies to text content
if let Some( if let Some(
dynamo_async_openai::types::ChatCompletionMessageContent::Text( dynamo_async_openai::types::ChatCompletionMessageContent::Text(
......
...@@ -111,7 +111,7 @@ pub fn maybe_wrap_stream( ...@@ -111,7 +111,7 @@ pub fn maybe_wrap_stream(
let mut prefill_tx = Some(tx); let mut prefill_tx = Some(tx);
Box::pin(stream.map(move |item| { Box::pin(stream.map(move |item| {
if let Some(ref resp) = item.data { if let Some(ref resp) = item.data {
for choice in &resp.choices { for choice in &resp.inner.choices {
if let Some(ChatCompletionMessageContent::Text(ref text)) = choice.delta.content { if let Some(ChatCompletionMessageContent::Text(ref text)) = choice.delta.content {
accumulated_text.push_str(text); accumulated_text.push_str(text);
} }
......
...@@ -106,7 +106,7 @@ impl AnthropicStreamConverter { ...@@ -106,7 +106,7 @@ impl AnthropicStreamConverter {
let mut events = Vec::new(); let mut events = Vec::new();
// Capture real token usage from engine when available (typically on the final chunk). // Capture real token usage from engine when available (typically on the final chunk).
if let Some(usage) = &chunk.usage { if let Some(usage) = &chunk.inner.usage {
self.input_token_count = usage.prompt_tokens; self.input_token_count = usage.prompt_tokens;
self.output_token_count = usage.completion_tokens; self.output_token_count = usage.completion_tokens;
self.cached_token_count = usage self.cached_token_count = usage
...@@ -115,7 +115,7 @@ impl AnthropicStreamConverter { ...@@ -115,7 +115,7 @@ impl AnthropicStreamConverter {
.and_then(|d| d.cached_tokens); .and_then(|d| d.cached_tokens);
} }
for choice in &chunk.choices { for choice in &chunk.inner.choices {
let delta = &choice.delta; let delta = &choice.delta;
// Track finish reason // Track finish reason
...@@ -444,7 +444,7 @@ impl AnthropicStreamConverter { ...@@ -444,7 +444,7 @@ impl AnthropicStreamConverter {
) -> Vec<TaggedEvent> { ) -> Vec<TaggedEvent> {
let mut events = Vec::new(); let mut events = Vec::new();
if let Some(usage) = &chunk.usage { if let Some(usage) = &chunk.inner.usage {
self.input_token_count = usage.prompt_tokens; self.input_token_count = usage.prompt_tokens;
self.output_token_count = usage.completion_tokens; self.output_token_count = usage.completion_tokens;
self.cached_token_count = usage self.cached_token_count = usage
...@@ -453,7 +453,7 @@ impl AnthropicStreamConverter { ...@@ -453,7 +453,7 @@ impl AnthropicStreamConverter {
.and_then(|d| d.cached_tokens); .and_then(|d| d.cached_tokens);
} }
for choice in &chunk.choices { for choice in &chunk.inner.choices {
let delta = &choice.delta; let delta = &choice.delta;
if let Some(ref fr) = choice.finish_reason { if let Some(ref fr) = choice.finish_reason {
...@@ -722,27 +722,29 @@ mod tests { ...@@ -722,27 +722,29 @@ mod tests {
fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse { fn text_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)] #[allow(deprecated)]
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "chat-1".into(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![ChatChoiceStream { id: "chat-1".into(),
index: 0, choices: vec![ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
content: Some(ChatCompletionMessageContent::Text(text.into())), delta: ChatCompletionStreamResponseDelta {
function_call: None, content: Some(ChatCompletionMessageContent::Text(text.into())),
tool_calls: None, function_call: None,
role: None, tool_calls: None,
refusal: None, role: None,
reasoning_content: None, refusal: None,
}, reasoning_content: None,
finish_reason: None, },
stop_reason: None, finish_reason: None,
logprobs: None, stop_reason: None,
}], logprobs: None,
created: 0, }],
model: "test".into(), created: 0,
service_tier: None, model: "test".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".into(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
...@@ -755,35 +757,37 @@ mod tests { ...@@ -755,35 +757,37 @@ mod tests {
) -> NvCreateChatCompletionStreamResponse { ) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)] #[allow(deprecated)]
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "chat-1".into(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![ChatChoiceStream { id: "chat-1".into(),
index: 0, choices: vec![ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
content: None, delta: ChatCompletionStreamResponseDelta {
function_call: None, content: None,
tool_calls: Some(vec![ChatCompletionMessageToolCallChunk { function_call: None,
index: tc_index, tool_calls: Some(vec![ChatCompletionMessageToolCallChunk {
id: id.map(String::from), index: tc_index,
r#type: Some(ChatCompletionToolType::Function), id: id.map(String::from),
function: Some(FunctionCallStream { r#type: Some(ChatCompletionToolType::Function),
name: name.map(String::from), function: Some(FunctionCallStream {
arguments: args.map(String::from), name: name.map(String::from),
}), arguments: args.map(String::from),
}]), }),
role: None, }]),
refusal: None, role: None,
reasoning_content: None, refusal: None,
}, reasoning_content: None,
finish_reason: None, },
stop_reason: None, finish_reason: None,
logprobs: None, stop_reason: None,
}], logprobs: None,
created: 0, }],
model: "test".into(), created: 0,
service_tier: None, model: "test".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".into(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
...@@ -908,27 +912,29 @@ mod tests { ...@@ -908,27 +912,29 @@ mod tests {
fn reasoning_chunk(text: &str) -> NvCreateChatCompletionStreamResponse { fn reasoning_chunk(text: &str) -> NvCreateChatCompletionStreamResponse {
#[allow(deprecated)] #[allow(deprecated)]
NvCreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: "chat-1".into(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
choices: vec![ChatChoiceStream { id: "chat-1".into(),
index: 0, choices: vec![ChatChoiceStream {
delta: ChatCompletionStreamResponseDelta { index: 0,
content: None, delta: ChatCompletionStreamResponseDelta {
function_call: None, content: None,
tool_calls: None, function_call: None,
role: None, tool_calls: None,
refusal: None, role: None,
reasoning_content: Some(text.into()), refusal: None,
}, reasoning_content: Some(text.into()),
finish_reason: None, },
stop_reason: None, finish_reason: None,
logprobs: None, stop_reason: None,
}], logprobs: None,
created: 0, }],
model: "test".into(), created: 0,
service_tier: None, model: "test".into(),
system_fingerprint: None, service_tier: None,
object: "chat.completion.chunk".into(), system_fingerprint: None,
usage: None, object: "chat.completion.chunk".into(),
usage: None,
},
nvext: None, nvext: None,
} }
} }
......
This diff is collapsed.
...@@ -64,21 +64,24 @@ pub struct NvCreateChatCompletionRequest { ...@@ -64,21 +64,24 @@ pub struct NvCreateChatCompletionRequest {
} }
/// A response structure for unary chat completion responses, embedding OpenAI's /// A response structure for unary chat completion responses, embedding OpenAI's
/// `CreateChatCompletionResponse`. /// `CreateChatCompletionResponse` with optional NVIDIA extension metadata.
/// #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
/// # Fields pub struct NvCreateChatCompletionResponse {
/// - `inner`: The base OpenAI unary chat completion response, embedded #[serde(flatten)]
/// using `serde(flatten)`. pub inner: dynamo_async_openai::types::CreateChatCompletionResponse,
pub type NvCreateChatCompletionResponse = dynamo_async_openai::types::CreateChatCompletionResponse; #[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<serde_json::Value>,
}
/// A response structure for streamed chat completions, embedding OpenAI's /// A response structure for streamed chat completions, embedding OpenAI's
/// `CreateChatCompletionStreamResponse`. /// `CreateChatCompletionStreamResponse` with optional NVIDIA extension metadata.
/// #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
/// # Fields pub struct NvCreateChatCompletionStreamResponse {
/// - `inner`: The base OpenAI streaming chat completion response, embedded #[serde(flatten)]
/// using `serde(flatten)`. pub inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse,
pub type NvCreateChatCompletionStreamResponse = #[serde(skip_serializing_if = "Option::is_none")]
dynamo_async_openai::types::CreateChatCompletionStreamResponse; pub nvext: Option<serde_json::Value>,
}
/// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`, /// Implements `NvExtProvider` for `NvCreateChatCompletionRequest`,
/// providing access to NVIDIA-specific extensions. /// providing access to NVIDIA-specific extensions.
......
...@@ -136,16 +136,16 @@ impl DeltaAggregator { ...@@ -136,16 +136,16 @@ impl DeltaAggregator {
if aggregator.error.is_none() if aggregator.error.is_none()
&& let Some(delta) = delta.data && let Some(delta) = delta.data
{ {
aggregator.id = delta.id; aggregator.id = delta.inner.id;
aggregator.model = delta.model; aggregator.model = delta.inner.model;
aggregator.created = delta.created; aggregator.created = delta.inner.created;
aggregator.service_tier = delta.service_tier; aggregator.service_tier = delta.inner.service_tier;
// Aggregate usage statistics if available. // Aggregate usage statistics if available.
if let Some(usage) = delta.usage { if let Some(usage) = delta.inner.usage {
aggregator.usage = Some(usage); aggregator.usage = Some(usage);
} }
if let Some(system_fingerprint) = delta.system_fingerprint { if let Some(system_fingerprint) = delta.inner.system_fingerprint {
aggregator.system_fingerprint = Some(system_fingerprint); aggregator.system_fingerprint = Some(system_fingerprint);
} }
...@@ -155,7 +155,7 @@ impl DeltaAggregator { ...@@ -155,7 +155,7 @@ impl DeltaAggregator {
} }
// Aggregate choices incrementally. // Aggregate choices incrementally.
for choice in delta.choices { for choice in delta.inner.choices {
let state_choice = let state_choice =
aggregator aggregator
.choices .choices
...@@ -267,14 +267,16 @@ impl DeltaAggregator { ...@@ -267,14 +267,16 @@ impl DeltaAggregator {
// Construct the final response object. // Construct the final response object.
let response = NvCreateChatCompletionResponse { let response = NvCreateChatCompletionResponse {
id: aggregator.id, inner: dynamo_async_openai::types::CreateChatCompletionResponse {
created: aggregator.created, id: aggregator.id,
usage: aggregator.usage, created: aggregator.created,
model: aggregator.model, usage: aggregator.usage,
object: "chat.completion".to_string(), model: aggregator.model,
system_fingerprint: aggregator.system_fingerprint, object: "chat.completion".to_string(),
choices, system_fingerprint: aggregator.system_fingerprint,
service_tier: aggregator.service_tier, choices,
service_tier: aggregator.service_tier,
},
nvext: aggregator.nvext, nvext: aggregator.nvext,
}; };
...@@ -360,7 +362,7 @@ pub trait ChatCompletionAggregator { ...@@ -360,7 +362,7 @@ pub trait ChatCompletionAggregator {
) -> Result<NvCreateChatCompletionResponse, String>; ) -> Result<NvCreateChatCompletionResponse, String>;
} }
impl ChatCompletionAggregator for dynamo_async_openai::types::CreateChatCompletionResponse { impl ChatCompletionAggregator for NvCreateChatCompletionResponse {
async fn from_annotated_stream( async fn from_annotated_stream(
stream: impl Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>>, stream: impl Stream<Item = Annotated<NvCreateChatCompletionStreamResponse>>,
parsing_options: ParsingOptions, parsing_options: ParsingOptions,
...@@ -445,14 +447,16 @@ mod tests { ...@@ -445,14 +447,16 @@ mod tests {
}; };
let data = NvCreateChatCompletionStreamResponse { let data = NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
model: "meta/llama-3.1-8b-instruct".to_string(), id: "test_id".to_string(),
created: 1234567890, model: "meta/llama-3.1-8b-instruct".to_string(),
service_tier: None, created: 1234567890,
usage: None, service_tier: None,
system_fingerprint: None, usage: None,
choices: vec![choice], system_fingerprint: None,
object: "chat.completion".to_string(), choices: vec![choice],
object: "chat.completion".to_string(),
},
nvext: None, nvext: None,
}; };
...@@ -479,13 +483,13 @@ mod tests { ...@@ -479,13 +483,13 @@ mod tests {
let response = result.unwrap(); let response = result.unwrap();
// Verify that the response is empty and has default values // Verify that the response is empty and has default values
assert_eq!(response.id, ""); assert_eq!(response.inner.id, "");
assert_eq!(response.model, ""); assert_eq!(response.inner.model, "");
assert_eq!(response.created, 0); assert_eq!(response.inner.created, 0);
assert!(response.usage.is_none()); assert!(response.inner.usage.is_none());
assert!(response.system_fingerprint.is_none()); assert!(response.inner.system_fingerprint.is_none());
assert_eq!(response.choices.len(), 0); assert_eq!(response.inner.choices.len(), 0);
assert!(response.service_tier.is_none()); assert!(response.inner.service_tier.is_none());
} }
#[tokio::test] #[tokio::test]
...@@ -511,13 +515,13 @@ mod tests { ...@@ -511,13 +515,13 @@ mod tests {
let response = result.unwrap(); let response = result.unwrap();
// Verify the response fields // Verify the response fields
assert_eq!(response.id, "test_id"); assert_eq!(response.inner.id, "test_id");
assert_eq!(response.model, "meta/llama-3.1-8b-instruct"); assert_eq!(response.inner.model, "meta/llama-3.1-8b-instruct");
assert_eq!(response.created, 1234567890); assert_eq!(response.inner.created, 1234567890);
assert!(response.usage.is_none()); assert!(response.inner.usage.is_none());
assert!(response.system_fingerprint.is_none()); assert!(response.inner.system_fingerprint.is_none());
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
assert_eq!(choice.index, 0); assert_eq!(choice.index, 0);
assert_eq!( assert_eq!(
choice.message.content.as_ref().unwrap(), choice.message.content.as_ref().unwrap(),
...@@ -525,7 +529,7 @@ mod tests { ...@@ -525,7 +529,7 @@ mod tests {
); );
assert!(choice.finish_reason.is_none()); assert!(choice.finish_reason.is_none());
assert_eq!(choice.message.role, dynamo_async_openai::types::Role::User); assert_eq!(choice.message.role, dynamo_async_openai::types::Role::User);
assert!(response.service_tier.is_none()); assert!(response.inner.service_tier.is_none());
} }
#[tokio::test] #[tokio::test]
...@@ -562,8 +566,8 @@ mod tests { ...@@ -562,8 +566,8 @@ mod tests {
let response = result.unwrap(); let response = result.unwrap();
// Verify the response fields // Verify the response fields
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
assert_eq!(choice.index, 0); assert_eq!(choice.index, 0);
assert_eq!( assert_eq!(
choice.message.content.as_ref().unwrap(), choice.message.content.as_ref().unwrap(),
...@@ -630,8 +634,8 @@ mod tests { ...@@ -630,8 +634,8 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
let response = result.unwrap(); let response = result.unwrap();
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
assert_eq!(choice.index, 0); assert_eq!(choice.index, 0);
assert_eq!( assert_eq!(
...@@ -653,43 +657,49 @@ mod tests { ...@@ -653,43 +657,49 @@ mod tests {
// Create a delta with multiple choices // Create a delta with multiple choices
// ALLOW: function_call is deprecated // ALLOW: function_call is deprecated
let data = NvCreateChatCompletionStreamResponse { let data = NvCreateChatCompletionStreamResponse {
id: "test_id".to_string(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
model: "test_model".to_string(), id: "test_id".to_string(),
created: 1234567890, model: "test_model".to_string(),
service_tier: None, created: 1234567890,
usage: None, service_tier: None,
system_fingerprint: None, usage: None,
choices: vec![ system_fingerprint: None,
dynamo_async_openai::types::ChatChoiceStream { choices: vec![
index: 0, dynamo_async_openai::types::ChatChoiceStream {
delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta { index: 0,
role: Some(dynamo_async_openai::types::Role::Assistant), delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
content: Some(ChatCompletionMessageContent::Text("Choice 0".to_string())), role: Some(dynamo_async_openai::types::Role::Assistant),
function_call: None, content: Some(ChatCompletionMessageContent::Text(
tool_calls: None, "Choice 0".to_string(),
refusal: None, )),
reasoning_content: None, function_call: None,
tool_calls: None,
refusal: None,
reasoning_content: None,
},
finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop),
stop_reason: None,
logprobs: None,
}, },
finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop), dynamo_async_openai::types::ChatChoiceStream {
stop_reason: None, index: 1,
logprobs: None, delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta {
}, role: Some(dynamo_async_openai::types::Role::Assistant),
dynamo_async_openai::types::ChatChoiceStream { content: Some(ChatCompletionMessageContent::Text(
index: 1, "Choice 1".to_string(),
delta: dynamo_async_openai::types::ChatCompletionStreamResponseDelta { )),
role: Some(dynamo_async_openai::types::Role::Assistant), function_call: None,
content: Some(ChatCompletionMessageContent::Text("Choice 1".to_string())), tool_calls: None,
function_call: None, refusal: None,
tool_calls: None, reasoning_content: None,
refusal: None, },
reasoning_content: None, finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop),
stop_reason: None,
logprobs: None,
}, },
finish_reason: Some(dynamo_async_openai::types::FinishReason::Stop), ],
stop_reason: None, object: "chat.completion".to_string(),
logprobs: None, },
},
],
object: "chat.completion".to_string(),
nvext: None, nvext: None,
}; };
...@@ -711,9 +721,9 @@ mod tests { ...@@ -711,9 +721,9 @@ mod tests {
let mut response = result.unwrap(); let mut response = result.unwrap();
// Verify the response fields // Verify the response fields
assert_eq!(response.choices.len(), 2); assert_eq!(response.inner.choices.len(), 2);
response.choices.sort_by(|a, b| a.index.cmp(&b.index)); // Ensure the choices are ordered response.inner.choices.sort_by(|a, b| a.index.cmp(&b.index)); // Ensure the choices are ordered
let choice0 = &response.choices[0]; let choice0 = &response.inner.choices[0];
assert_eq!(choice0.index, 0); assert_eq!(choice0.index, 0);
assert_eq!( assert_eq!(
choice0.message.content.as_ref().unwrap(), choice0.message.content.as_ref().unwrap(),
...@@ -728,7 +738,7 @@ mod tests { ...@@ -728,7 +738,7 @@ mod tests {
dynamo_async_openai::types::Role::Assistant dynamo_async_openai::types::Role::Assistant
); );
let choice1 = &response.choices[1]; let choice1 = &response.inner.choices[1];
assert_eq!(choice1.index, 1); assert_eq!(choice1.index, 1);
assert_eq!( assert_eq!(
choice1.message.content.as_ref().unwrap(), choice1.message.content.as_ref().unwrap(),
...@@ -773,8 +783,8 @@ mod tests { ...@@ -773,8 +783,8 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
let response = result.unwrap(); let response = result.unwrap();
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
// Verify tool calls are present // Verify tool calls are present
assert!(choice.message.tool_calls.is_some()); assert!(choice.message.tool_calls.is_some());
...@@ -816,8 +826,8 @@ mod tests { ...@@ -816,8 +826,8 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
let response = result.unwrap(); let response = result.unwrap();
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
// Verify tool calls are present // Verify tool calls are present
assert!(choice.message.tool_calls.is_some()); assert!(choice.message.tool_calls.is_some());
...@@ -859,8 +869,8 @@ mod tests { ...@@ -859,8 +869,8 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
let response = result.unwrap(); let response = result.unwrap();
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
// Verify tool calls are present // Verify tool calls are present
assert!(choice.message.tool_calls.is_some()); assert!(choice.message.tool_calls.is_some());
...@@ -900,8 +910,8 @@ mod tests { ...@@ -900,8 +910,8 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
let response = result.unwrap(); let response = result.unwrap();
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
// Verify no tool calls are present // Verify no tool calls are present
assert!(choice.message.tool_calls.is_none()); assert!(choice.message.tool_calls.is_none());
...@@ -928,7 +938,7 @@ mod tests { ...@@ -928,7 +938,7 @@ mod tests {
// Manually set empty tool calls array // Manually set empty tool calls array
if let Some(ref mut data) = annotated_delta.data { if let Some(ref mut data) = annotated_delta.data {
data.choices[0].delta.tool_calls = Some(vec![]); // Empty tool calls array data.inner.choices[0].delta.tool_calls = Some(vec![]); // Empty tool calls array
} }
let data = annotated_delta.data.unwrap(); let data = annotated_delta.data.unwrap();
...@@ -945,8 +955,8 @@ mod tests { ...@@ -945,8 +955,8 @@ mod tests {
assert!(result.is_ok()); assert!(result.is_ok());
let response = result.unwrap(); let response = result.unwrap();
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
// Verify tool calls array is empty // Verify tool calls array is empty
assert!(choice.message.tool_calls.is_none()); assert!(choice.message.tool_calls.is_none());
...@@ -992,8 +1002,8 @@ mod tests { ...@@ -992,8 +1002,8 @@ mod tests {
let response = result.unwrap(); let response = result.unwrap();
// There should be one choice // There should be one choice
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
// The tool_calls field should be present and parsed // The tool_calls field should be present and parsed
assert!(choice.message.tool_calls.is_some()); assert!(choice.message.tool_calls.is_some());
...@@ -1050,8 +1060,8 @@ mod tests { ...@@ -1050,8 +1060,8 @@ mod tests {
let response = result.unwrap(); let response = result.unwrap();
// There should be one choice // There should be one choice
assert_eq!(response.choices.len(), 1); assert_eq!(response.inner.choices.len(), 1);
let choice = &response.choices[0]; let choice = &response.inner.choices[0];
// The finish_reason should be ToolCalls, not Stop, because tool calls are present // The finish_reason should be ToolCalls, not Stop, because tool calls are present
assert_eq!( assert_eq!(
......
...@@ -278,19 +278,21 @@ impl DeltaGenerator { ...@@ -278,19 +278,21 @@ impl DeltaGenerator {
// According to OpenAI spec: when stream_options.include_usage is true, // According to OpenAI spec: when stream_options.include_usage is true,
// all intermediate chunks should have usage: null // all intermediate chunks should have usage: null
// The final usage chunk will be sent separately with empty choices // The final usage chunk will be sent separately with empty choices
dynamo_async_openai::types::CreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: self.id.clone(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
object: self.object.clone(), id: self.id.clone(),
created: self.created, object: self.object.clone(),
model: self.model.clone(), created: self.created,
system_fingerprint: self.system_fingerprint.clone(), model: self.model.clone(),
choices, system_fingerprint: self.system_fingerprint.clone(),
usage: if self.options.enable_usage && self.options.continuous_usage_stats { choices,
Some(self.get_usage()) usage: if self.options.enable_usage && self.options.continuous_usage_stats {
} else { Some(self.get_usage())
None } else {
None
},
service_tier: self.service_tier.clone(),
}, },
service_tier: self.service_tier.clone(),
nvext: None, // Will be populated by router layer if needed nvext: None, // Will be populated by router layer if needed
} }
} }
...@@ -303,15 +305,17 @@ impl DeltaGenerator { ...@@ -303,15 +305,17 @@ impl DeltaGenerator {
pub fn create_usage_chunk(&self) -> NvCreateChatCompletionStreamResponse { pub fn create_usage_chunk(&self) -> NvCreateChatCompletionStreamResponse {
let usage = self.get_usage(); let usage = self.get_usage();
dynamo_async_openai::types::CreateChatCompletionStreamResponse { NvCreateChatCompletionStreamResponse {
id: self.id.clone(), inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
object: self.object.clone(), id: self.id.clone(),
created: self.created, object: self.object.clone(),
model: self.model.clone(), created: self.created,
system_fingerprint: self.system_fingerprint.clone(), model: self.model.clone(),
choices: vec![], // Empty choices for usage-only chunk system_fingerprint: self.system_fingerprint.clone(),
usage: Some(usage), choices: vec![], // Empty choices for usage-only chunk
service_tier: self.service_tier.clone(), usage: Some(usage),
service_tier: self.service_tier.clone(),
},
nvext: None, nvext: None,
} }
} }
......
...@@ -525,13 +525,13 @@ impl JailedStream { ...@@ -525,13 +525,13 @@ impl JailedStream {
// Process each item in the stream // Process each item in the stream
while let Some(response) = stream.next().await { while let Some(response) = stream.next().await {
if let Some(chat_response) = response.data.as_ref() { if let Some(chat_response) = response.data.as_ref() {
last_stream_id.clone_from(&chat_response.id); last_stream_id.clone_from(&chat_response.inner.id);
last_stream_model.clone_from(&chat_response.model); last_stream_model.clone_from(&chat_response.inner.model);
last_stream_created = chat_response.created; last_stream_created = chat_response.inner.created;
let mut all_emissions = Vec::new(); let mut all_emissions = Vec::new();
if chat_response.choices.is_empty() { if chat_response.inner.choices.is_empty() {
// No choices processed (e.g., usage-only chunk) // No choices processed (e.g., usage-only chunk)
// Pass through as-is to preserve usage and other metadata // Pass through as-is to preserve usage and other metadata
yield response; yield response;
...@@ -539,7 +539,7 @@ impl JailedStream { ...@@ -539,7 +539,7 @@ impl JailedStream {
} }
// Process each choice independently using the new architecture // Process each choice independently using the new architecture
for choice in &chat_response.choices { for choice in &chat_response.inner.choices {
if let Some(ref content) = choice.delta.content { if let Some(ref content) = choice.delta.content {
// Jailing only applies to text content // Jailing only applies to text content
let text_content = match content { let text_content = match content {
...@@ -676,14 +676,16 @@ impl JailedStream { ...@@ -676,14 +676,16 @@ impl JailedStream {
tracing::debug!("Stream ended while jailed, releasing accumulated content"); tracing::debug!("Stream ended while jailed, releasing accumulated content");
// Create a finalization response carrying forward real stream metadata // Create a finalization response carrying forward real stream metadata
let dummy_response = NvCreateChatCompletionStreamResponse { let dummy_response = NvCreateChatCompletionStreamResponse {
id: last_stream_id, inner: dynamo_async_openai::types::CreateChatCompletionStreamResponse {
object: "chat.completion.chunk".to_string(), id: last_stream_id,
created: last_stream_created, object: "chat.completion.chunk".to_string(),
model: last_stream_model, created: last_stream_created,
choices: Vec::new(), model: last_stream_model,
usage: None, choices: Vec::new(),
service_tier: None, usage: None,
system_fingerprint: None, service_tier: None,
system_fingerprint: None,
},
nvext: None, nvext: None,
}; };
...@@ -713,7 +715,7 @@ impl JailedStream { ...@@ -713,7 +715,7 @@ impl JailedStream {
EmissionMode::Packed => { EmissionMode::Packed => {
// Pack all choices into a single response // Pack all choices into a single response
let mut response = base_response.clone(); let mut response = base_response.clone();
response.choices = emissions.into_iter().map(|e| e.into_choice()).collect(); response.inner.choices = emissions.into_iter().map(|e| e.into_choice()).collect();
vec![Annotated { vec![Annotated {
data: Some(response), data: Some(response),
...@@ -729,7 +731,7 @@ impl JailedStream { ...@@ -729,7 +731,7 @@ impl JailedStream {
.into_iter() .into_iter()
.map(|emission| { .map(|emission| {
let mut response = base_response.clone(); let mut response = base_response.clone();
response.choices = vec![emission.into_choice()]; response.inner.choices = vec![emission.into_choice()];
Annotated { Annotated {
data: Some(response), data: Some(response),
...@@ -1013,7 +1015,7 @@ impl JailedStream { ...@@ -1013,7 +1015,7 @@ impl JailedStream {
while let Some(mut response) = input_stream.next().await { while let Some(mut response) = input_stream.next().await {
// Track if any choice emitted tool calls // Track if any choice emitted tool calls
if let Some(ref data) = response.data { if let Some(ref data) = response.data {
for choice in &data.choices { for choice in &data.inner.choices {
if choice.delta.tool_calls.is_some() { if choice.delta.tool_calls.is_some() {
has_tool_calls_per_choice.insert(choice.index, true); has_tool_calls_per_choice.insert(choice.index, true);
} }
...@@ -1022,7 +1024,7 @@ impl JailedStream { ...@@ -1022,7 +1024,7 @@ impl JailedStream {
// Fix finish_reason based on jail mode and whether tool calls were emitted // Fix finish_reason based on jail mode and whether tool calls were emitted
if let Some(ref mut data) = response.data { if let Some(ref mut data) = response.data {
for choice in &mut data.choices { for choice in &mut data.inner.choices {
if let Some(finish) = choice.finish_reason { if let Some(finish) = choice.finish_reason {
// Only modify Stop finish reason, preserve Length/ContentFilter // Only modify Stop finish reason, preserve Length/ContentFilter
if finish == FinishReason::Stop { if finish == FinishReason::Stop {
......
...@@ -48,6 +48,8 @@ pub struct NvCreateCompletionRequest { ...@@ -48,6 +48,8 @@ pub struct NvCreateCompletionRequest {
pub struct NvCreateCompletionResponse { pub struct NvCreateCompletionResponse {
#[serde(flatten)] #[serde(flatten)]
pub inner: dynamo_async_openai::types::CreateCompletionResponse, pub inner: dynamo_async_openai::types::CreateCompletionResponse,
#[serde(skip_serializing_if = "Option::is_none")]
pub nvext: Option<serde_json::Value>,
} }
impl ContentProvider for dynamo_async_openai::types::Choice { impl ContentProvider for dynamo_async_openai::types::Choice {
...@@ -296,9 +298,8 @@ impl ResponseFactory { ...@@ -296,9 +298,8 @@ impl ResponseFactory {
choices: vec![choice], choices: vec![choice],
system_fingerprint: self.system_fingerprint.clone(), system_fingerprint: self.system_fingerprint.clone(),
usage, usage,
nvext: None, // Will be populated by router layer if needed
}; };
NvCreateCompletionResponse { inner } NvCreateCompletionResponse { inner, nvext: None }
} }
} }
......
...@@ -86,8 +86,8 @@ impl DeltaAggregator { ...@@ -86,8 +86,8 @@ impl DeltaAggregator {
aggregator.system_fingerprint = Some(system_fingerprint); aggregator.system_fingerprint = Some(system_fingerprint);
} }
// Aggregate nvext field (take the last non-None value) // Aggregate nvext field (take the last non-None value)
if delta.inner.nvext.is_some() { if delta.nvext.is_some() {
aggregator.nvext = delta.inner.nvext; aggregator.nvext = delta.nvext;
} }
// handle the choices // handle the choices
...@@ -168,10 +168,12 @@ impl DeltaAggregator { ...@@ -168,10 +168,12 @@ impl DeltaAggregator {
object: "text_completion".to_string(), object: "text_completion".to_string(),
system_fingerprint: aggregator.system_fingerprint, system_fingerprint: aggregator.system_fingerprint,
choices, choices,
nvext: aggregator.nvext,
}; };
let response = NvCreateCompletionResponse { inner }; let response = NvCreateCompletionResponse {
inner,
nvext: aggregator.nvext,
};
Ok(response) Ok(response)
} }
...@@ -256,10 +258,9 @@ mod tests { ...@@ -256,10 +258,9 @@ mod tests {
logprobs, logprobs,
}], }],
object: "text_completion".to_string(), object: "text_completion".to_string(),
nvext: None,
}; };
let response = NvCreateCompletionResponse { inner }; let response = NvCreateCompletionResponse { inner, nvext: None };
Annotated { Annotated {
data: Some(response), data: Some(response),
...@@ -387,10 +388,9 @@ mod tests { ...@@ -387,10 +388,9 @@ mod tests {
}, },
], ],
object: "text_completion".to_string(), object: "text_completion".to_string(),
nvext: None,
}; };
let response = NvCreateCompletionResponse { inner }; let response = NvCreateCompletionResponse { inner, nvext: None };
let annotated_delta = Annotated { let annotated_delta = Annotated {
data: Some(response), data: Some(response),
......
...@@ -218,10 +218,9 @@ impl DeltaGenerator { ...@@ -218,10 +218,9 @@ impl DeltaGenerator {
} else { } else {
None None
}, },
nvext: None, // Will be populated by router layer if needed
}; };
NvCreateCompletionResponse { inner } NvCreateCompletionResponse { inner, nvext: None }
} }
/// Creates a final usage-only chunk for OpenAI compliance. /// Creates a final usage-only chunk for OpenAI compliance.
...@@ -240,10 +239,9 @@ impl DeltaGenerator { ...@@ -240,10 +239,9 @@ impl DeltaGenerator {
system_fingerprint: self.system_fingerprint.clone(), system_fingerprint: self.system_fingerprint.clone(),
choices: vec![], // Empty choices for usage-only chunk choices: vec![], // Empty choices for usage-only chunk
usage: Some(usage), usage: Some(usage),
nvext: None, // Will be populated by router layer if needed
}; };
NvCreateCompletionResponse { inner } NvCreateCompletionResponse { inner, nvext: None }
} }
/// Check if usage tracking is enabled /// Check if usage tracking is enabled
...@@ -343,7 +341,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for ...@@ -343,7 +341,7 @@ impl crate::protocols::openai::DeltaGeneratorExt<NvCreateCompletionResponse> for
}; };
if let Ok(nvext_json) = serde_json::to_value(&nvext_response) { if let Ok(nvext_json) = serde_json::to_value(&nvext_response) {
response.inner.nvext = Some(nvext_json); response.nvext = Some(nvext_json);
if let Some(ref info) = worker_id_info { if let Some(ref info) = worker_id_info {
tracing::debug!( tracing::debug!(
"Injected worker_id into completions nvext: prefill={:?}, decode={:?}", "Injected worker_id into completions nvext: prefill={:?}, decode={:?}",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment