"docs/vscode:/vscode.git/clone" did not exist on "ef51831ee8dbd64833b25e042d4e984d169202f9"
Unverified commit 179ee38b, authored by Ayush Agarwal, committed by GitHub
Browse files

fix: missing tool calling usage (#4516)


Signed-off-by: ayushag <ayushag@nvidia.com>
parent e7544f19
...@@ -470,6 +470,13 @@ impl JailedStream { ...@@ -470,6 +470,13 @@ impl JailedStream {
if let Some(chat_response) = response.data.as_ref() { if let Some(chat_response) = response.data.as_ref() {
let mut all_emissions = Vec::new(); let mut all_emissions = Vec::new();
if chat_response.choices.is_empty() {
// No choices processed (e.g., usage-only chunk)
// Pass through as-is to preserve usage and other metadata
yield response;
continue;
}
// Process each choice independently using the new architecture // Process each choice independently using the new architecture
for choice in &chat_response.choices { for choice in &chat_response.choices {
if let Some(ref content) = choice.delta.content { if let Some(ref content) = choice.delta.content {
......
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use dynamo_async_openai::types::{ use dynamo_async_openai::types::{
ChatChoiceStream, ChatCompletionStreamResponseDelta, FinishReason, Role, ChatChoiceStream, ChatCompletionStreamResponseDelta, CompletionUsage, FinishReason, Role,
}; };
use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse; use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
use dynamo_llm::protocols::openai::chat_completions::jail::JailedStream; use dynamo_llm::protocols::openai::chat_completions::jail::JailedStream;
...@@ -1633,6 +1633,55 @@ mod tests { ...@@ -1633,6 +1633,55 @@ mod tests {
} }
} }
/// Checks that a chunk carrying only usage data (no choices) comes out of
/// `JailedStream::apply` with its usage intact, right after a normal content chunk.
#[tokio::test]
async fn test_usage_chunk_preserved() {
    // Regular chunk with a single choice carrying text content.
    let content_chunk = create_mock_response_chunk("Hello, world!".to_string(), 0);

    // Derive the usage-only chunk from the content chunk: drop every choice
    // and attach a usage record with known token counts.
    let mut usage_chunk = content_chunk.clone();
    if let Some(data) = usage_chunk.data.as_mut() {
        data.choices.clear();
        data.usage = Some(CompletionUsage {
            prompt_tokens: 11,
            completion_tokens: 3,
            total_tokens: 14,
            prompt_tokens_details: None,
            completion_tokens_details: None,
        });
    }

    // Feed both chunks, in order, through a default-configured jail.
    let input_stream = stream::iter(vec![content_chunk, usage_chunk]);
    let jail = JailedStream::builder().build();
    let results: Vec<_> = jail.apply(input_stream).collect().await;

    // Neither chunk may be dropped or merged.
    assert_eq!(results.len(), 2, "Should have exactly 2 chunks");

    // The first output is the content chunk with its text unchanged.
    let first = results[0].data.as_ref().unwrap();
    let content = first.choices[0].delta.content.as_ref().unwrap();
    assert_eq!(
        content, "Hello, world!",
        "Content chunk should have 'Hello, world!'"
    );

    // The second output is the usage-only chunk: still no choices, usage preserved.
    let second = results[1].data.as_ref().unwrap();
    assert!(
        second.choices.is_empty(),
        "Usage chunk should have no choices"
    );
    let usage = second.usage.as_ref().unwrap();
    assert_eq!(usage.prompt_tokens, 11);
    assert_eq!(usage.completion_tokens, 3);
    assert_eq!(usage.total_tokens, 14);
}
#[tokio::test] #[tokio::test]
async fn test_multiple_choices_usage_aggregation() { async fn test_multiple_choices_usage_aggregation() {
// Test that usage is correctly aggregated across multiple choices // Test that usage is correctly aggregated across multiple choices
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment