fix: missing tool calling usage (#4516)

Signed-off-by: ayushag <ayushag@nvidia.com>

fix: missing tool calling usage (#4516)
Signed-off-by: ayushag <ayushag@nvidia.com>
179ee38b · Ayush Agarwal · GitHub · e7544f19 · 179ee38b · 179ee38b
Unverified Commit 179ee38b authored Nov 20, 2025 by Ayush Agarwal Committed by GitHub Nov 20, 2025
Show whitespace changes
Inline Side-by-side

Showing with 57 additions and 1 deletion

lib/llm/src/protocols/openai/chat_completions/jail.rs lib/llm/src/protocols/openai/chat_completions/jail.rs +7 -0

lib/llm/tests/test_jail.rs lib/llm/tests/test_jail.rs +50 -1

No files found.
--- a/lib/llm/src/protocols/openai/chat_completions/jail.rs
+++ b/lib/llm/src/protocols/openai/chat_completions/jail.rs
@@ -470,6 +470,13 @@ impl JailedStream {
                if let Some(chat_response) = response.data.as_ref() {
                    let mut all_emissions = Vec::new();
+                    if chat_response.choices.is_empty() {
+                        // No choices processed (e.g., usage-only chunk)
+                        // Pass through as-is to preserve usage and other metadata
+                        yield response;
+                        continue;
+                    }
                    // Process each choice independently using the new architecture
                    for choice in &chat_response.choices {
                        if let Some(ref content) = choice.delta.content {

--- a/lib/llm/tests/test_jail.rs
+++ b/lib/llm/tests/test_jail.rs
 // SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 // SPDX-License-Identifier: Apache-2.0
 use dynamo_async_openai::types::{
-    ChatChoiceStream, ChatCompletionStreamResponseDelta, FinishReason, Role,
+    ChatChoiceStream, ChatCompletionStreamResponseDelta, CompletionUsage, FinishReason, Role,
 };
 use dynamo_llm::protocols::openai::chat_completions::NvCreateChatCompletionStreamResponse;
 use dynamo_llm::protocols::openai::chat_completions::jail::JailedStream;
@@ -1633,6 +1633,55 @@ mod tests {
        }
    }
+    /// Checks that a chunk with no choices but with usage attached comes out of the
+    /// jailed stream with its usage intact, right behind the regular content chunk.
+    #[tokio::test]
+    async fn test_usage_chunk_preserved() {
+        // Input: a regular content chunk, plus a copy of it turned into a usage-only chunk.
+        let content_chunk = create_mock_response_chunk("Hello, world!".to_string(), 0);
+        let usage_only = CompletionUsage {
+            prompt_tokens: 11,
+            completion_tokens: 3,
+            total_tokens: 14,
+            prompt_tokens_details: None,
+            completion_tokens_details: None,
+        };
+        let mut usage_chunk = content_chunk.clone();
+        if let Some(payload) = usage_chunk.data.as_mut() {
+            // Drop every choice so that only the usage metadata remains.
+            payload.choices.clear();
+            payload.usage = Some(usage_only);
+        }
+        // Feed both chunks, in order, through a jail built with no extra builder options.
+        let input_stream = stream::iter(vec![content_chunk, usage_chunk]);
+        let jail = JailedStream::builder().build();
+        let results: Vec<_> = jail.apply(input_stream).collect().await;
+        // Nothing may be dropped or added on the way through.
+        assert_eq!(results.len(), 2, "Should have exactly 2 chunks");
+        // Chunk 0: the content chunk.
+        let first = results[0].data.as_ref().unwrap();
+        let content = first.choices[0].delta.content.as_ref().unwrap();
+        assert_eq!(
+            content, "Hello, world!",
+            "Content chunk should have 'Hello, world!'"
+        );
+        // Chunk 1: the usage-only chunk, still without choices and with the same counters.
+        let second = results[1].data.as_ref().unwrap();
+        assert!(
+            second.choices.is_empty(),
+            "Usage chunk should have no choices"
+        );
+        let usage = second.usage.as_ref().unwrap();
+        assert_eq!(usage.prompt_tokens, 11);
+        assert_eq!(usage.completion_tokens, 3);
+        assert_eq!(usage.total_tokens, 14);
+    }
    #[tokio::test]
    async fn test_multiple_choices_usage_aggregation() {
        // Test that usage is correctly aggregated across multiple choices