response_formatting.rs 2.11 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
//! Shared response formatting logic
//!
//! This module contains common logic for formatting responses, including:
//! - Usage calculation from gRPC responses
//! - ChatCompletionResponse construction

use crate::{
    grpc_client::proto,
    protocols::{
        chat::{ChatChoice, ChatCompletionResponse},
        common::Usage,
    },
    routers::grpc::context::DispatchMetadata,
};

/// Build usage information from collected gRPC responses
///
/// Sums prompt_tokens and completion_tokens across all responses.
/// Typically used with n>1 parameter where multiple completions are generated.
///
/// # Arguments
/// * `responses` - Vector of GenerateComplete responses from the backend
///
/// # Returns
/// Usage object with aggregated token counts
pub fn build_usage(responses: &[proto::GenerateComplete]) -> Usage {
    let total_prompt_tokens: u32 = responses.iter().map(|r| r.prompt_tokens as u32).sum();
    let total_completion_tokens: u32 = responses.iter().map(|r| r.completion_tokens as u32).sum();

    Usage {
        prompt_tokens: total_prompt_tokens,
        completion_tokens: total_completion_tokens,
        total_tokens: total_prompt_tokens + total_completion_tokens,
        completion_tokens_details: None,
    }
}

/// Build final ChatCompletionResponse from processed choices
///
/// Constructs the OpenAI-compatible response object with all metadata.
///
/// # Arguments
/// * `choices` - Processed chat choices (after parsing, logprobs, etc.)
/// * `dispatch` - Dispatch metadata (request_id, created timestamp, etc.)
/// * `model` - Model name to include in response
/// * `usage` - Token usage information
///
/// # Returns
/// Complete ChatCompletionResponse ready to send to client
pub fn build_chat_response(
    choices: Vec<ChatChoice>,
    dispatch: &DispatchMetadata,
    model: String,
    usage: Usage,
) -> ChatCompletionResponse {
    ChatCompletionResponse {
        id: dispatch.request_id.clone(),
        object: "chat.completion".to_string(),
        created: dispatch.created,
        model,
        choices,
        usage: Some(usage),
        system_fingerprint: dispatch.weight_version.clone(),
    }
}