Unverified commit c3ecaf6c, authored by Hongkuan Zhou, committed by GitHub
Browse files

feat: LLM metrics for non-streaming requests in frontend (#2427)

parent 72ec5f5c
......@@ -290,7 +290,11 @@ async fn completions(
Ok(sse_stream.into_response())
} else {
// TODO: report ISL/OSL for non-streaming requests
// Tap the stream to collect metrics for non-streaming requests without altering items
let stream = stream.inspect(move |response| {
process_metrics_only(response, &mut response_collector);
});
let response = NvCreateCompletionResponse::from_annotated_stream(stream)
.await
.map_err(|e| {
......@@ -515,7 +519,10 @@ async fn chat_completions(
Ok(sse_stream.into_response())
} else {
// TODO: report ISL/OSL for non-streaming requests
let stream = stream.inspect(move |response| {
process_metrics_only(response, &mut response_collector);
});
let response = NvCreateChatCompletionResponse::from_annotated_stream(stream)
.await
.map_err(|e| {
......@@ -911,6 +918,17 @@ impl<T> From<Annotated<T>> for EventConverter<T> {
}
}
/// Records LLM token metrics carried by a single annotated response item.
///
/// The item is only borrowed, so this can be used to tap a stream without
/// changing what flows through it. When `LLMMetricAnnotation::from_annotation`
/// yields anything other than `Ok(Some(_))`, the item is ignored and the
/// collector is left untouched.
///
/// # Arguments
/// * `annotated` - the response item to inspect for a metrics annotation.
/// * `response_collector` - the collector that receives the observations.
fn process_metrics_only<T>(
    annotated: &Annotated<T>,
    response_collector: &mut ResponseMetricCollector,
) {
    // Guard clause: nothing to record unless a metrics annotation was extracted.
    let Ok(Some(llm_metrics)) = LLMMetricAnnotation::from_annotation(annotated) else {
        return;
    };

    // Same order as before: current output length first, then the
    // input/chunk token observation.
    response_collector.observe_current_osl(llm_metrics.output_tokens);
    response_collector.observe_response(llm_metrics.input_tokens, llm_metrics.chunk_tokens);
}
fn process_event_converter<T: Serialize>(
annotated: EventConverter<T>,
response_collector: &mut ResponseMetricCollector,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment