"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "9e8731e50da395930b0017cc965c09e0fecae4d8"
Unverified Commit c3ecaf6c authored by Hongkuan Zhou's avatar Hongkuan Zhou Committed by GitHub
Browse files

feat: LLM metrics for non-streaming requests in frontend (#2427)

parent 72ec5f5c
...@@ -290,7 +290,11 @@ async fn completions( ...@@ -290,7 +290,11 @@ async fn completions(
Ok(sse_stream.into_response()) Ok(sse_stream.into_response())
} else { } else {
// TODO: report ISL/OSL for non-streaming requests // Tap the stream to collect metrics for non-streaming requests without altering items
let stream = stream.inspect(move |response| {
process_metrics_only(response, &mut response_collector);
});
let response = NvCreateCompletionResponse::from_annotated_stream(stream) let response = NvCreateCompletionResponse::from_annotated_stream(stream)
.await .await
.map_err(|e| { .map_err(|e| {
...@@ -515,7 +519,10 @@ async fn chat_completions( ...@@ -515,7 +519,10 @@ async fn chat_completions(
Ok(sse_stream.into_response()) Ok(sse_stream.into_response())
} else { } else {
// TODO: report ISL/OSL for non-streaming requests let stream = stream.inspect(move |response| {
process_metrics_only(response, &mut response_collector);
});
let response = NvCreateChatCompletionResponse::from_annotated_stream(stream) let response = NvCreateChatCompletionResponse::from_annotated_stream(stream)
.await .await
.map_err(|e| { .map_err(|e| {
...@@ -911,6 +918,17 @@ impl<T> From<Annotated<T>> for EventConverter<T> { ...@@ -911,6 +918,17 @@ impl<T> From<Annotated<T>> for EventConverter<T> {
} }
} }
/// Feeds LLM token metrics from a single annotated response into the collector.
///
/// The response is only borrowed and never modified, so this is safe to call
/// from a stream tap (e.g. `inspect`) on the non-streaming path.
///
/// If `LLMMetricAnnotation::from_annotation` yields an error or `None`, the
/// item is skipped silently and the collector is left untouched.
fn process_metrics_only<T>(
    annotated: &Annotated<T>,
    response_collector: &mut ResponseMetricCollector,
) {
    // Guard clause: nothing to record unless a metrics annotation was extracted.
    let Ok(Some(llm_metrics)) = LLMMetricAnnotation::from_annotation(annotated) else {
        return;
    };

    // Keep the same call order as before: current OSL first, then the
    // per-response observation with input and chunk token counts.
    response_collector.observe_current_osl(llm_metrics.output_tokens);
    response_collector.observe_response(llm_metrics.input_tokens, llm_metrics.chunk_tokens);
}
fn process_event_converter<T: Serialize>( fn process_event_converter<T: Serialize>(
annotated: EventConverter<T>, annotated: EventConverter<T>,
response_collector: &mut ResponseMetricCollector, response_collector: &mut ResponseMetricCollector,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment