"docs/components/backends/vscode:/vscode.git/clone" did not exist on "b640f283f329c2ca901afbb6a9824e89d7ba38ec"
Unverified Commit 432fae67 authored by Vladislav Nosivskoy's avatar Vladislav Nosivskoy Committed by GitHub
Browse files

feat: add error_type label to reqs metric for fine-grained error classification (#5568)


Signed-off-by: default avatarVladislav Nosivskoy <vladnosiv@gmail.com>
Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: default avatarCursor <cursoragent@cursor.com>
parent c0f34b15
......@@ -33,7 +33,7 @@ use dynamo_runtime::engine::AsyncEngineContext;
use futures::{Stream, StreamExt};
use std::sync::Arc;
use crate::http::service::metrics::{InflightGuard, Metrics};
use crate::http::service::metrics::{ErrorType, InflightGuard, Metrics};
#[derive(Clone, Copy)]
pub enum ConnectionStatus {
......@@ -171,6 +171,12 @@ pub fn monitor_for_disconnects(
mut stream_handle: ConnectionHandle,
) -> impl Stream<Item = Result<Event, axum::Error>> {
stream_handle.arm();
// Default to Cancelled: if the stream is dropped unexpectedly (e.g. client
// disconnect causing a broken-pipe on the SSE write), the guard will report
// "cancelled" instead of "internal". The happy path overrides this via mark_ok().
inflight_guard.mark_error(ErrorType::Cancelled);
async_stream::try_stream! {
tokio::pin!(stream);
loop {
......@@ -181,7 +187,11 @@ pub fn monitor_for_disconnects(
yield event;
}
Some(Err(err)) => {
// Mark error as internal since it's a streaming error
inflight_guard.mark_error(ErrorType::Internal);
yield Event::default().event("error").comment(err.to_string());
// Break to prevent any subsequent mark_ok() from overwriting the error
break;
}
None => {
// Stream ended normally
......@@ -197,6 +207,8 @@ pub fn monitor_for_disconnects(
}
_ = context.stopped() => {
tracing::trace!("Context stopped; breaking stream");
// Mark as cancelled when context is stopped (client disconnect or timeout)
inflight_guard.mark_error(ErrorType::Cancelled);
break;
}
}
......
......@@ -273,11 +273,13 @@ pub struct InflightGuard {
endpoint: Endpoint,
request_type: RequestType,
status: Status,
error_type: ErrorType,
timer: Instant,
}
/// Requests will be logged by the type of endpoint hit
/// This will include llamastack in the future
#[derive(Clone, Copy)]
pub enum Endpoint {
/// OAI Completions
Completions,
......@@ -320,6 +322,25 @@ pub enum Status {
Error,
}
/// Error type classification for fine-grained observability
#[derive(PartialEq, Clone, Debug)]
pub enum ErrorType {
/// No error (for successful requests)
None,
/// Client validation error (4xx with "Validation:" prefix)
Validation,
/// Model or resource not found (404)
NotFound,
/// Service overloaded, too many requests (503)
Overload,
/// Request cancelled by client or timeout
Cancelled,
/// Internal server error (500 and other unexpected errors)
Internal,
/// Feature not implemented (501)
NotImplemented,
}
/// Track response-specific metrics
pub struct ResponseMetricCollector {
metrics: Arc<Metrics>,
......@@ -422,7 +443,7 @@ impl Metrics {
frontend_metric_name(frontend_service::REQUESTS_TOTAL),
"Total number of LLM requests processed",
),
&["model", "endpoint", "request_type", "status"],
&["model", "endpoint", "request_type", "status", "error_type"],
)
.unwrap();
......@@ -657,6 +678,7 @@ impl Metrics {
endpoint: &Endpoint,
request_type: &RequestType,
status: &Status,
error_type: &ErrorType,
) -> u64 {
self.request_counter
.with_label_values(&[
......@@ -664,6 +686,7 @@ impl Metrics {
endpoint.as_str(),
request_type.as_str(),
status.as_str(),
error_type.as_str(),
])
.get()
}
......@@ -679,6 +702,7 @@ impl Metrics {
endpoint: &Endpoint,
request_type: &RequestType,
status: &Status,
error_type: &ErrorType,
) {
self.request_counter
.with_label_values(&[
......@@ -686,6 +710,7 @@ impl Metrics {
endpoint.as_str(),
request_type.as_str(),
status.as_str(),
error_type.as_str(),
])
.inc()
}
......@@ -908,12 +933,19 @@ impl InflightGuard {
endpoint,
request_type,
status: Status::Error,
error_type: ErrorType::Internal,
timer,
}
}
pub(crate) fn mark_ok(&mut self) {
self.status = Status::Success;
self.error_type = ErrorType::None;
}
pub(crate) fn mark_error(&mut self, error_type: ErrorType) {
self.status = Status::Error;
self.error_type = error_type;
}
}
......@@ -932,6 +964,7 @@ impl Drop for InflightGuard {
&self.endpoint,
&self.request_type,
&self.status,
&self.error_type,
);
// Record the duration of the request
......@@ -990,6 +1023,20 @@ impl Status {
}
}
impl ErrorType {
pub fn as_str(&self) -> &'static str {
match self {
ErrorType::None => frontend_service::error_type::NONE,
ErrorType::Validation => frontend_service::error_type::VALIDATION,
ErrorType::NotFound => frontend_service::error_type::NOT_FOUND,
ErrorType::Overload => frontend_service::error_type::OVERLOAD,
ErrorType::Cancelled => frontend_service::error_type::CANCELLED,
ErrorType::Internal => frontend_service::error_type::INTERNAL,
ErrorType::NotImplemented => frontend_service::error_type::NOT_IMPLEMENTED,
}
}
}
impl ResponseMetricCollector {
fn new(metrics: Arc<Metrics>, model: String) -> Self {
ResponseMetricCollector {
......@@ -1940,4 +1987,229 @@ mod tests {
detokenize_metric.get_histogram().get_sample_sum()
);
}
#[test]
fn test_error_type_as_str() {
assert_eq!(ErrorType::None.as_str(), "");
assert_eq!(ErrorType::Validation.as_str(), "validation");
assert_eq!(ErrorType::NotFound.as_str(), "not_found");
assert_eq!(ErrorType::Overload.as_str(), "overload");
assert_eq!(ErrorType::Cancelled.as_str(), "cancelled");
assert_eq!(ErrorType::Internal.as_str(), "internal");
assert_eq!(ErrorType::NotImplemented.as_str(), "not_implemented");
}
#[test]
fn test_inflight_guard_marks_success_with_correct_error_type() {
let metrics = Arc::new(Metrics::new());
let registry = prometheus::Registry::new();
metrics.register(&registry).unwrap();
let model = "test-model";
{
let mut guard =
metrics
.clone()
.create_inflight_guard(model, Endpoint::ChatCompletions, false);
guard.mark_ok();
} // guard drops here
// Verify counter incremented with status=success, error_type=""
let counter_value = metrics
.request_counter
.with_label_values(&[
model,
Endpoint::ChatCompletions.as_str(),
RequestType::Unary.as_str(),
Status::Success.as_str(),
ErrorType::None.as_str(),
])
.get();
assert_eq!(counter_value, 1);
}
#[test]
fn test_inflight_guard_marks_validation_error() {
let metrics = Arc::new(Metrics::new());
let registry = prometheus::Registry::new();
metrics.register(&registry).unwrap();
let model = "test-model";
{
let mut guard =
metrics
.clone()
.create_inflight_guard(model, Endpoint::ChatCompletions, false);
guard.mark_error(ErrorType::Validation);
} // guard drops here
// Verify counter incremented with status=error, error_type=validation
let counter_value = metrics
.request_counter
.with_label_values(&[
model,
Endpoint::ChatCompletions.as_str(),
RequestType::Unary.as_str(),
Status::Error.as_str(),
ErrorType::Validation.as_str(),
])
.get();
assert_eq!(counter_value, 1);
}
#[test]
fn test_inflight_guard_defaults_to_internal_error_on_drop() {
let metrics = Arc::new(Metrics::new());
let registry = prometheus::Registry::new();
metrics.register(&registry).unwrap();
let model = "test-model";
{
let _guard =
metrics
.clone()
.create_inflight_guard(model, Endpoint::ChatCompletions, false);
// Don't call mark_ok() or mark_error() - simulate panic/unhandled error
} // guard drops with default error_type=Internal
// Verify counter incremented with status=error, error_type=internal
let counter_value = metrics
.request_counter
.with_label_values(&[
model,
Endpoint::ChatCompletions.as_str(),
RequestType::Unary.as_str(),
Status::Error.as_str(),
ErrorType::Internal.as_str(),
])
.get();
assert_eq!(counter_value, 1);
}
#[test]
fn test_all_error_types_recorded_correctly() {
let metrics = Arc::new(Metrics::new());
let registry = prometheus::Registry::new();
metrics.register(&registry).unwrap();
let model = "test-model";
let endpoint = Endpoint::ChatCompletions;
// Test each error type
let error_types = vec![
ErrorType::Validation,
ErrorType::NotFound,
ErrorType::Overload,
ErrorType::Cancelled,
ErrorType::Internal,
ErrorType::NotImplemented,
];
for error_type in &error_types {
let mut guard = metrics
.clone()
.create_inflight_guard(model, endpoint, false);
guard.mark_error(error_type.clone());
drop(guard);
}
// Verify each error type recorded correctly
for error_type in &error_types {
let counter_value = metrics
.request_counter
.with_label_values(&[
model,
endpoint.as_str(),
RequestType::Unary.as_str(),
Status::Error.as_str(),
error_type.as_str(),
])
.get();
assert_eq!(
counter_value,
1,
"Should have 1 request for error_type={}",
error_type.as_str()
);
}
}
#[test]
fn test_multiple_requests_different_error_types() {
let metrics = Arc::new(Metrics::new());
let registry = prometheus::Registry::new();
metrics.register(&registry).unwrap();
let model = "test-model";
// Record 2 validation errors, 3 internal errors, 1 success
for _ in 0..2 {
let mut guard =
metrics
.clone()
.create_inflight_guard(model, Endpoint::ChatCompletions, false);
guard.mark_error(ErrorType::Validation);
drop(guard);
}
for _ in 0..3 {
let mut guard =
metrics
.clone()
.create_inflight_guard(model, Endpoint::Completions, false);
guard.mark_error(ErrorType::Internal);
drop(guard);
}
{
let mut guard =
metrics
.clone()
.create_inflight_guard(model, Endpoint::Embeddings, false);
guard.mark_ok();
drop(guard);
}
// Check validation errors (2 from ChatCompletions)
let validation_count = metrics
.request_counter
.with_label_values(&[
model,
Endpoint::ChatCompletions.as_str(),
RequestType::Unary.as_str(),
Status::Error.as_str(),
ErrorType::Validation.as_str(),
])
.get();
assert_eq!(validation_count, 2);
// Check internal errors (3 from Completions)
let internal_count = metrics
.request_counter
.with_label_values(&[
model,
Endpoint::Completions.as_str(),
RequestType::Unary.as_str(),
Status::Error.as_str(),
ErrorType::Internal.as_str(),
])
.get();
assert_eq!(internal_count, 3);
// Check success (1 from Embeddings)
let success_count = metrics
.request_counter
.with_label_values(&[
model,
Endpoint::Embeddings.as_str(),
RequestType::Unary.as_str(),
Status::Success.as_str(),
ErrorType::None.as_str(),
])
.get();
assert_eq!(success_count, 1);
}
}
This diff is collapsed.
......@@ -22,7 +22,7 @@ use dynamo_llm::{
service::{
Metrics,
error::HttpError,
metrics::{Endpoint, RequestType, Status},
metrics::{Endpoint, ErrorType, RequestType, Status},
service_v2::HttpService,
},
},
......@@ -197,16 +197,18 @@ fn compare_counter(
endpoint: &Endpoint,
request_type: &RequestType,
status: &Status,
error_type: &ErrorType,
expected: u64,
) {
assert_eq!(
metrics.get_request_counter(model, endpoint, request_type, status),
metrics.get_request_counter(model, endpoint, request_type, status, error_type),
expected,
"model: {}, endpoint: {:?}, request_type: {:?}, status: {:?}",
"model: {}, endpoint: {:?}, request_type: {:?}, status: {:?}, error_type: {:?}",
model,
endpoint.as_str(),
request_type.as_str(),
status.as_str()
status.as_str(),
error_type.as_str()
);
}
......@@ -240,12 +242,17 @@ fn compare_counters(metrics: &Metrics, model: &str, expected: &[u64; 8]) {
for request_type in &[RequestType::Unary, RequestType::Stream] {
for status in &[Status::Success, Status::Error] {
let index = compute_index(endpoint, request_type, status);
let error_type = match status {
Status::Success => &ErrorType::None,
Status::Error => &ErrorType::Validation, // Test engines return 4xx errors
};
compare_counter(
metrics,
model,
endpoint,
request_type,
status,
error_type,
expected[index],
);
}
......
......@@ -249,6 +249,30 @@ pub mod frontend_service {
/// Value for unary requests
pub const UNARY: &str = "unary";
}
/// Error type label values for fine-grained error classification
pub mod error_type {
/// No error (used for successful requests)
pub const NONE: &str = "";
/// Client validation error (4xx with "Validation:" prefix)
pub const VALIDATION: &str = "validation";
/// Model or resource not found (404)
pub const NOT_FOUND: &str = "not_found";
/// Service overloaded, too many requests (503)
pub const OVERLOAD: &str = "overload";
/// Request cancelled by client or timeout
pub const CANCELLED: &str = "cancelled";
/// Internal server error (500 and other unexpected errors)
pub const INTERNAL: &str = "internal";
/// Feature not implemented (501)
pub const NOT_IMPLEMENTED: &str = "not_implemented";
}
}
/// Work handler Prometheus metric names
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment