Unverified Commit 1284237e authored by Thomas Montfort's avatar Thomas Montfort Committed by GitHub
Browse files

fix(frontend): classify cancelled requests as 499 instead of 500 (#8090)


Signed-off-by: default avatartmontfort <tmontfort@nvidia.com>
Co-authored-by: default avatarClaude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 934b49ef
......@@ -289,6 +289,16 @@ async fn anthropic_messages(
.metrics_clone()
.inc_rejection(&model, super::metrics::Endpoint::AnthropicMessages);
}
// Check for cancelled request (client disconnected before response was sent)
if super::metrics::request_was_cancelled(e.as_ref()) {
inflight_guard.mark_error(super::metrics::ErrorType::Cancelled);
return anthropic_error(
StatusCode::from_u16(499).unwrap(),
"request_cancelled",
&format!("Request cancelled: {}", e),
);
}
inflight_guard.mark_error(super::metrics::ErrorType::Internal);
anthropic_error(
StatusCode::INTERNAL_SERVER_ERROR,
"api_error",
......
......@@ -36,6 +36,13 @@ pub fn request_was_rejected(err: &(dyn std::error::Error + 'static)) -> bool {
dynamo_runtime::error::match_error_chain(err, REJECTION, NON_REJECTION)
}
/// Check whether an error chain indicates the request was cancelled.
pub fn request_was_cancelled(err: &(dyn std::error::Error + 'static)) -> bool {
const CANCELLATION: &[DynamoErrorType] = &[DynamoErrorType::Cancelled];
const NON_CANCELLATION: &[DynamoErrorType] = &[];
dynamo_runtime::error::match_error_chain(err, CANCELLATION, NON_CANCELLATION)
}
pub use prometheus::Registry;
use super::RouteDoc;
......
......@@ -94,6 +94,9 @@ pub(crate) struct ErrorMessage {
fn map_error_code_to_error_type(code: StatusCode) -> String {
match code.canonical_reason() {
Some(reason) => reason.to_string(),
// 499 is not IANA-registered (nginx convention for client-closed-request),
// so canonical_reason() returns None. Use the de facto standard name.
None if code.as_u16() == 499 => "Client Closed Request".to_string(),
None => "UnknownError".to_string(),
}
}
......@@ -114,6 +117,7 @@ fn classify_error_for_metrics(code: StatusCode, message: &str) -> ErrorType {
StatusCode::TOO_MANY_REQUESTS => ErrorType::Overload, // 429
StatusCode::SERVICE_UNAVAILABLE => ErrorType::Overload, // 503
StatusCode::INTERNAL_SERVER_ERROR => ErrorType::Internal, // 500
_ if code.as_u16() == 499 => ErrorType::Cancelled, // 499 Client Closed Request
_ if code.is_client_error() => ErrorType::Validation, // other 4xx
_ => ErrorType::Internal, // everything else
}
......@@ -220,6 +224,20 @@ impl ErrorMessage {
);
}
// Check for Cancelled anywhere in the error chain → HTTP 499 (Client Closed Request)
if super::metrics::request_was_cancelled(err.as_ref()) {
let code = StatusCode::from_u16(499).unwrap();
tracing::debug!("Request cancelled before response: {err}");
return (
code,
Json(ErrorMessage {
message: err.to_string(),
error_type: map_error_code_to_error_type(code),
code: code.as_u16(),
}),
);
}
// Then check for HttpError
match err.downcast::<HttpError>() {
Ok(http_error) => ErrorMessage::from_http_error(http_error),
......@@ -2524,6 +2542,38 @@ mod tests {
);
}
#[test]
fn test_cancelled_error_response_from_anyhow() {
use dynamo_runtime::error::{DynamoError, ErrorType};
let err: anyhow::Error = DynamoError::builder()
.error_type(ErrorType::Cancelled)
.message("Context id abc-123 is stopped or killed")
.build()
.into();
let response = ErrorMessage::from_anyhow(err, BACKUP_ERROR_MESSAGE);
assert_eq!(
response.0.as_u16(),
499,
"Cancelled errors should return HTTP 499"
);
assert_eq!(response.1.code, 499);
assert_eq!(response.1.error_type, "Client Closed Request");
assert!(response.1.message.contains("stopped or killed"));
}
#[test]
fn test_cancelled_error_metrics_classification() {
// HTTP 499 should be classified as Cancelled for metrics
let error_type =
classify_error_for_metrics(StatusCode::from_u16(499).unwrap(), "cancelled request");
assert_eq!(
error_type,
ErrorType::Cancelled,
"HTTP 499 should map to ErrorType::Cancelled in metrics"
);
}
#[test]
fn test_validate_unsupported_fields_accepts_clean_request() {
let request = make_base_request();
......
......@@ -204,10 +204,14 @@ impl RetryManager {
self.context.link_child(request.context());
if self.context.is_stopped() || self.context.is_killed() {
tracing::debug!("Abort creating new stream after context is stopped or killed");
return Err(Error::msg(format!(
return Err(DynamoError::builder()
.error_type(ErrorType::Cancelled)
.message(format!(
"Context id {} is stopped or killed",
self.context.id()
)));
))
.build()
.into());
}
response_stream = Some(self.next_generate.generate(request).await);
if let Some(err) = response_stream.as_ref().unwrap().as_ref().err()
......@@ -960,6 +964,15 @@ mod tests {
.to_string()
.contains(&format!("Context id {} is stopped or killed", context_id))
);
// Verify the error is a typed DynamoError with Cancelled type
let dynamo_err = error
.downcast_ref::<DynamoError>()
.expect("Error should be a DynamoError");
assert_eq!(
dynamo_err.error_type(),
ErrorType::Cancelled,
"Stopped/killed context should produce a Cancelled error"
);
}
assert_eq!(metrics.get_migration_new_request_count(TEST_MODEL), 0);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment