Unverified commit bba26d55, authored by Thomas Montfort, committed by GitHub
Browse files

fix(metrics): widen histogram buckets for request duration and router overhead (#8087)


Signed-off-by: tmontfort <tmontfort@nvidia.com>
Signed-off-by: Thomas Montfort <tmontfort@nvidia.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 243d7f07
...@@ -521,7 +521,7 @@ impl Metrics { ...@@ -521,7 +521,7 @@ impl Metrics {
// Request duration buckets: configurable via DYN_METRICS_REQUEST_DURATION_{MIN,MAX,COUNT} // Request duration buckets: configurable via DYN_METRICS_REQUEST_DURATION_{MIN,MAX,COUNT}
let (req_dur_min, req_dur_max, req_dur_count) = let (req_dur_min, req_dur_max, req_dur_count) =
parse_bucket_config("DYN_METRICS_REQUEST_DURATION", 1.0, 256.0, 10); parse_bucket_config("DYN_METRICS_REQUEST_DURATION", 1.0, 512.0, 10);
let request_duration_buckets = let request_duration_buckets =
generate_log_buckets(req_dur_min, req_dur_max, req_dur_count); generate_log_buckets(req_dur_min, req_dur_max, req_dur_count);
......
...@@ -56,10 +56,14 @@ use prometheus::{HistogramOpts, IntGaugeVec, Opts}; ...@@ -56,10 +56,14 @@ use prometheus::{HistogramOpts, IntGaugeVec, Opts};
use crate::http::service::metrics::generate_log_buckets; use crate::http::service::metrics::generate_log_buckets;
/// Exponential buckets for routing overhead histograms: /// Buckets for CPU-bound compute phases (block hashing, sequence hashing).
/// from 0.0001 ms (0.1 µs) to ~13.1 ms, factor 2, 18 steps. fn compute_overhead_buckets() -> Vec<f64> {
fn overhead_buckets() -> Vec<f64> { prometheus::exponential_buckets(0.001, 2.0, 15).unwrap()
prometheus::exponential_buckets(0.0001, 2.0, 18).expect("exponential buckets should not fail") }
/// Buckets for async phases (indexer find_matches, scheduling, total).
fn async_overhead_buckets() -> Vec<f64> {
prometheus::exponential_buckets(0.01, 3.0, 17).unwrap()
} }
// --------------------------------------------------------------------------- // ---------------------------------------------------------------------------
...@@ -219,39 +223,45 @@ impl RoutingOverheadMetrics { ...@@ -219,39 +223,45 @@ impl RoutingOverheadMetrics {
instance_id: u64, instance_id: u64,
) -> Result<(), prometheus::Error> { ) -> Result<(), prometheus::Error> {
let m = ROUTING_OVERHEAD_METRICS.get_or_init(|| { let m = ROUTING_OVERHEAD_METRICS.get_or_init(|| {
let buckets = overhead_buckets(); let compute_buckets = compute_overhead_buckets();
let async_buckets = async_overhead_buckets();
let router_id = instance_id.to_string(); let router_id = instance_id.to_string();
let make = |suffix: &str, help: &str| { let make = |suffix: &str, help: &str, buckets: Vec<f64>| {
let name = format!("{}_{}", name_prefix::ROUTER, suffix); let name = format!("{}_{}", name_prefix::ROUTER, suffix);
prometheus::Histogram::with_opts( prometheus::Histogram::with_opts(
HistogramOpts::new(name, help) HistogramOpts::new(name, help)
.const_label(labels::ROUTER_ID, &router_id) .const_label(labels::ROUTER_ID, &router_id)
.buckets(buckets.clone()), .buckets(buckets),
) )
}; };
let block_hashing = make( let block_hashing = make(
routing_overhead::BLOCK_HASHING_MS, routing_overhead::BLOCK_HASHING_MS,
"Time spent computing block hashes in milliseconds", "Time spent computing block hashes in milliseconds",
compute_buckets.clone(),
) )
.expect("overhead_block_hashing_ms"); .expect("overhead_block_hashing_ms");
let indexer_find_matches = make( let indexer_find_matches = make(
routing_overhead::INDEXER_FIND_MATCHES_MS, routing_overhead::INDEXER_FIND_MATCHES_MS,
"Time spent in indexer find_matches in milliseconds", "Time spent in indexer find_matches in milliseconds",
async_buckets.clone(),
) )
.expect("overhead_indexer_find_matches_ms"); .expect("overhead_indexer_find_matches_ms");
let seq_hashing = make( let seq_hashing = make(
routing_overhead::SEQ_HASHING_MS, routing_overhead::SEQ_HASHING_MS,
"Time spent computing sequence hashes in milliseconds", "Time spent computing sequence hashes in milliseconds",
compute_buckets,
) )
.expect("overhead_seq_hashing_ms"); .expect("overhead_seq_hashing_ms");
let scheduling = make( let scheduling = make(
routing_overhead::SCHEDULING_MS, routing_overhead::SCHEDULING_MS,
"Time spent in scheduler worker selection in milliseconds", "Time spent in scheduler worker selection in milliseconds",
async_buckets.clone(),
) )
.expect("overhead_scheduling_ms"); .expect("overhead_scheduling_ms");
let total = make( let total = make(
routing_overhead::TOTAL_MS, routing_overhead::TOTAL_MS,
"Total routing overhead per request in milliseconds", "Total routing overhead per request in milliseconds",
async_buckets,
) )
.expect("overhead_total_ms"); .expect("overhead_total_ms");
Arc::new(Self { Arc::new(Self {
...@@ -600,7 +610,7 @@ dynamo_frontend_router_queue_pending_requests{worker_type=\"decode\"} 5 ...@@ -600,7 +610,7 @@ dynamo_frontend_router_queue_pending_requests{worker_type=\"decode\"} 5
// Verify the overhead constants produce valid histogram names when // Verify the overhead constants produce valid histogram names when
// combined with dynamo_router_ prefix. // combined with dynamo_router_ prefix.
let registry = prometheus::Registry::new(); let registry = prometheus::Registry::new();
let buckets = overhead_buckets(); let buckets = async_overhead_buckets();
let prefix = name_prefix::ROUTER; let prefix = name_prefix::ROUTER;
let name = format!("{}_{}", prefix, routing_overhead::TOTAL_MS); let name = format!("{}_{}", prefix, routing_overhead::TOTAL_MS);
let total = prometheus::Histogram::with_opts( let total = prometheus::Histogram::with_opts(
......
...@@ -36,7 +36,7 @@ pub static WORK_HANDLER_TIME_TO_FIRST_RESPONSE_SECONDS: Lazy<Histogram> = Lazy:: ...@@ -36,7 +36,7 @@ pub static WORK_HANDLER_TIME_TO_FIRST_RESPONSE_SECONDS: Lazy<Histogram> = Lazy::
"Backend processing time from handle_payload entry to prologue sent (seconds)", "Backend processing time from handle_payload entry to prologue sent (seconds)",
) )
.buckets(vec![ .buckets(vec![
0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 30.0, 60.0,
]), ]),
) )
.expect("work_handler_time_to_first_response_seconds histogram") .expect("work_handler_time_to_first_response_seconds histogram")
......
...@@ -61,11 +61,18 @@ impl WorkHandlerMetrics { ...@@ -61,11 +61,18 @@ impl WorkHandlerMetrics {
metrics_labels, metrics_labels,
)?; )?;
// Custom buckets for inference workloads: retain sub-second resolution for
// fast operations, extend well beyond the default 10s ceiling to capture
// long-running generation requests that can last minutes.
let request_duration_buckets = vec![
0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, 20.0, 30.0, 60.0, 120.0,
300.0, 600.0,
];
let request_duration = metrics.create_histogram( let request_duration = metrics.create_histogram(
work_handler::REQUEST_DURATION_SECONDS, work_handler::REQUEST_DURATION_SECONDS,
"Time spent processing requests by work handler", "Time spent processing requests by work handler",
metrics_labels, metrics_labels,
None, Some(request_duration_buckets),
)?; )?;
let inflight_requests = metrics.create_intgauge( let inflight_requests = metrics.create_intgauge(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment