// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 //! Transport breakdown metrics for work handler (backend side). //! Captures network transit (T2-T1) and backend processing time (T3-T2). use once_cell::sync::{Lazy, OnceCell}; use prometheus::{Histogram, HistogramOpts}; use super::prometheus_names::{name_prefix, work_handler}; use crate::MetricsRegistry; fn work_handler_metric_name(suffix: &str) -> String { format!("{}_{}", name_prefix::WORK_HANDLER, suffix) } /// Network transit: frontend send to backend receive (wall-clock, cross-process). pub static WORK_HANDLER_NETWORK_TRANSIT_SECONDS: Lazy = Lazy::new(|| { Histogram::with_opts( HistogramOpts::new( work_handler_metric_name(work_handler::NETWORK_TRANSIT_SECONDS), "Frontend-to-backend network transit time (cross-process wall-clock, seconds)", ) .buckets(vec![ 0.0001, 0.0005, 0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, ]), ) .expect("work_handler_network_transit_seconds histogram") }); /// Backend processing: handle_payload entry to first response sent. pub static WORK_HANDLER_TIME_TO_FIRST_RESPONSE_SECONDS: Lazy = Lazy::new(|| { Histogram::with_opts( HistogramOpts::new( work_handler_metric_name(work_handler::TIME_TO_FIRST_RESPONSE_SECONDS), "Backend processing time from handle_payload entry to prologue sent (seconds)", ) .buckets(vec![ 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0, 10.0, ]), ) .expect("work_handler_time_to_first_response_seconds histogram") }); /// Guards idempotency for the `MetricsRegistry` registration path. static METRICS_REGISTERED: OnceCell<()> = OnceCell::new(); /// Guards idempotency for the raw `prometheus::Registry` registration path. /// Kept separate from `METRICS_REGISTERED` so that calling `ensure_work_handler_perf_metrics_registered` /// first does not silently prevent the metrics from being registered in the prometheus registry. static PROMETHEUS_REGISTERED: OnceCell> = OnceCell::new(); /// Register work handler transport breakdown metrics with the given registry. Idempotent. pub fn ensure_work_handler_perf_metrics_registered(registry: &MetricsRegistry) { let _ = METRICS_REGISTERED.get_or_init(|| { registry.add_metric_or_warn( Box::new(WORK_HANDLER_NETWORK_TRANSIT_SECONDS.clone()), "work_handler_network_transit_seconds", ); registry.add_metric_or_warn( Box::new(WORK_HANDLER_TIME_TO_FIRST_RESPONSE_SECONDS.clone()), "work_handler_time_to_first_response_seconds", ); }); } /// Register with a raw Prometheus registry. Idempotent. pub fn ensure_work_handler_perf_metrics_registered_prometheus( registry: &prometheus::Registry, ) -> Result<(), prometheus::Error> { PROMETHEUS_REGISTERED .get_or_init(|| { (|| -> Result<(), prometheus::Error> { registry.register(Box::new(WORK_HANDLER_NETWORK_TRANSIT_SECONDS.clone()))?; registry.register(Box::new( WORK_HANDLER_TIME_TO_FIRST_RESPONSE_SECONDS.clone(), ))?; Ok(()) })() .map_err(|e| e.to_string()) }) .as_ref() .map(|_| ()) .map_err(|e| prometheus::Error::Msg(e.clone())) }