Unverified Commit f0065bb4 authored by William Arnold's avatar William Arnold Committed by GitHub
Browse files

feat: Metric histogram buckets configureable from env (#3562)


Signed-off-by: default avatarWilliam Arnold <7565007+Aphoh@users.noreply.github.com>
parent d4ff6f0a
...@@ -26,6 +26,128 @@ pub use prometheus::Registry; ...@@ -26,6 +26,128 @@ pub use prometheus::Registry;
use super::RouteDoc; use super::RouteDoc;
/// Generate log-spaced histogram buckets with values rounded to 2 significant figures.
///
/// # Arguments
/// * `min` - Minimum value for the buckets (must be > 0 for log spacing)
/// * `max` - Maximum value for the buckets (must be > min)
/// * `count` - Number of buckets to generate
///
/// # Returns
/// A vector of log-spaced values, always starting with 0.0 and ending with the rounded max value.
/// Duplicates created by rounding are removed, so the final count may be less than requested.
///
/// # Note
/// With 2 significant figures, there are roughly 90 unique values per order of magnitude.
/// Requesting more buckets than can be uniquely represented will result in deduplication.
fn generate_log_buckets(min: f64, max: f64, count: usize) -> Vec<f64> {
if count == 0 {
return vec![];
}
if count == 1 {
return vec![0.0];
}
let requested_count = count;
let mut buckets = Vec::with_capacity(count);
buckets.push(0.0);
// Generate log-spaced values from min to max
for i in 1..count {
let log_min = min.ln();
let log_max = max.ln();
let log_value = log_min + (log_max - log_min) * (i as f64) / ((count - 1) as f64);
let value = log_value.exp();
buckets.push(round_to_sig_figs(value, 2));
}
// Remove consecutive duplicates (buckets are already sorted)
let original_len = buckets.len();
buckets.dedup();
// Warn if significant deduplication occurred
if buckets.len() < original_len && (original_len - buckets.len()) > original_len / 10 {
tracing::warn!(
requested = requested_count,
unique = buckets.len(),
duplicates = original_len - buckets.len(),
min = min,
max = max,
"Histogram bucket generation: Significant duplicate values after rounding to 2 sig figs. \
Consider reducing bucket count or increasing range."
);
}
buckets
}
/// Round a number to a specified number of significant figures
fn round_to_sig_figs(value: f64, sig_figs: u32) -> f64 {
if value == 0.0 {
return 0.0;
}
let magnitude = value.abs().log10().floor();
let scale = 10_f64.powf(sig_figs as f64 - 1.0 - magnitude);
(value * scale).round() / scale
}
const MAX_BUCKET_COUNT: usize = 512;
fn validate_bucket_config(min: f64, max: f64, count: usize) -> bool {
min.is_finite()
&& max.is_finite()
&& min > 0.0
&& min < max
&& count > 0
&& count <= MAX_BUCKET_COUNT
}
/// Parse histogram bucket configuration from environment variables
/// Returns (min, max, count) with defaults if not specified
fn parse_bucket_config(
env_prefix: &str,
default_min: f64,
default_max: f64,
default_count: usize,
) -> (f64, f64, usize) {
if !validate_bucket_config(default_min, default_max, default_count) {
tracing::error!(
default_min,
default_max,
default_count,
"Invalid default histogram configuration"
);
return (1.0, 10.0, 10);
}
let mut min = std::env::var(format!("{env_prefix}_MIN"))
.ok()
.and_then(|s| s.parse::<f64>().ok())
.unwrap_or(default_min);
let mut max = std::env::var(format!("{env_prefix}_MAX"))
.ok()
.and_then(|s| s.parse::<f64>().ok())
.unwrap_or(default_max);
let mut count = std::env::var(format!("{env_prefix}_COUNT"))
.ok()
.and_then(|s| s.parse::<usize>().ok())
.unwrap_or(default_count);
if !validate_bucket_config(min, max, count) {
tracing::warn!(
min=%min,
max=%max,
count=%count,
"Invalid histogram configuration given, using defaults"
);
min = default_min;
max = default_max;
count = default_count;
}
(min, max, count)
}
/// State for metrics handler with custom backend support /// State for metrics handler with custom backend support
struct MetricsHandlerState { struct MetricsHandlerState {
registry: Arc<Registry>, registry: Arc<Registry>,
...@@ -147,6 +269,17 @@ impl Metrics { ...@@ -147,6 +269,17 @@ impl Metrics {
/// - `{prefix}_time_to_first_token_seconds` - HistogramVec for time to first token in seconds /// - `{prefix}_time_to_first_token_seconds` - HistogramVec for time to first token in seconds
/// - `{prefix}_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds /// - `{prefix}_inter_token_latency_seconds` - HistogramVec for inter-token latency in seconds
/// ///
/// ## Histogram Bucket Configuration
///
/// All histograms use log-spaced buckets rounded to 2 significant figures. Bucket configuration
/// can be customized via environment variables (MIN: minimum value, MAX: maximum value, COUNT: number of buckets):
///
/// - `DYN_METRICS_REQUEST_DURATION_{MIN,MAX,COUNT}` - Request duration histogram (defaults: 1.0, 256.0, 10)
/// - `DYN_METRICS_INPUT_SEQUENCE_{MIN,MAX,COUNT}` - Input sequence length histogram (defaults: 50.0, 128000.0, 12)
/// - `DYN_METRICS_OUTPUT_SEQUENCE_{MIN,MAX,COUNT}` - Output sequence length histogram (defaults: 50.0, 32000.0, 10)
/// - `DYN_METRICS_TTFT_{MIN,MAX,COUNT}` - Time to first token histogram (defaults: 0.001, 480.0, 18)
/// - `DYN_METRICS_ITL_{MIN,MAX,COUNT}` - Inter-token latency histogram (defaults: 0.001, 2.0, 13)
///
/// ## Model Configuration Metrics /// ## Model Configuration Metrics
/// ///
/// Runtime config metrics (from ModelRuntimeConfig): /// Runtime config metrics (from ModelRuntimeConfig):
...@@ -213,64 +346,77 @@ impl Metrics { ...@@ -213,64 +346,77 @@ impl Metrics {
) )
.unwrap(); .unwrap();
let buckets = vec![0.0, 1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0, 128.0, 256.0]; // Request duration buckets: configurable via DYN_METRICS_REQUEST_DURATION_{MIN,MAX,COUNT}
let (req_dur_min, req_dur_max, req_dur_count) =
parse_bucket_config("DYN_METRICS_REQUEST_DURATION", 1.0, 256.0, 10);
let request_duration_buckets =
generate_log_buckets(req_dur_min, req_dur_max, req_dur_count);
let request_duration = HistogramVec::new( let request_duration = HistogramVec::new(
HistogramOpts::new( HistogramOpts::new(
frontend_metric_name(frontend_service::REQUEST_DURATION_SECONDS), frontend_metric_name(frontend_service::REQUEST_DURATION_SECONDS),
"Duration of LLM requests", "Duration of LLM requests",
) )
.buckets(buckets), .buckets(request_duration_buckets),
&["model"], &["model"],
) )
.unwrap(); .unwrap();
// Input sequence length buckets: configurable via DYN_METRICS_INPUT_SEQUENCE_{MIN,MAX,COUNT}
let (isl_min, isl_max, isl_count) =
parse_bucket_config("DYN_METRICS_INPUT_SEQUENCE", 50.0, 128000.0, 12);
let input_sequence_buckets = generate_log_buckets(isl_min, isl_max, isl_count);
let input_sequence_length = HistogramVec::new( let input_sequence_length = HistogramVec::new(
HistogramOpts::new( HistogramOpts::new(
frontend_metric_name(frontend_service::INPUT_SEQUENCE_TOKENS), frontend_metric_name(frontend_service::INPUT_SEQUENCE_TOKENS),
"Input sequence length in tokens", "Input sequence length in tokens",
) )
.buckets(vec![ .buckets(input_sequence_buckets),
0.0, 50.0, 100.0, 500.0, 1000.0, 2000.0, 4000.0, 8000.0, 16000.0, 32000.0, 64000.0,
128000.0,
]),
&["model"], &["model"],
) )
.unwrap(); .unwrap();
// Output sequence length buckets: configurable via DYN_METRICS_OUTPUT_SEQUENCE_{MIN,MAX,COUNT}
let (osl_min, osl_max, osl_count) =
parse_bucket_config("DYN_METRICS_OUTPUT_SEQUENCE", 50.0, 32000.0, 10);
let output_sequence_buckets = generate_log_buckets(osl_min, osl_max, osl_count);
let output_sequence_length = HistogramVec::new( let output_sequence_length = HistogramVec::new(
HistogramOpts::new( HistogramOpts::new(
frontend_metric_name(frontend_service::OUTPUT_SEQUENCE_TOKENS), frontend_metric_name(frontend_service::OUTPUT_SEQUENCE_TOKENS),
"Output sequence length in tokens", "Output sequence length in tokens",
) )
.buckets(vec![ .buckets(output_sequence_buckets),
0.0, 50.0, 100.0, 500.0, 1000.0, 2000.0, 4000.0, 8000.0, 16000.0, 32000.0,
]),
&["model"], &["model"],
) )
.unwrap(); .unwrap();
// Time to first token buckets: configurable via DYN_METRICS_TTFT_{MIN,MAX,COUNT}
let (ttft_min, ttft_max, ttft_count) =
parse_bucket_config("DYN_METRICS_TTFT", 0.001, 480.0, 18);
let time_to_first_token_buckets = generate_log_buckets(ttft_min, ttft_max, ttft_count);
let time_to_first_token = HistogramVec::new( let time_to_first_token = HistogramVec::new(
HistogramOpts::new( HistogramOpts::new(
frontend_metric_name(frontend_service::TIME_TO_FIRST_TOKEN_SECONDS), frontend_metric_name(frontend_service::TIME_TO_FIRST_TOKEN_SECONDS),
"Time to first token in seconds", "Time to first token in seconds",
) )
.buckets(vec![ .buckets(time_to_first_token_buckets),
0.0, 0.001, 0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0,
60.0, 120.0, 240.0, 480.0,
]),
&["model"], &["model"],
) )
.unwrap(); .unwrap();
// Inter-token latency buckets: configurable via DYN_METRICS_ITL_{MIN,MAX,COUNT}
let (itl_min, itl_max, itl_count) = parse_bucket_config("DYN_METRICS_ITL", 0.001, 2.0, 13);
let inter_token_latency_buckets = generate_log_buckets(itl_min, itl_max, itl_count);
let inter_token_latency = HistogramVec::new( let inter_token_latency = HistogramVec::new(
HistogramOpts::new( HistogramOpts::new(
frontend_metric_name(frontend_service::INTER_TOKEN_LATENCY_SECONDS), frontend_metric_name(frontend_service::INTER_TOKEN_LATENCY_SECONDS),
"Inter-token latency in seconds", "Inter-token latency in seconds",
) )
.buckets(vec![ .buckets(inter_token_latency_buckets),
0.0, 0.001, 0.005, 0.01, 0.015, 0.02, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.0,
]),
&["model"], &["model"],
) )
.unwrap(); .unwrap();
...@@ -874,3 +1020,171 @@ async fn handler_metrics(State(state): State<Arc<MetricsHandlerState>>) -> impl ...@@ -874,3 +1020,171 @@ async fn handler_metrics(State(state): State<Arc<MetricsHandlerState>>) -> impl
(StatusCode::OK, metrics).into_response() (StatusCode::OK, metrics).into_response()
} }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_round_to_sig_figs() {
// Test rounding to 2 significant figures
assert_eq!(round_to_sig_figs(0.0026, 2), 0.0026);
assert_eq!(round_to_sig_figs(0.26, 2), 0.26);
assert_eq!(round_to_sig_figs(0.2356, 2), 0.24);
assert_eq!(round_to_sig_figs(1.234, 2), 1.2);
assert_eq!(round_to_sig_figs(12.34, 2), 12.0);
assert_eq!(round_to_sig_figs(123.4, 2), 120.0);
assert_eq!(round_to_sig_figs(1234.0, 2), 1200.0);
assert_eq!(round_to_sig_figs(0.0, 2), 0.0);
// Test edge cases
assert_eq!(round_to_sig_figs(0.999, 2), 1.0);
assert_eq!(round_to_sig_figs(9.99, 2), 10.0);
assert_eq!(round_to_sig_figs(99.9, 2), 100.0);
}
#[test]
fn test_generate_log_buckets_basic() {
// Test basic properties
let buckets = generate_log_buckets(1.0, 100.0, 5);
// Check length
assert_eq!(buckets.len(), 5);
// Check first value is 0
assert_eq!(buckets[0], 0.0);
// Check last value is approximately max (rounded to 2 sig figs)
assert_eq!(buckets[buckets.len() - 1], 100.0);
// Check values are increasing
for i in 1..buckets.len() {
assert!(
buckets[i] > buckets[i - 1],
"Bucket values should be increasing: {} <= {}",
buckets[i - 1],
buckets[i]
);
}
}
#[test]
fn test_generate_log_buckets_edge_cases() {
// Test empty buckets
let buckets = generate_log_buckets(1.0, 100.0, 0);
assert_eq!(buckets.len(), 0);
// Test single bucket
let buckets = generate_log_buckets(1.0, 100.0, 1);
assert_eq!(buckets.len(), 1);
assert_eq!(buckets[0], 0.0);
// Test two buckets
let buckets = generate_log_buckets(1.0, 100.0, 2);
assert_eq!(buckets.len(), 2);
assert_eq!(buckets[0], 0.0);
assert_eq!(buckets[1], 100.0);
}
#[test]
fn test_generate_log_buckets_always_includes_zero() {
// Test various configurations
for count in 1..=20 {
let buckets = generate_log_buckets(0.1, 1000.0, count);
assert_eq!(
buckets[0], 0.0,
"First bucket should always be 0.0 for count={}",
count
);
}
}
#[test]
fn test_all_buckets_are_two_sig_figs() {
let test_cases = vec![
(1.0, 256.0, 10),
(50.0, 128000.0, 12),
(50.0, 32000.0, 10),
(0.001, 480.0, 18),
(0.001, 2.0, 13),
];
for (min, max, count) in test_cases {
let buckets = generate_log_buckets(min, max, count);
for &value in buckets.iter().skip(1) {
let rounded = round_to_sig_figs(value, 2);
assert_eq!(
value, rounded,
"Value {} should be rounded to 2 sig figs (min={}, max={}, count={})",
value, min, max, count
);
}
}
}
#[test]
fn test_sig_fig_limitation_with_many_buckets() {
// This test demonstrates that 2 sig figs limits the number of unique bucket values
// With 1000 requested buckets but only 2 sig figs, we'll get automatic deduplication
let buckets = generate_log_buckets(0.0001, 1.0, 1000);
println!(
"Requested 1000 buckets, got {} total values (including 0.0)",
buckets.len()
);
// With 2 sig figs across 4 orders of magnitude (0.0001 to 1.0),
// we can have roughly 90 unique values per order of magnitude
// So we expect around 360 unique values maximum
assert!(
buckets.len() < 500,
"Expected fewer than 500 unique buckets due to 2 sig fig limitation, got {}",
buckets.len()
);
// Verify all values are unique (no duplicates remain after deduplication)
let mut sorted_buckets = buckets.clone();
sorted_buckets.sort_by(|a, b| a.partial_cmp(b).unwrap());
sorted_buckets.dedup();
assert_eq!(
buckets.len(),
sorted_buckets.len(),
"All buckets should be unique after deduplication"
);
// Verify first is still 0.0
assert_eq!(buckets[0], 0.0);
// Verify values are still in increasing order
for i in 1..buckets.len() {
assert!(
buckets[i] > buckets[i - 1],
"Buckets should be in increasing order"
);
}
}
#[test]
fn test_deduplication_preserves_order() {
// Test that deduplication maintains increasing order
let buckets = generate_log_buckets(0.01, 1.0, 50);
// Verify all values are unique
let mut unique_check = std::collections::HashSet::new();
for &bucket in &buckets {
assert!(
unique_check.insert(bucket.to_bits()),
"Duplicate value {} found after deduplication",
bucket
);
}
// Verify order is maintained
for i in 1..buckets.len() {
assert!(
buckets[i] > buckets[i - 1],
"Bucket values should be in increasing order after deduplication"
);
}
}
}
...@@ -384,26 +384,22 @@ async fn test_http_service() { ...@@ -384,26 +384,22 @@ async fn test_http_service() {
let buckets = histogram.get_bucket(); let buckets = histogram.get_bucket();
let mut found = false; let mut found = false;
let mut expected_count = 0;
for bucket in buckets { for bucket_idx in 1..buckets.len() {
let upper_bound = bucket.get_upper_bound(); if buckets[bucket_idx].get_upper_bound() >= 2.5
let cumulative_count = bucket.get_cumulative_count(); && buckets[bucket_idx - 1].get_upper_bound() < 2.5
{
println!( found = true;
"Bucket upper bound: {}, count: {}",
upper_bound, cumulative_count
);
// Since our observation is 2.5, it should fall into the bucket with upper bound 4.0
if upper_bound >= 4.0 {
assert_eq!( assert_eq!(
cumulative_count, 1, buckets[bucket_idx].get_cumulative_count(),
"Observation should be counted in the 4.0 bucket" 1,
"Observation should be counted in the bucket containing 2.5"
); );
found = true; expected_count = 1;
} else { } else {
assert_eq!( assert_eq!(
cumulative_count, 0, buckets[bucket_idx].get_cumulative_count(),
expected_count,
"No observations should be in this bucket" "No observations should be in this bucket"
); );
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment