// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

//! Prometheus metric name constants and sanitization utilities
//!
//! This module provides centralized Prometheus metric name constants and sanitization functions
//! for various components to ensure consistency and avoid duplication across the codebase.
//!
//! ## Naming Conventions
//!
//! All metric names should follow: `{prefix}_{name}_{suffix}`
//!
//! **Prefix**: Component identifier (`dynamo_component_`, `dynamo_frontend_`, etc.)
//! **Name**: Descriptive snake_case name indicating what is measured
//! **Suffix**:
//!   - Units: `_seconds`, `_bytes`, `_ms`, `_percent`
//!   - Counters: `_total` (not `total_` prefix)
//!   - Note: Do not use `_counter`, `_gauge`, `_time`, or `_size` in Prometheus names (too vague)
//!
//! **Common Transformations**:
//! - ❌ `_counter` → ✅ `_total`
//! - ❌ `_time` → ✅ `_seconds`, `_ms`, `_hours`, `_duration_seconds`
//! - ❌ `_size` → ✅ `_bytes`, `_total`, `_length`
//! - ❌ `_gauge` → ✅ (no suffix needed for current values)
//! - ❌ `_rate` → ✅ `_per_second`, `_per_minute`
//!
//! **Examples**:
//! - ✅ `dynamo_frontend_requests_total` - Total request counter (not `incoming_requests`)
//! - ✅ `dynamo_frontend_request_duration_seconds` - Request duration histogram (not `response_time`)
//! - ✅ `dynamo_component_errors_total` - Total error counter (not `total_errors`)
//! - ✅ `dynamo_component_memory_usage_bytes` - Memory usage gauge
//! - ✅ `dynamo_frontend_inflight_requests_total` - Current inflight requests gauge
//! - ✅ `nats_client_connection_duration_ms` - Connection time in milliseconds
//! - ✅ `dynamo_component_cpu_usage_percent` - CPU usage percentage
//! - ✅ `dynamo_frontend_tokens_per_second` - Token generation rate
//!
//! ## Key Differences: Prometheus Metric Names vs Prometheus Label Names
//!
//! **Metric names**: Allow colons and `__` anywhere. **Label names**: No colons, no `__` prefix.
//! Label names starting with `__` are reserved for Prometheus internal use.

use once_cell::sync::Lazy;
use regex::Regex;

/// Metric name prefixes used across the metrics system
pub mod name_prefix {
    /// Prefix for component metric names (the value `build_component_metric_name` prepends)
    pub const COMPONENT: &str = "dynamo_component";

    /// Prefix for frontend service metrics; also the fallback returned by
    /// `sanitize_frontend_prometheus_prefix` when a custom prefix cannot be used
    pub const FRONTEND: &str = "dynamo_frontend";
}

/// Automatically inserted Prometheus label names used across the metrics system.
///
/// Every label name defined here carries a `dynamo_` prefix.
pub mod labels {
    /// Label for component identification.
    ///
    /// The string is identical to `name_prefix::COMPONENT`, but here it names a label
    /// rather than a metric-name prefix.
    pub const COMPONENT: &str = "dynamo_component";

    /// Label for namespace identification
    pub const NAMESPACE: &str = "dynamo_namespace";

    /// Label for endpoint identification
    pub const ENDPOINT: &str = "dynamo_endpoint";
}

/// Frontend service metrics (LLM HTTP service)
///
/// The metric names below are unprefixed; the environment variable named by
/// [`METRICS_PREFIX_ENV`](self::METRICS_PREFIX_ENV) overrides the default metric prefix.
pub mod frontend_service {
    // TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs
    // for centralized environment variable constant management across the codebase
    /// Environment variable that overrides the default metric prefix
    pub const METRICS_PREFIX_ENV: &str = "DYN_METRICS_PREFIX";

    /// Total number of LLM requests processed
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Number of requests waiting in HTTP queue before receiving the first response.
    pub const QUEUED_REQUESTS_TOTAL: &str = "queued_requests_total";

    /// Number of inflight requests going to the engine (vLLM, SGLang, ...)
    pub const INFLIGHT_REQUESTS_TOTAL: &str = "inflight_requests_total";

    /// Duration of LLM requests
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Input sequence length in tokens
    pub const INPUT_SEQUENCE_TOKENS: &str = "input_sequence_tokens";

    /// Output sequence length in tokens
    pub const OUTPUT_SEQUENCE_TOKENS: &str = "output_sequence_tokens";

    /// Time to first token in seconds
    pub const TIME_TO_FIRST_TOKEN_SECONDS: &str = "time_to_first_token_seconds";

    /// Inter-token latency in seconds
    pub const INTER_TOKEN_LATENCY_SECONDS: &str = "inter_token_latency_seconds";

    // ---- Model configuration metrics ----

    // Runtime config metrics (from ModelRuntimeConfig):

    /// Total KV blocks available for a worker serving the model
    pub const MODEL_TOTAL_KV_BLOCKS: &str = "model_total_kv_blocks";

    /// Maximum number of sequences for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_SEQS: &str = "model_max_num_seqs";

    /// Maximum number of batched tokens for a worker serving the model (runtime config)
    pub const MODEL_MAX_NUM_BATCHED_TOKENS: &str = "model_max_num_batched_tokens";

    // MDC metrics (from ModelDeploymentCard):

    /// Maximum context length for a worker serving the model (MDC)
    pub const MODEL_CONTEXT_LENGTH: &str = "model_context_length";

    /// KV cache block size for a worker serving the model (MDC)
    pub const MODEL_KV_CACHE_BLOCK_SIZE: &str = "model_kv_cache_block_size";

    /// Request migration limit for a worker serving the model (MDC)
    pub const MODEL_MIGRATION_LIMIT: &str = "model_migration_limit";

    /// Status label values
    pub mod status {
        /// Value for successful requests
        pub const SUCCESS: &str = "success";

        /// Value for failed requests
        pub const ERROR: &str = "error";
    }

    /// Request type label values
    pub mod request_type {
        /// Value for streaming requests
        pub const STREAM: &str = "stream";

        /// Value for unary requests
        pub const UNARY: &str = "unary";
    }
}

/// Work handler Prometheus metric names
///
/// The names are stored without a prefix.
/// NOTE(review): presumably the caller combines them with a component prefix — confirm
/// against the call sites.
pub mod work_handler {
    /// Total number of requests processed by work handler
    pub const REQUESTS_TOTAL: &str = "requests_total";

    /// Total number of bytes received in requests by work handler
    pub const REQUEST_BYTES_TOTAL: &str = "request_bytes_total";

    /// Total number of bytes sent in responses by work handler
    pub const RESPONSE_BYTES_TOTAL: &str = "response_bytes_total";

    /// Number of requests currently being processed by work handler
    pub const INFLIGHT_REQUESTS: &str = "inflight_requests";

    /// Time spent processing requests by work handler (histogram)
    pub const REQUEST_DURATION_SECONDS: &str = "request_duration_seconds";

    /// Total number of errors in work handler processing
    pub const ERRORS_TOTAL: &str = "errors_total";

    /// Label name for error type classification; its values are listed in [`error_types`]
    pub const ERROR_TYPE_LABEL: &str = "error_type";

    /// Error type values for work handler metrics (values of the `error_type` label)
    pub mod error_types {
        /// Deserialization error
        pub const DESERIALIZATION: &str = "deserialization";

        /// Invalid message format error
        pub const INVALID_MESSAGE: &str = "invalid_message";

        /// Response stream creation error
        pub const RESPONSE_STREAM: &str = "response_stream";

        /// Generation error
        pub const GENERATE: &str = "generate";

        /// Response publishing error
        pub const PUBLISH_RESPONSE: &str = "publish_response";

        /// Final message publishing error
        pub const PUBLISH_FINAL: &str = "publish_final";
    }
}

/// NATS client metrics (DistributedRuntime contains a NATS client shared by all children)
///
/// Every name is built at compile time by prepending `nats_client_` to its suffix.
pub mod nats_client {
    /// Macro to generate NATS client metric names with the prefix
    macro_rules! nats_client_name {
        ($name:expr) => {
            concat!("nats_client_", $name)
        };
    }

    /// Prefix for all NATS client metrics
    pub const PREFIX: &str = nats_client_name!("");

    /// Total number of bytes received by NATS client
    pub const IN_TOTAL_BYTES: &str = nats_client_name!("in_total_bytes");

    /// Total number of bytes sent by NATS client
    ///
    /// NOTE(review): the metric name says "overhead" while this description says all bytes
    /// sent; confirm which quantity the exporter actually reports.
    pub const OUT_OVERHEAD_BYTES: &str = nats_client_name!("out_overhead_bytes");

    /// Total number of messages received by NATS client
    pub const IN_MESSAGES: &str = nats_client_name!("in_messages");

    /// Total number of messages sent by NATS client
    pub const OUT_MESSAGES: &str = nats_client_name!("out_messages");

    /// Total number of connections established by NATS client
    pub const CONNECTS: &str = nats_client_name!("connects");

    /// Current connection state of NATS client (0=disconnected, 1=connected, 2=reconnecting)
    pub const CONNECTION_STATE: &str = nats_client_name!("connection_state");
}

/// NATS service metrics, from the $SRV.STATS.<service_name> requests on NATS server
///
/// Every name is built at compile time by prepending `nats_service_` to its suffix.
pub mod nats_service {
    /// Macro to generate NATS service metric names with the prefix
    macro_rules! nats_service_name {
        ($name:expr) => {
            concat!("nats_service_", $name)
        };
    }

    /// Prefix for all NATS service metrics
    pub const PREFIX: &str = nats_service_name!("");

    /// Average processing time in milliseconds (maps to: average_processing_time in ms)
    pub const AVG_PROCESSING_MS: &str = nats_service_name!("avg_processing_time_ms");

    /// Total errors across all endpoints (maps to: num_errors)
    pub const TOTAL_ERRORS: &str = nats_service_name!("total_errors");

    /// Total requests across all endpoints (maps to: num_requests)
    pub const TOTAL_REQUESTS: &str = nats_service_name!("total_requests");

    /// Total processing time in milliseconds (maps to: processing_time in ms)
    pub const TOTAL_PROCESSING_MS: &str = nats_service_name!("total_processing_time_ms");

    /// Number of active services (derived from ServiceSet.services)
    pub const ACTIVE_SERVICES: &str = nats_service_name!("active_services");

    /// Number of active endpoints (derived from ServiceInfo.endpoints)
    pub const ACTIVE_ENDPOINTS: &str = nats_service_name!("active_endpoints");
}

/// All NATS client Prometheus metric names as an array for iteration/validation
pub const DRT_NATS_METRICS: &[&str] = &[
247
248
249
250
251
252
    nats_client::CONNECTION_STATE,
    nats_client::CONNECTS,
    nats_client::IN_TOTAL_BYTES,
    nats_client::IN_MESSAGES,
    nats_client::OUT_OVERHEAD_BYTES,
    nats_client::OUT_MESSAGES,
253
254
255
256
257
];

/// All component service Prometheus metric names as an array for iteration/validation
/// (ordered to match NatsStatsMetrics fields)
pub const COMPONENT_NATS_METRICS: &[&str] = &[
258
259
260
261
262
263
    nats_service::AVG_PROCESSING_MS, // maps to: average_processing_time (nanoseconds)
    nats_service::TOTAL_ERRORS,      // maps to: num_errors
    nats_service::TOTAL_REQUESTS,    // maps to: num_requests
    nats_service::TOTAL_PROCESSING_MS, // maps to: processing_time (nanoseconds)
    nats_service::ACTIVE_SERVICES,   // derived from ServiceSet.services
    nats_service::ACTIVE_ENDPOINTS,  // derived from ServiceInfo.endpoints
264
265
];

/// Task tracker Prometheus metric name suffixes
///
/// All six names are counters and therefore end in `_total`.
pub mod task_tracker {
    /// Total number of tasks issued/submitted
    pub const TASKS_ISSUED_TOTAL: &str = "tasks_issued_total";

    /// Total number of tasks started
    pub const TASKS_STARTED_TOTAL: &str = "tasks_started_total";

    /// Total number of successfully completed tasks
    pub const TASKS_SUCCESS_TOTAL: &str = "tasks_success_total";

    /// Total number of cancelled tasks
    pub const TASKS_CANCELLED_TOTAL: &str = "tasks_cancelled_total";

    /// Total number of failed tasks
    pub const TASKS_FAILED_TOTAL: &str = "tasks_failed_total";

    /// Total number of rejected tasks
    pub const TASKS_REJECTED_TOTAL: &str = "tasks_rejected_total";
}

/// DistributedRuntime core metrics
pub mod distributed_runtime {
    /// Total uptime of the DistributedRuntime in seconds
    pub const UPTIME_SECONDS: &str = "uptime_seconds";
}

/// KVBM connector name constants
///
/// NOTE(review): the visible code only defines these two strings; whether they serve as
/// metric names, prefixes, or component identifiers should be confirmed at the call sites.
pub mod kvbm_connector {
    /// Name string for the KVBM connector leader
    pub const KVBM_CONNECTOR_LEADER: &str = "kvbm_connector_leader";

    /// Name string for the KVBM connector worker
    pub const KVBM_CONNECTOR_WORKER: &str = "kvbm_connector_worker";
}

/// KvStats metrics from LLM workers
///
/// Every name is built at compile time by prepending `kvstats_` to its suffix.
pub mod kvstats {
    /// Macro to generate KvStats metric names with the prefix
    macro_rules! kvstats_name {
        ($name:expr) => {
            concat!("kvstats_", $name)
        };
    }

    /// Prefix for all KvStats metrics
    pub const PREFIX: &str = kvstats_name!("");

    /// Number of active KV cache blocks currently in use
    pub const ACTIVE_BLOCKS: &str = kvstats_name!("active_blocks");

    /// Total number of KV cache blocks available
    pub const TOTAL_BLOCKS: &str = kvstats_name!("total_blocks");

    /// GPU cache usage, expressed as a fraction between 0.0 and 1.0
    /// (the metric name nevertheless ends in `_percent`)
    pub const GPU_CACHE_USAGE_PERCENT: &str = kvstats_name!("gpu_cache_usage_percent");

    /// GPU prefix cache hit rate, expressed as a fraction between 0.0 and 1.0
    pub const GPU_PREFIX_CACHE_HIT_RATE: &str = kvstats_name!("gpu_prefix_cache_hit_rate");
}

/// All KvStats Prometheus metric names as an array for iteration/validation
///
/// Lists every metric constant of the `kvstats` module; `kvstats::PREFIX` is not a metric
/// name and is not included.
pub const KVSTATS_METRICS: &[&str] = &[
    kvstats::ACTIVE_BLOCKS,
    kvstats::TOTAL_BLOCKS,
    kvstats::GPU_CACHE_USAGE_PERCENT,
    kvstats::GPU_PREFIX_CACHE_HIT_RATE,
];

/// KvRouter (including KvIndexer) Prometheus metric names
pub mod kvrouter {
    /// Number of KV cache events applied to the index (including status)
    pub const KV_CACHE_EVENTS_APPLIED: &str = "kv_cache_events_applied";
}

// Shared regex patterns for Prometheus sanitization.
// Each pattern is wrapped in `Lazy`, so it is compiled on first use and reused afterwards.

/// Matches any single character that is not allowed in a metric name,
/// i.e. anything outside `[a-zA-Z0-9_:]`.
static METRIC_INVALID_CHARS_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_:]").unwrap());
/// Matches any single character that is not allowed in a label name,
/// i.e. anything outside `[a-zA-Z0-9_]` (colons are rejected here).
static LABEL_INVALID_CHARS_PATTERN: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"[^a-zA-Z0-9_]").unwrap());
/// Matches a leading character that is neither an ASCII letter nor an underscore.
/// Note that a leading colon matches this pattern as well.
static INVALID_FIRST_CHAR_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[^a-zA-Z_]").unwrap());

/// Sanitizes a Prometheus metric name by converting invalid characters to underscores
/// and ensuring the first character is valid.
///
/// **Rules**: Pattern `[a-zA-Z_:][a-zA-Z0-9_:]*`. Allows colons and `__` anywhere,
/// including a colon in the first position.
///
/// # Errors
/// Returns an error when `raw` is empty, or when the sanitized result consists solely of
/// underscores (the input held no usable characters).
pub fn sanitize_prometheus_name(raw: &str) -> anyhow::Result<String> {
    if raw.is_empty() {
        return Err(anyhow::anyhow!(
            "Cannot sanitize empty string into valid Prometheus name"
        ));
    }

    // Replace all invalid characters with underscores
    let mut sanitized = METRIC_INVALID_CHARS_PATTERN
        .replace_all(raw, "_")
        .into_owned();

    // Ensure first character is valid (letter, underscore, or colon). After the replacement
    // above only `[a-zA-Z0-9_:]` remain, so the only possible offender is a leading digit.
    // The check is done inline because the shared first-character regex also rejects a
    // leading colon, which is legal for metric names.
    let first_char_ok =
        sanitized.starts_with(|c: char| c.is_ascii_alphabetic() || c == '_' || c == ':');
    if !first_char_ok {
        sanitized.insert(0, '_');
    }

    // Check if the result is all underscores (invalid input)
    if sanitized.chars().all(|c| c == '_') {
        return Err(anyhow::anyhow!(
            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus name",
            raw
        ));
    }

    Ok(sanitized)
}

/// Sanitizes a Prometheus label name by converting invalid characters to underscores
/// and ensuring the first character is valid.
/// Label names have stricter rules than metric names (no colons allowed).
///
/// **Rules**: Pattern `[a-zA-Z_][a-zA-Z0-9_]*`. No colons, no `__` prefix (reserved).
///
/// A run of two or more leading underscores is removed entirely; if what follows does not
/// start with a letter, a single underscore is put back so the name stays valid
/// (`__test` -> `test`, `___test` -> `test`, `__123` -> `_123`). A single leading
/// underscore is left untouched.
///
/// # Errors
/// Returns an error when `raw` is empty, or when the sanitized result consists solely of
/// underscores (the input held no usable characters).
pub fn sanitize_prometheus_label(raw: &str) -> anyhow::Result<String> {
    if raw.is_empty() {
        return Err(anyhow::anyhow!(
            "Cannot sanitize empty string into valid Prometheus label"
        ));
    }

    // Replace all invalid characters with underscores (no colons allowed in labels)
    let mut sanitized = LABEL_INVALID_CHARS_PATTERN
        .replace_all(raw, "_")
        .into_owned();

    // Ensure first character is valid (letter or underscore only)
    if INVALID_FIRST_CHAR_PATTERN.is_match(&sanitized) {
        sanitized.insert(0, '_');
    }

    // Prevent __ prefix (reserved for Prometheus internal use) but allow __ elsewhere.
    // Every leading underscore is trimmed, not just the first two: stripping only "__"
    // from "___test" and then re-prefixing would recreate the reserved "__" prefix.
    if sanitized.starts_with("__") {
        let rest = sanitized.trim_start_matches('_');
        sanitized = if rest.starts_with(|c: char| c.is_ascii_alphabetic()) {
            rest.to_string()
        } else {
            // `rest` is empty or starts with a digit; one underscore keeps it well-formed
            // (an empty `rest` becomes "_" and is rejected by the check below).
            format!("_{}", rest)
        };
    }

    // Check if the result is all underscores (invalid input)
    if sanitized.chars().all(|c| c == '_') {
        return Err(anyhow::anyhow!(
            "Input '{}' contains only invalid characters and cannot be sanitized into a valid Prometheus label",
            raw
        ));
    }

    Ok(sanitized)
}

/// Produces a usable frontend metric prefix from `raw`.
///
/// The input goes through the general metric-name sanitizer. An empty input, or one the
/// sanitizer rejects, yields the default frontend prefix instead, so this function
/// never fails.
pub fn sanitize_frontend_prometheus_prefix(raw: &str) -> String {
    let default_prefix = || name_prefix::FRONTEND.to_string();

    if raw.is_empty() {
        return default_prefix();
    }

    match sanitize_prometheus_name(raw) {
        Ok(prefix) => prefix,
        Err(_) => default_prefix(),
    }
}

/// Returns `metric_name`, sanitized for Prometheus, joined to the component prefix with
/// an underscore.
///
/// # Panics
/// Panics if `metric_name` cannot be sanitized (the sanitizer returns an error).
pub fn build_component_metric_name(metric_name: &str) -> String {
    let suffix = sanitize_prometheus_name(metric_name)
        .expect("metric name should be valid or sanitizable");
    [name_prefix::COMPONENT, suffix.as_str()].join("_")
}

/// Converts a `u64` into an `i64`, saturating at `i64::MAX`.
///
/// Prometheus `IntGaugeVec` stores `i64` while our data types use `u64`; saturating
/// keeps oversized inputs from wrapping around to negative numbers.
///
/// # Arguments
/// * `value` - The u64 value to convert
///
/// # Returns
/// `value` as an `i64` when it fits, otherwise `i64::MAX`
///
/// # Examples
/// ```
/// use dynamo_runtime::metrics::prometheus_names::clamp_u64_to_i64;
///
/// assert_eq!(clamp_u64_to_i64(100), 100);
/// assert_eq!(clamp_u64_to_i64(u64::MAX), i64::MAX);
/// ```
pub fn clamp_u64_to_i64(value: u64) -> i64 {
    // Cap the value in the unsigned domain first; the cast below is then lossless.
    let ceiling = i64::MAX as u64;
    value.min(ceiling) as i64
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Frontend prefixes: valid input is untouched, invalid input is repaired, empty input
    /// falls back to the default frontend prefix.
    #[test]
    fn test_sanitize_frontend_prometheus_prefix() {
        // Test that valid prefixes remain unchanged
        assert_eq!(
            sanitize_frontend_prometheus_prefix("dynamo_frontend"),
            "dynamo_frontend"
        );
        assert_eq!(
            sanitize_frontend_prometheus_prefix("custom_prefix"),
            "custom_prefix"
        );
        assert_eq!(sanitize_frontend_prometheus_prefix("test123"), "test123");

        // Test that invalid characters are converted to underscores
        assert_eq!(
            sanitize_frontend_prometheus_prefix("test prefix"),
            "test_prefix"
        );
        assert_eq!(
            sanitize_frontend_prometheus_prefix("test.prefix"),
            "test_prefix"
        );
        assert_eq!(
            sanitize_frontend_prometheus_prefix("test@prefix"),
            "test_prefix"
        );
        assert_eq!(
            sanitize_frontend_prometheus_prefix("test-prefix"),
            "test_prefix"
        );

        // Test that invalid first characters are fixed
        assert_eq!(sanitize_frontend_prometheus_prefix("123test"), "_123test");
        assert_eq!(sanitize_frontend_prometheus_prefix("@test"), "_test");

        // Test empty string fallback
        assert_eq!(
            sanitize_frontend_prometheus_prefix(""),
            name_prefix::FRONTEND
        );
    }

    /// Metric names: colons and double underscores are allowed; unusable input is an error.
    #[test]
    fn test_sanitize_prometheus_name() {
        // Test that valid names remain unchanged
        assert_eq!(
            sanitize_prometheus_name("valid_name").unwrap(),
            "valid_name"
        );
        assert_eq!(sanitize_prometheus_name("test123").unwrap(), "test123");
        assert_eq!(
            sanitize_prometheus_name("test_name_123").unwrap(),
            "test_name_123"
        );
        assert_eq!(sanitize_prometheus_name("test:name").unwrap(), "test:name"); // colons allowed

        // Test that invalid characters are converted to underscores
        assert_eq!(sanitize_prometheus_name("test name").unwrap(), "test_name");
        assert_eq!(sanitize_prometheus_name("test.name").unwrap(), "test_name");
        assert_eq!(sanitize_prometheus_name("test@name").unwrap(), "test_name");
        assert_eq!(sanitize_prometheus_name("test-name").unwrap(), "test_name");
        assert_eq!(
            sanitize_prometheus_name("test$name#123").unwrap(),
            "test_name_123"
        );

        // Test that double underscores are ALLOWED in metric names (unlike labels)
        assert_eq!(
            sanitize_prometheus_name("test__name").unwrap(),
            "test__name"
        );
        assert_eq!(
            sanitize_prometheus_name("test___name").unwrap(),
            "test___name"
        );
        assert_eq!(sanitize_prometheus_name("__test").unwrap(), "__test"); // Leading double underscore OK

        // Test that invalid first characters are fixed
        assert_eq!(sanitize_prometheus_name("123test").unwrap(), "_123test");
        assert_eq!(sanitize_prometheus_name("@test").unwrap(), "_test"); // @ becomes _, no double underscore
        assert_eq!(sanitize_prometheus_name("-test").unwrap(), "_test"); // - becomes _, no double underscore
        assert_eq!(sanitize_prometheus_name(".test").unwrap(), "_test"); // . becomes _, no double underscore

        // Test empty string returns error
        assert!(sanitize_prometheus_name("").is_err());

        // Test complex cases
        assert_eq!(
            sanitize_prometheus_name("123.test-name@domain").unwrap(),
            "_123_test_name_domain"
        );

        // Test that strings with only invalid characters return error
        assert!(sanitize_prometheus_name("@#$%").is_err());
        assert!(sanitize_prometheus_name("!!!!").is_err());
    }

    /// Label names: no colons, no reserved `__` prefix; unusable input is an error.
    #[test]
    fn test_sanitize_prometheus_label() {
        // Test that valid labels remain unchanged
        assert_eq!(
            sanitize_prometheus_label("valid_label").unwrap(),
            "valid_label"
        );
        assert_eq!(sanitize_prometheus_label("test123").unwrap(), "test123");
        assert_eq!(
            sanitize_prometheus_label("test_label_123").unwrap(),
            "test_label_123"
        );
        assert_eq!(sanitize_prometheus_label("_test").unwrap(), "_test"); // single leading _ is fine

        // Test that colons are NOT allowed in labels (stricter than names)
        assert_eq!(
            sanitize_prometheus_label("test:label").unwrap(),
            "test_label"
        );

        // Test that invalid characters are converted to underscores
        assert_eq!(
            sanitize_prometheus_label("test label").unwrap(),
            "test_label"
        );
        assert_eq!(
            sanitize_prometheus_label("test.label").unwrap(),
            "test_label"
        );
        assert_eq!(
            sanitize_prometheus_label("test@label").unwrap(),
            "test_label"
        );
        assert_eq!(
            sanitize_prometheus_label("test-label").unwrap(),
            "test_label"
        );
        assert_eq!(
            sanitize_prometheus_label("test$label#123").unwrap(),
            "test_label_123"
        );

        // Test that double underscores are ALLOWED in middle but NOT at start
        assert_eq!(
            sanitize_prometheus_label("test__label").unwrap(),
            "test__label"
        ); // OK in middle
        assert_eq!(
            sanitize_prometheus_label("test___label").unwrap(),
            "test___label"
        ); // OK in middle
        assert_eq!(
            sanitize_prometheus_label("test____label").unwrap(),
            "test____label"
        ); // OK in middle
        assert_eq!(sanitize_prometheus_label("__test").unwrap(), "test"); // Leading __ removed
        assert_eq!(sanitize_prometheus_label("__123").unwrap(), "_123"); // Leading __ removed, digit re-prefixed
        assert!(sanitize_prometheus_label("____").is_err()); // All underscores should error

        // Test that invalid first characters are fixed (no colons allowed)
        assert_eq!(sanitize_prometheus_label("123test").unwrap(), "_123test");
        assert_eq!(sanitize_prometheus_label("@test").unwrap(), "_test");
        assert_eq!(sanitize_prometheus_label(":test").unwrap(), "_test"); // colon not allowed
        assert_eq!(sanitize_prometheus_label("-test").unwrap(), "_test");

        // Test empty string returns error
        assert!(sanitize_prometheus_label("").is_err());

        // Test complex cases
        assert_eq!(
            sanitize_prometheus_label("123:test-label@domain").unwrap(),
            "_123_test_label_domain"
        );

        // Test that strings with only invalid characters return error
        assert!(sanitize_prometheus_label("@#$%").is_err()); // sanitizes to underscores only -> error
        assert!(sanitize_prometheus_label("!!!!").is_err()); // sanitizes to underscores only -> error
    }

    /// Component metric names: sanitized input is joined to the component prefix.
    #[test]
    fn test_build_component_metric_name() {
        // Test that valid names work correctly
        assert_eq!(
            build_component_metric_name("test_metric"),
            "dynamo_component_test_metric"
        );
        assert_eq!(
            build_component_metric_name("requests_total"),
            "dynamo_component_requests_total"
        );

        // Test that invalid characters are sanitized
        assert_eq!(
            build_component_metric_name("test metric"),
            "dynamo_component_test_metric"
        );
        assert_eq!(
            build_component_metric_name("test.metric"),
            "dynamo_component_test_metric"
        );
        assert_eq!(
            build_component_metric_name("test@metric"),
            "dynamo_component_test_metric"
        );

        // Test that invalid first characters are fixed
        assert_eq!(
            build_component_metric_name("123metric"),
            "dynamo_component__123metric"
        );
    }

    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_invalid_input() {
        // Test that completely invalid input panics with clear message
        build_component_metric_name("@#$%");
    }

    #[test]
    #[should_panic(expected = "metric name should be valid or sanitizable")]
    fn test_build_component_metric_name_panics_on_empty_input() {
        // Test that empty input panics with clear message
        build_component_metric_name("");
    }

    /// u64 -> i64 conversion saturates at `i64::MAX` instead of wrapping.
    #[test]
    fn test_clamp_u64_to_i64() {
        // Test normal values within i64 range
        assert_eq!(clamp_u64_to_i64(0), 0);
        assert_eq!(clamp_u64_to_i64(100), 100);
        assert_eq!(clamp_u64_to_i64(1000000), 1000000);

        // Test maximum i64 value
        assert_eq!(clamp_u64_to_i64(i64::MAX as u64), i64::MAX);

        // Test values that exceed i64::MAX
        assert_eq!(clamp_u64_to_i64(u64::MAX), i64::MAX);
        assert_eq!(clamp_u64_to_i64((i64::MAX as u64) + 1), i64::MAX);
        assert_eq!(clamp_u64_to_i64((i64::MAX as u64) + 1000), i64::MAX);
    }
}