feat: add Python const for Prometheus metric names (#3244)

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>

feat: add Python const for Prometheus metric names (#3244)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
cacac9b9 · Keiven C · GitHub · 481bf395 · cacac9b9 · cacac9b9
Unverified Commit cacac9b9 authored Sep 30, 2025 by Keiven C Committed by GitHub Sep 30, 2025
6 changed files
--- a/components/planner/src/dynamo/planner/utils/prometheus.py
+++ b/components/planner/src/dynamo/planner/utils/prometheus.py
@@ -19,6 +19,7 @@ import typing
 from prometheus_api_client import PrometheusConnect
 from pydantic import BaseModel, ValidationError
+from dynamo._core import prometheus_names
 from dynamo.runtime.logging import configure_dynamo_logging
 configure_dynamo_logging()
@@ -47,18 +48,14 @@ class PrometheusAPIClient:
        self.dynamo_namespace = dynamo_namespace
    def _get_average_metric(
-        self,
+        self, full_metric_name: str, interval: str, operation_name: str, model_name: str
-        metric_name: str,
-        interval: str,
-        operation_name: str,
-        model_name: str,
    ) -> float:
        """
        Helper method to get average metrics using the pattern:
        increase(metric_sum[interval])/increase(metric_count[interval])
        Args:
-            metric_name: Base metric name (e.g., 'inter_token_latency_seconds')
+            full_metric_name: Full metric name (e.g., 'dynamo_frontend_inter_token_latency_seconds')
            interval: Time interval for the query (e.g., '60s')
            operation_name: Human-readable name for error logging
@@ -66,8 +63,6 @@ class PrometheusAPIClient:
            Average metric value or 0 if no data/error
        """
        try:
-            # TODO: use prometheus_names.rs
-            full_metric_name = f"dynamo_frontend_{metric_name}"
            query = f"increase({full_metric_name}_sum[{interval}])/increase({full_metric_name}_count[{interval}])"
            result = self.prom.custom_query(query=query)
            if not result:
@@ -93,7 +88,7 @@ class PrometheusAPIClient:
    def get_avg_inter_token_latency(self, interval: str, model_name: str):
        return self._get_average_metric(
-            "inter_token_latency_seconds",
+            prometheus_names.frontend.inter_token_latency_seconds,
            interval,
            "avg inter token latency",
            model_name,
@@ -101,7 +96,7 @@ class PrometheusAPIClient:
    def get_avg_time_to_first_token(self, interval: str, model_name: str):
        return self._get_average_metric(
-            "time_to_first_token_seconds",
+            prometheus_names.frontend.time_to_first_token_seconds,
            interval,
            "avg time to first token",
            model_name,
@@ -109,7 +104,7 @@ class PrometheusAPIClient:
    def get_avg_request_duration(self, interval: str, model_name: str):
        return self._get_average_metric(
-            "request_duration_seconds",
+            prometheus_names.frontend.request_duration_seconds,
            interval,
            "avg request duration",
            model_name,
@@ -118,8 +113,9 @@ class PrometheusAPIClient:
    def get_avg_request_count(self, interval: str, model_name: str):
        # This function follows a different query pattern than the other metrics
        try:
+            requests_total_metric = prometheus_names.frontend.requests_total
            raw_res = self.prom.custom_query(
-                query=f"increase(dynamo_frontend_requests_total[{interval}])"
+                query=f"increase({requests_total_metric}[{interval}])"
            )
            metrics_containers = parse_frontend_metric_containers(raw_res)
            total_count = 0.0
@@ -136,7 +132,7 @@ class PrometheusAPIClient:
    def get_avg_input_sequence_tokens(self, interval: str, model_name: str):
        return self._get_average_metric(
-            "input_sequence_tokens",
+            prometheus_names.frontend.input_sequence_tokens,
            interval,
            "avg input sequence tokens",
            model_name,
@@ -144,7 +140,7 @@ class PrometheusAPIClient:
    def get_avg_output_sequence_tokens(self, interval: str, model_name: str):
        return self._get_average_metric(
-            "output_sequence_tokens",
+            prometheus_names.frontend.output_sequence_tokens,
            interval,
            "avg output sequence tokens",
            model_name,

--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -55,6 +55,7 @@ mod http;
 mod llm;
 mod parsers;
 mod planner;
+mod prometheus_names;
 type JsonServerStreamingIngress =
    Ingress<SingleIn<serde_json::Value>, ManyOut<RsAnnotated<serde_json::Value>>>;
@@ -184,6 +185,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
    engine::add_to_module(m)?;
    parsers::add_to_module(m)?;
+    prometheus_names::add_to_module(m)?;
    #[cfg(feature = "block-manager")]
    llm::block_manager::add_to_module(m)?;

--- a/lib/bindings/python/rust/prometheus_names.rs
+++ b/lib/bindings/python/rust/prometheus_names.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//! Python bindings for Prometheus metric name constants
+//!
+//! ⚠️  **CRITICAL: SYNC WITH RUST SOURCE AND PYTHON TYPE STUBS** ⚠️
+//! This file exposes constants from `lib/runtime/src/metrics/prometheus_names.rs` to Python.
+//! When the source file is modified, you MUST update BOTH of the following to match:
+//!
+//! 1. **This Rust file** - Update the actual Python bindings implementation
+//! 2. **Python type stubs** - Update `lib/bindings/python/src/dynamo/_core.pyi`
+//!    The .pyi file provides type hints for IDEs and static type checkers.
+//!    Without updating it, IDEs won't recognize new classes/methods for autocomplete.
+//!
+//! The constants here should mirror the structure and values from the Rust source.
+//! Any changes to metric names in the source must be reflected here immediately.
+//!
+//! Files to sync:
+//! - Source:      `lib/runtime/src/metrics/prometheus_names.rs`
+//! - This file:   `lib/bindings/python/rust/prometheus_names.rs`
+//! - Type stubs:  `lib/bindings/python/src/dynamo/_core.pyi`
+//!
+//! ## Python Usage Example
+//!
+//! ```python
+//! from dynamo._core import prometheus_names
+//!
+//! # Access metrics directly (no constructor call needed!)
+//! frontend = prometheus_names.frontend
+//! print(frontend.requests_total)           # "dynamo_frontend_requests_total"
+//! print(frontend.request_duration_seconds) # "dynamo_frontend_request_duration_seconds"
+//! print(frontend.inter_token_latency_seconds) # "dynamo_frontend_inter_token_latency_seconds"
+//!
+//! work_handler = prometheus_names.work_handler
+//! print(work_handler.requests_total)       # "dynamo_component_requests_total"
+//! print(work_handler.errors_total)         # "dynamo_component_errors_total"
+//!
+//! # Use in Prometheus queries
+//! query = f"rate({frontend.requests_total}[5m])"
+//! pattern = rf'{work_handler.requests_total}\{{[^}}]*model="[^"]*"[^}}]*\}}'
+//! ```
+use dynamo_runtime::metrics::prometheus_names::*;
+use pyo3::prelude::*;
+/// Main container for all Prometheus metric name constants
+#[pyclass]
+pub struct PrometheusNames;
+#[pymethods]
+impl PrometheusNames {
+    /// Frontend service metrics
+    #[getter]
+    fn frontend(&self) -> FrontendService {
+        FrontendService
+    }
+    /// Work handler metrics
+    #[getter]
+    fn work_handler(&self) -> WorkHandler {
+        WorkHandler
+    }
+}
+/// Frontend service metrics (LLM HTTP service)
+/// These getters return the full metric names with the "dynamo_frontend_" prefix
+///
+/// Note: We use instance getters instead of static methods so Python reads them as plain attributes
+/// - The `concat!` macro only accepts string literals, not const references
+/// - So the `name_prefix::FRONTEND` + `frontend_service::*` constants are combined at runtime via `format!`
+/// - This ensures we use actual Rust constants rather than hardcoded literals
+#[pyclass]
+pub struct FrontendService;
+#[pymethods]
+impl FrontendService {
+    /// Total number of LLM requests processed
+    #[getter]
+    fn requests_total(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::REQUESTS_TOTAL
+        )
+    }
+    /// Number of requests waiting in HTTP queue before receiving the first response
+    #[getter]
+    fn queued_requests_total(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::QUEUED_REQUESTS_TOTAL
+        )
+    }
+    /// Number of inflight requests going to the engine (vLLM, SGLang, ...)
+    #[getter]
+    fn inflight_requests_total(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::INFLIGHT_REQUESTS_TOTAL
+        )
+    }
+    /// Duration of LLM requests
+    #[getter]
+    fn request_duration_seconds(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::REQUEST_DURATION_SECONDS
+        )
+    }
+    /// Input sequence length in tokens
+    #[getter]
+    fn input_sequence_tokens(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::INPUT_SEQUENCE_TOKENS
+        )
+    }
+    /// Output sequence length in tokens
+    #[getter]
+    fn output_sequence_tokens(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::OUTPUT_SEQUENCE_TOKENS
+        )
+    }
+    /// Time to first token in seconds
+    #[getter]
+    fn time_to_first_token_seconds(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::TIME_TO_FIRST_TOKEN_SECONDS
+        )
+    }
+    /// Inter-token latency in seconds
+    #[getter]
+    fn inter_token_latency_seconds(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::FRONTEND,
+            frontend_service::INTER_TOKEN_LATENCY_SECONDS
+        )
+    }
+}
+/// Work handler metrics (component request processing)
+/// These getters return the full metric names with the "dynamo_component_" prefix
+#[pyclass]
+pub struct WorkHandler;
+#[pymethods]
+impl WorkHandler {
+    /// Total number of requests processed by work handler
+    #[getter]
+    fn requests_total(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::COMPONENT,
+            work_handler::REQUESTS_TOTAL
+        )
+    }
+    /// Total number of bytes received in requests by work handler
+    #[getter]
+    fn request_bytes_total(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::COMPONENT,
+            work_handler::REQUEST_BYTES_TOTAL
+        )
+    }
+    /// Total number of bytes sent in responses by work handler
+    #[getter]
+    fn response_bytes_total(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::COMPONENT,
+            work_handler::RESPONSE_BYTES_TOTAL
+        )
+    }
+    /// Number of requests currently being processed by work handler
+    #[getter]
+    fn inflight_requests(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::COMPONENT,
+            work_handler::INFLIGHT_REQUESTS
+        )
+    }
+    /// Time spent processing requests by work handler (histogram)
+    #[getter]
+    fn request_duration_seconds(&self) -> String {
+        format!(
+            "{}_{}",
+            name_prefix::COMPONENT,
+            work_handler::REQUEST_DURATION_SECONDS
+        )
+    }
+    /// Total number of errors in work handler processing
+    #[getter]
+    fn errors_total(&self) -> String {
+        format!("{}_{}", name_prefix::COMPONENT, work_handler::ERRORS_TOTAL)
+    }
+}
+/// Add prometheus_names module to the Python bindings
+pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
+    m.add_class::<PrometheusNames>()?;
+    m.add_class::<FrontendService>()?;
+    m.add_class::<WorkHandler>()?;
+    // Add a module-level singleton instance for convenience
+    let prometheus_names_instance = PrometheusNames;
+    m.add("prometheus_names", prometheus_names_instance)?;
+    Ok(())
+}
--- a/lib/bindings/python/src/dynamo/_core.pyi
+++ b/lib/bindings/python/src/dynamo/_core.pyi
@@ -1276,3 +1276,134 @@ class VirtualConnectorClient:
        """Blocks until there is a new decision to fetch using 'get'"""
        ...
+class PrometheusNames:
+    """
+    Main container for all Prometheus metric name constants
+    """
+    @property
+    def frontend(self) -> FrontendService:
+        """
+        Frontend service metrics
+        """
+        ...
+    @property
+    def work_handler(self) -> WorkHandler:
+        """
+        Work handler metrics
+        """
+        ...
+class FrontendService:
+    """
+    Frontend service metrics (LLM HTTP service)
+    These properties return the full metric names with the "dynamo_frontend_" prefix
+    """
+    @property
+    def requests_total(self) -> str:
+        """
+        Total number of LLM requests processed
+        """
+        ...
+    @property
+    def queued_requests_total(self) -> str:
+        """
+        Number of requests waiting in HTTP queue before receiving the first response
+        """
+        ...
+    @property
+    def inflight_requests_total(self) -> str:
+        """
+        Number of inflight requests going to the engine (vLLM, SGLang, ...)
+        """
+        ...
+    @property
+    def request_duration_seconds(self) -> str:
+        """
+        Duration of LLM requests
+        """
+        ...
+    @property
+    def input_sequence_tokens(self) -> str:
+        """
+        Input sequence length in tokens
+        """
+        ...
+    @property
+    def output_sequence_tokens(self) -> str:
+        """
+        Output sequence length in tokens
+        """
+        ...
+    @property
+    def time_to_first_token_seconds(self) -> str:
+        """
+        Time to first token in seconds
+        """
+        ...
+    @property
+    def inter_token_latency_seconds(self) -> str:
+        """
+        Inter-token latency in seconds
+        """
+        ...
+class WorkHandler:
+    """
+    Work handler metrics (component request processing)
+    These properties return the full metric names with the "dynamo_component_" prefix
+    """
+    @property
+    def requests_total(self) -> str:
+        """
+        Total number of requests processed by work handler
+        """
+        ...
+    @property
+    def request_bytes_total(self) -> str:
+        """
+        Total number of bytes received in requests by work handler
+        """
+        ...
+    @property
+    def response_bytes_total(self) -> str:
+        """
+        Total number of bytes sent in responses by work handler
+        """
+        ...
+    @property
+    def inflight_requests(self) -> str:
+        """
+        Number of requests currently being processed by work handler
+        """
+        ...
+    @property
+    def request_duration_seconds(self) -> str:
+        """
+        Time spent processing requests by work handler (histogram)
+        """
+        ...
+    @property
+    def errors_total(self) -> str:
+        """
+        Total number of errors in work handler processing
+        """
+        ...
+# Module-level singleton instance for convenient access
+prometheus_names: PrometheusNames
--- a/lib/runtime/src/metrics/prometheus_names.rs
+++ b/lib/runtime/src/metrics/prometheus_names.rs
@@ -6,6 +6,13 @@
 //! This module provides centralized Prometheus metric name constants and sanitization functions
 //! for various components to ensure consistency and avoid duplication across the codebase.
 //!
+//! ⚠️  **CRITICAL: SYNC WITH PYTHON BINDINGS** ⚠️
+//! When modifying constants in this file, you MUST also update:
+//! `lib/bindings/python/rust/prometheus_names.rs` and `lib/bindings/python/src/dynamo/_core.pyi`
+//!
+//! The Python bindings expose these constants to Python code and must stay in sync.
+//! Any changes here should be reflected in the Python bindings and type stubs immediately.
+//!
 //! ## Naming Conventions
 //!
 //! All metric names should follow: `{prefix}_{name}_{suffix}`
@@ -64,6 +71,9 @@ pub mod labels {
 }
 /// Frontend service metrics (LLM HTTP service)
+///
+/// ⚠️  SYNC ALERT: These constants are exposed to Python via:
+/// `lib/bindings/python/rust/prometheus_names.rs` - FrontendService class
 pub mod frontend_service {
    // TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs
    // for centralized environment variable constant management across the codebase

--- a/tests/utils/payloads.py
+++ b/tests/utils/payloads.py
@@ -20,6 +20,8 @@ from copy import deepcopy
 from dataclasses import dataclass
 from typing import Any, Dict, List
+from dynamo._core import prometheus_names
 logger = logging.getLogger(__name__)
@@ -163,23 +165,26 @@ class MetricsPayload(BasePayload):
        return response.text
    def validate(self, response: Any, content: str) -> None:
-        pattern = r'dynamo_component_requests_total\{[^}]*model="[^"]*"[^}]*\}\s+(\d+)'
+        requests_total_name = prometheus_names.work_handler.requests_total
+        pattern = (
+            rf'{re.escape(requests_total_name)}\{{[^}}]*model="[^"]*"[^}}]*\}}\s+(\d+)'
+        )
        matches = re.findall(pattern, content)
        if not matches:
            raise AssertionError(
-                "Metric 'dynamo_component_requests_total' with model label not found in metrics output"
+                f"Metric '{requests_total_name}' with model label not found in metrics output"
            )
        for match in matches:
            request_count = int(match)
            if request_count >= self.min_num_requests:
                logger.info(
-                    f"SUCCESS: Found dynamo_component_requests_total with count: {request_count}"
+                    f"SUCCESS: Found {requests_total_name} with count: {request_count}"
                )
                return
        raise AssertionError(
-            f"dynamo_component_requests_total exists but has count {request_count} which is less than required {self.min_num_requests}"
+            f"{requests_total_name} exists but has count {request_count} which is less than required {self.min_num_requests}"
        )