Unverified Commit cacac9b9 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat: add Python const for Prometheus metric names (#3244)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent 481bf395
...@@ -19,6 +19,7 @@ import typing ...@@ -19,6 +19,7 @@ import typing
from prometheus_api_client import PrometheusConnect from prometheus_api_client import PrometheusConnect
from pydantic import BaseModel, ValidationError from pydantic import BaseModel, ValidationError
from dynamo._core import prometheus_names
from dynamo.runtime.logging import configure_dynamo_logging from dynamo.runtime.logging import configure_dynamo_logging
configure_dynamo_logging() configure_dynamo_logging()
...@@ -47,18 +48,14 @@ class PrometheusAPIClient: ...@@ -47,18 +48,14 @@ class PrometheusAPIClient:
self.dynamo_namespace = dynamo_namespace self.dynamo_namespace = dynamo_namespace
def _get_average_metric( def _get_average_metric(
self, self, full_metric_name: str, interval: str, operation_name: str, model_name: str
metric_name: str,
interval: str,
operation_name: str,
model_name: str,
) -> float: ) -> float:
""" """
Helper method to get average metrics using the pattern: Helper method to get average metrics using the pattern:
increase(metric_sum[interval])/increase(metric_count[interval]) increase(metric_sum[interval])/increase(metric_count[interval])
Args: Args:
metric_name: Base metric name (e.g., 'inter_token_latency_seconds') full_metric_name: Full metric name (e.g., 'dynamo_frontend_inter_token_latency_seconds')
interval: Time interval for the query (e.g., '60s') interval: Time interval for the query (e.g., '60s')
operation_name: Human-readable name for error logging operation_name: Human-readable name for error logging
...@@ -66,8 +63,6 @@ class PrometheusAPIClient: ...@@ -66,8 +63,6 @@ class PrometheusAPIClient:
Average metric value or 0 if no data/error Average metric value or 0 if no data/error
""" """
try: try:
# TODO: use prometheus_names.rs
full_metric_name = f"dynamo_frontend_{metric_name}"
query = f"increase({full_metric_name}_sum[{interval}])/increase({full_metric_name}_count[{interval}])" query = f"increase({full_metric_name}_sum[{interval}])/increase({full_metric_name}_count[{interval}])"
result = self.prom.custom_query(query=query) result = self.prom.custom_query(query=query)
if not result: if not result:
...@@ -93,7 +88,7 @@ class PrometheusAPIClient: ...@@ -93,7 +88,7 @@ class PrometheusAPIClient:
def get_avg_inter_token_latency(self, interval: str, model_name: str): def get_avg_inter_token_latency(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
"inter_token_latency_seconds", prometheus_names.frontend.inter_token_latency_seconds,
interval, interval,
"avg inter token latency", "avg inter token latency",
model_name, model_name,
...@@ -101,7 +96,7 @@ class PrometheusAPIClient: ...@@ -101,7 +96,7 @@ class PrometheusAPIClient:
def get_avg_time_to_first_token(self, interval: str, model_name: str): def get_avg_time_to_first_token(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
"time_to_first_token_seconds", prometheus_names.frontend.time_to_first_token_seconds,
interval, interval,
"avg time to first token", "avg time to first token",
model_name, model_name,
...@@ -109,7 +104,7 @@ class PrometheusAPIClient: ...@@ -109,7 +104,7 @@ class PrometheusAPIClient:
def get_avg_request_duration(self, interval: str, model_name: str): def get_avg_request_duration(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
"request_duration_seconds", prometheus_names.frontend.request_duration_seconds,
interval, interval,
"avg request duration", "avg request duration",
model_name, model_name,
...@@ -118,8 +113,9 @@ class PrometheusAPIClient: ...@@ -118,8 +113,9 @@ class PrometheusAPIClient:
def get_avg_request_count(self, interval: str, model_name: str): def get_avg_request_count(self, interval: str, model_name: str):
# This function follows a different query pattern than the other metrics # This function follows a different query pattern than the other metrics
try: try:
requests_total_metric = prometheus_names.frontend.requests_total
raw_res = self.prom.custom_query( raw_res = self.prom.custom_query(
query=f"increase(dynamo_frontend_requests_total[{interval}])" query=f"increase({requests_total_metric}[{interval}])"
) )
metrics_containers = parse_frontend_metric_containers(raw_res) metrics_containers = parse_frontend_metric_containers(raw_res)
total_count = 0.0 total_count = 0.0
...@@ -136,7 +132,7 @@ class PrometheusAPIClient: ...@@ -136,7 +132,7 @@ class PrometheusAPIClient:
def get_avg_input_sequence_tokens(self, interval: str, model_name: str): def get_avg_input_sequence_tokens(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
"input_sequence_tokens", prometheus_names.frontend.input_sequence_tokens,
interval, interval,
"avg input sequence tokens", "avg input sequence tokens",
model_name, model_name,
...@@ -144,7 +140,7 @@ class PrometheusAPIClient: ...@@ -144,7 +140,7 @@ class PrometheusAPIClient:
def get_avg_output_sequence_tokens(self, interval: str, model_name: str): def get_avg_output_sequence_tokens(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
"output_sequence_tokens", prometheus_names.frontend.output_sequence_tokens,
interval, interval,
"avg output sequence tokens", "avg output sequence tokens",
model_name, model_name,
......
...@@ -55,6 +55,7 @@ mod http; ...@@ -55,6 +55,7 @@ mod http;
mod llm; mod llm;
mod parsers; mod parsers;
mod planner; mod planner;
mod prometheus_names;
type JsonServerStreamingIngress = type JsonServerStreamingIngress =
Ingress<SingleIn<serde_json::Value>, ManyOut<RsAnnotated<serde_json::Value>>>; Ingress<SingleIn<serde_json::Value>, ManyOut<RsAnnotated<serde_json::Value>>>;
...@@ -184,6 +185,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { ...@@ -184,6 +185,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
engine::add_to_module(m)?; engine::add_to_module(m)?;
parsers::add_to_module(m)?; parsers::add_to_module(m)?;
prometheus_names::add_to_module(m)?;
#[cfg(feature = "block-manager")] #[cfg(feature = "block-manager")]
llm::block_manager::add_to_module(m)?; llm::block_manager::add_to_module(m)?;
......
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Python bindings for Prometheus metric name constants
//!
//! ⚠️ **CRITICAL: SYNC WITH RUST SOURCE AND PYTHON TYPE STUBS** ⚠️
//! This file exposes constants from `lib/runtime/src/metrics/prometheus_names.rs` to Python.
//! When the source file is modified, you MUST update BOTH files to match:
//!
//! 1. **This Rust file** - Update the actual Python bindings implementation
//! 2. **Python type stubs** - Update `lib/bindings/python/src/dynamo/_core.pyi`
//! The .pyi file provides type hints for IDEs and static type checkers.
//! Without updating it, IDEs won't recognize new classes/methods for autocomplete.
//!
//! The constants here should mirror the structure and values from the Rust source.
//! Any changes to metric names in the source must be reflected here immediately.
//!
//! Files to sync:
//! - Source: `lib/runtime/src/metrics/prometheus_names.rs`
//! - This file: `lib/bindings/python/rust/prometheus_names.rs`
//! - Type stubs: `lib/bindings/python/src/dynamo/_core.pyi`
//!
//! ## Python Usage Example
//!
//! ```python
//! from dynamo._core import prometheus_names
//!
//! # Access metrics directly (no constructor call needed!)
//! frontend = prometheus_names.frontend
//! print(frontend.requests_total) # "dynamo_frontend_requests_total"
//! print(frontend.request_duration_seconds) # "dynamo_frontend_request_duration_seconds"
//! print(frontend.inter_token_latency_seconds) # "dynamo_frontend_inter_token_latency_seconds"
//!
//! work_handler = prometheus_names.work_handler
//! print(work_handler.requests_total) # "dynamo_component_requests_total"
//! print(work_handler.errors_total) # "dynamo_component_errors_total"
//!
//! # Use in Prometheus queries
//! query = f"rate({frontend.requests_total}[5m])"
//! pattern = rf'{work_handler.requests_total}\{{[^}}]*model="[^"]*"[^}}]*\}}'
//! ```
use dynamo_runtime::metrics::prometheus_names::*;
use pyo3::prelude::*;
/// Main container for all Prometheus metric name constants
// Unit struct: it carries no fields. `add_to_module` in this file registers one
// instance of it on the Python module under the attribute name `prometheus_names`.
#[pyclass]
pub struct PrometheusNames;

#[pymethods]
impl PrometheusNames {
    /// Frontend service metrics
    // `#[getter]` exposes this as a read-only Python attribute (`.frontend`),
    // so no call parentheses are needed on the Python side.
    #[getter]
    fn frontend(&self) -> FrontendService {
        // A fresh unit-struct value is returned on every access.
        FrontendService
    }

    /// Work handler metrics
    // Exposed to Python as the read-only attribute `.work_handler`.
    #[getter]
    fn work_handler(&self) -> WorkHandler {
        // A fresh unit-struct value is returned on every access.
        WorkHandler
    }
}
/// Frontend service metrics (LLM HTTP service)
/// These methods return the full metric names with the "dynamo_frontend_" prefix
///
/// Note: We use instance methods instead of static methods for better Python ergonomics
/// - The `concat!` macro only accepts string literals, not const references
/// - We need to combine `name_prefix::FRONTEND` + `frontend_service::*` constants at runtime
/// - This ensures we use actual Rust constants rather than hardcoded literals
#[pyclass]
pub struct FrontendService;

// Joins the frontend prefix constant and a metric base name with a single `_`.
// Declared as a free function (outside the `#[pymethods]` block) so it stays a
// private Rust detail and is not exported as a Python method.
fn frontend_metric_name(base: &str) -> String {
    format!("{}_{}", name_prefix::FRONTEND, base)
}

// Every getter below builds its value from the shared Rust constants at call
// time; none of the metric names is spelled out as a literal in this file.
#[pymethods]
impl FrontendService {
    /// Total number of LLM requests processed
    #[getter]
    fn requests_total(&self) -> String {
        frontend_metric_name(frontend_service::REQUESTS_TOTAL)
    }

    /// Number of requests waiting in HTTP queue before receiving the first response
    #[getter]
    fn queued_requests_total(&self) -> String {
        frontend_metric_name(frontend_service::QUEUED_REQUESTS_TOTAL)
    }

    /// Number of inflight requests going to the engine (vLLM, SGLang, ...)
    #[getter]
    fn inflight_requests_total(&self) -> String {
        frontend_metric_name(frontend_service::INFLIGHT_REQUESTS_TOTAL)
    }

    /// Duration of LLM requests
    #[getter]
    fn request_duration_seconds(&self) -> String {
        frontend_metric_name(frontend_service::REQUEST_DURATION_SECONDS)
    }

    /// Input sequence length in tokens
    #[getter]
    fn input_sequence_tokens(&self) -> String {
        frontend_metric_name(frontend_service::INPUT_SEQUENCE_TOKENS)
    }

    /// Output sequence length in tokens
    #[getter]
    fn output_sequence_tokens(&self) -> String {
        frontend_metric_name(frontend_service::OUTPUT_SEQUENCE_TOKENS)
    }

    /// Time to first token in seconds
    #[getter]
    fn time_to_first_token_seconds(&self) -> String {
        frontend_metric_name(frontend_service::TIME_TO_FIRST_TOKEN_SECONDS)
    }

    /// Inter-token latency in seconds
    #[getter]
    fn inter_token_latency_seconds(&self) -> String {
        frontend_metric_name(frontend_service::INTER_TOKEN_LATENCY_SECONDS)
    }
}
/// Work handler metrics (component request processing)
/// These methods return the full metric names with the "dynamo_component_" prefix
#[pyclass]
pub struct WorkHandler;

// Joins the component prefix constant and a metric base name with a single `_`.
// Declared as a free function (outside the `#[pymethods]` block) so it stays a
// private Rust detail and is not exported as a Python method.
fn component_metric_name(base: &str) -> String {
    format!("{}_{}", name_prefix::COMPONENT, base)
}

// Every getter below builds its value from the shared Rust constants at call
// time; none of the metric names is spelled out as a literal in this file.
#[pymethods]
impl WorkHandler {
    /// Total number of requests processed by work handler
    #[getter]
    fn requests_total(&self) -> String {
        component_metric_name(work_handler::REQUESTS_TOTAL)
    }

    /// Total number of bytes received in requests by work handler
    #[getter]
    fn request_bytes_total(&self) -> String {
        component_metric_name(work_handler::REQUEST_BYTES_TOTAL)
    }

    /// Total number of bytes sent in responses by work handler
    #[getter]
    fn response_bytes_total(&self) -> String {
        component_metric_name(work_handler::RESPONSE_BYTES_TOTAL)
    }

    /// Number of requests currently being processed by work handler
    #[getter]
    fn inflight_requests(&self) -> String {
        component_metric_name(work_handler::INFLIGHT_REQUESTS)
    }

    /// Time spent processing requests by work handler (histogram)
    #[getter]
    fn request_duration_seconds(&self) -> String {
        component_metric_name(work_handler::REQUEST_DURATION_SECONDS)
    }

    /// Total number of errors in work handler processing
    #[getter]
    fn errors_total(&self) -> String {
        component_metric_name(work_handler::ERRORS_TOTAL)
    }
}
/// Register the Prometheus metric-name bindings on the given Python module.
///
/// Adds the `PrometheusNames`, `FrontendService` and `WorkHandler` classes, then
/// attaches a `PrometheusNames` instance as the module attribute
/// `prometheus_names`. Any registration error is returned to the caller.
pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
    // Classes are registered first, in the same order as before.
    m.add_class::<PrometheusNames>()?;
    m.add_class::<FrontendService>()?;
    m.add_class::<WorkHandler>()?;

    // Module-level singleton for convenience; the result of this final
    // registration is the function's return value.
    m.add("prometheus_names", PrometheusNames)
}
...@@ -1276,3 +1276,134 @@ class VirtualConnectorClient: ...@@ -1276,3 +1276,134 @@ class VirtualConnectorClient:
"""Blocks until there is a new decision to fetch using 'get'""" """Blocks until there is a new decision to fetch using 'get'"""
... ...
class PrometheusNames:
    """
    Main container for all Prometheus metric name constants

    Groups the metric names by the service that emits them. An instance is
    declared at module level in this stub as ``prometheus_names``.
    """

    @property
    def frontend(self) -> FrontendService:
        """
        Frontend service metrics

        Returns:
            A ``FrontendService`` whose properties are metric-name strings.
        """
        ...

    @property
    def work_handler(self) -> WorkHandler:
        """
        Work handler metrics

        Returns:
            A ``WorkHandler`` whose properties are metric-name strings.
        """
        ...
class FrontendService:
    """
    Frontend service metrics (LLM HTTP service)

    Each read-only property returns the full metric name (a ``str``) with the
    "dynamo_frontend_" prefix, not the metric's value.
    """

    @property
    def requests_total(self) -> str:
        """
        Name of the metric: total number of LLM requests processed
        """
        ...

    @property
    def queued_requests_total(self) -> str:
        """
        Name of the metric: number of requests waiting in HTTP queue before
        receiving the first response
        """
        ...

    @property
    def inflight_requests_total(self) -> str:
        """
        Name of the metric: number of inflight requests going to the engine
        (vLLM, SGLang, ...)
        """
        ...

    @property
    def request_duration_seconds(self) -> str:
        """
        Name of the metric: duration of LLM requests
        """
        ...

    @property
    def input_sequence_tokens(self) -> str:
        """
        Name of the metric: input sequence length in tokens
        """
        ...

    @property
    def output_sequence_tokens(self) -> str:
        """
        Name of the metric: output sequence length in tokens
        """
        ...

    @property
    def time_to_first_token_seconds(self) -> str:
        """
        Name of the metric: time to first token in seconds
        """
        ...

    @property
    def inter_token_latency_seconds(self) -> str:
        """
        Name of the metric: inter-token latency in seconds
        """
        ...
class WorkHandler:
    """
    Work handler metrics (component request processing)

    Each read-only property returns the full metric name (a ``str``) with the
    "dynamo_component_" prefix, not the metric's value.
    """

    @property
    def requests_total(self) -> str:
        """
        Name of the metric: total number of requests processed by work handler
        """
        ...

    @property
    def request_bytes_total(self) -> str:
        """
        Name of the metric: total number of bytes received in requests by
        work handler
        """
        ...

    @property
    def response_bytes_total(self) -> str:
        """
        Name of the metric: total number of bytes sent in responses by
        work handler
        """
        ...

    @property
    def inflight_requests(self) -> str:
        """
        Name of the metric: number of requests currently being processed by
        work handler
        """
        ...

    @property
    def request_duration_seconds(self) -> str:
        """
        Name of the metric: time spent processing requests by work handler
        (histogram)
        """
        ...

    @property
    def errors_total(self) -> str:
        """
        Name of the metric: total number of errors in work handler processing
        """
        ...
# Module-level singleton instance for convenient access
prometheus_names: PrometheusNames
...@@ -6,6 +6,13 @@ ...@@ -6,6 +6,13 @@
//! This module provides centralized Prometheus metric name constants and sanitization functions //! This module provides centralized Prometheus metric name constants and sanitization functions
//! for various components to ensure consistency and avoid duplication across the codebase. //! for various components to ensure consistency and avoid duplication across the codebase.
//! //!
//! ⚠️ **CRITICAL: SYNC WITH PYTHON BINDINGS** ⚠️
//! When modifying constants in this file, you MUST also update:
//! `lib/bindings/python/rust/prometheus_names.rs`
//!
//! The Python bindings expose these constants to Python code and must stay in sync.
//! Any changes here should be reflected in the Python bindings immediately.
//!
//! ## Naming Conventions //! ## Naming Conventions
//! //!
//! All metric names should follow: `{prefix}_{name}_{suffix}` //! All metric names should follow: `{prefix}_{name}_{suffix}`
...@@ -64,6 +71,9 @@ pub mod labels { ...@@ -64,6 +71,9 @@ pub mod labels {
} }
/// Frontend service metrics (LLM HTTP service) /// Frontend service metrics (LLM HTTP service)
///
/// ⚠️ SYNC ALERT: These constants are exposed to Python via:
/// `lib/bindings/python/rust/prometheus_names.rs` - FrontendService class
pub mod frontend_service { pub mod frontend_service {
// TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs // TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs
// for centralized environment variable constant management across the codebase // for centralized environment variable constant management across the codebase
......
...@@ -20,6 +20,8 @@ from copy import deepcopy ...@@ -20,6 +20,8 @@ from copy import deepcopy
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, List from typing import Any, Dict, List
from dynamo._core import prometheus_names
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -163,23 +165,26 @@ class MetricsPayload(BasePayload): ...@@ -163,23 +165,26 @@ class MetricsPayload(BasePayload):
return response.text return response.text
def validate(self, response: Any, content: str) -> None: def validate(self, response: Any, content: str) -> None:
pattern = r'dynamo_component_requests_total\{[^}]*model="[^"]*"[^}]*\}\s+(\d+)' requests_total_name = prometheus_names.work_handler.requests_total
pattern = (
rf'{re.escape(requests_total_name)}\{{[^}}]*model="[^"]*"[^}}]*\}}\s+(\d+)'
)
matches = re.findall(pattern, content) matches = re.findall(pattern, content)
if not matches: if not matches:
raise AssertionError( raise AssertionError(
"Metric 'dynamo_component_requests_total' with model label not found in metrics output" f"Metric '{requests_total_name}' with model label not found in metrics output"
) )
for match in matches: for match in matches:
request_count = int(match) request_count = int(match)
if request_count >= self.min_num_requests: if request_count >= self.min_num_requests:
logger.info( logger.info(
f"SUCCESS: Found dynamo_component_requests_total with count: {request_count}" f"SUCCESS: Found {requests_total_name} with count: {request_count}"
) )
return return
raise AssertionError( raise AssertionError(
f"dynamo_component_requests_total exists but has count {request_count} which is less than required {self.min_num_requests}" f"{requests_total_name} exists but has count {request_count} which is less than required {self.min_num_requests}"
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment