Unverified Commit f2ba58e5 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat: add Rust-to-Python const code generator for prometheus_names.py (#3425)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent ca674098
...@@ -2091,6 +2091,28 @@ dependencies = [ ...@@ -2091,6 +2091,28 @@ dependencies = [
"uuid 1.18.1", "uuid 1.18.1",
] ]
[[package]]
name = "dynamo-codegen"
version = "0.1.0"
dependencies = [
"anyhow",
"proc-macro2",
"quote",
"syn 2.0.106",
]
[[package]]
name = "dynamo-engine-llamacpp"
version = "0.5.1"
dependencies = [
"async-stream",
"dynamo-llm",
"dynamo-runtime",
"llama-cpp-2",
"tokio",
"tracing",
]
[[package]] [[package]]
name = "dynamo-engine-mistralrs" name = "dynamo-engine-mistralrs"
version = "0.5.1" version = "0.5.1"
......
...@@ -10,6 +10,7 @@ members = [ ...@@ -10,6 +10,7 @@ members = [
"lib/async-openai", "lib/async-openai",
"lib/parsers", "lib/parsers",
"lib/bindings/c", "lib/bindings/c",
"lib/bindings/python/codegen",
"lib/engines/*", "lib/engines/*",
] ]
# Exclude certain packages that are slow to build and we don't ship as flagship # Exclude certain packages that are slow to build and we don't ship as flagship
......
...@@ -19,7 +19,7 @@ import typing ...@@ -19,7 +19,7 @@ import typing
from prometheus_api_client import PrometheusConnect from prometheus_api_client import PrometheusConnect
from pydantic import BaseModel, ValidationError from pydantic import BaseModel, ValidationError
from dynamo._core import prometheus_names from dynamo import prometheus_names
from dynamo.runtime.logging import configure_dynamo_logging from dynamo.runtime.logging import configure_dynamo_logging
configure_dynamo_logging() configure_dynamo_logging()
...@@ -94,7 +94,7 @@ class PrometheusAPIClient: ...@@ -94,7 +94,7 @@ class PrometheusAPIClient:
def get_avg_inter_token_latency(self, interval: str, model_name: str): def get_avg_inter_token_latency(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
prometheus_names.frontend.inter_token_latency_seconds, prometheus_names.frontend_service.INTER_TOKEN_LATENCY_SECONDS,
interval, interval,
"avg inter token latency", "avg inter token latency",
model_name, model_name,
...@@ -102,7 +102,7 @@ class PrometheusAPIClient: ...@@ -102,7 +102,7 @@ class PrometheusAPIClient:
def get_avg_time_to_first_token(self, interval: str, model_name: str): def get_avg_time_to_first_token(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
prometheus_names.frontend.time_to_first_token_seconds, prometheus_names.frontend_service.TIME_TO_FIRST_TOKEN_SECONDS,
interval, interval,
"avg time to first token", "avg time to first token",
model_name, model_name,
...@@ -110,7 +110,7 @@ class PrometheusAPIClient: ...@@ -110,7 +110,7 @@ class PrometheusAPIClient:
def get_avg_request_duration(self, interval: str, model_name: str): def get_avg_request_duration(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
prometheus_names.frontend.request_duration_seconds, prometheus_names.frontend_service.REQUEST_DURATION_SECONDS,
interval, interval,
"avg request duration", "avg request duration",
model_name, model_name,
...@@ -119,7 +119,7 @@ class PrometheusAPIClient: ...@@ -119,7 +119,7 @@ class PrometheusAPIClient:
def get_avg_request_count(self, interval: str, model_name: str): def get_avg_request_count(self, interval: str, model_name: str):
# This function follows a different query pattern than the other metrics # This function follows a different query pattern than the other metrics
try: try:
requests_total_metric = prometheus_names.frontend.requests_total requests_total_metric = prometheus_names.frontend_service.REQUESTS_TOTAL
raw_res = self.prom.custom_query( raw_res = self.prom.custom_query(
query=f"increase({requests_total_metric}[{interval}])" query=f"increase({requests_total_metric}[{interval}])"
) )
...@@ -138,7 +138,7 @@ class PrometheusAPIClient: ...@@ -138,7 +138,7 @@ class PrometheusAPIClient:
def get_avg_input_sequence_tokens(self, interval: str, model_name: str): def get_avg_input_sequence_tokens(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
prometheus_names.frontend.input_sequence_tokens, prometheus_names.frontend_service.INPUT_SEQUENCE_TOKENS,
interval, interval,
"avg input sequence tokens", "avg input sequence tokens",
model_name, model_name,
...@@ -146,7 +146,7 @@ class PrometheusAPIClient: ...@@ -146,7 +146,7 @@ class PrometheusAPIClient:
def get_avg_output_sequence_tokens(self, interval: str, model_name: str): def get_avg_output_sequence_tokens(self, interval: str, model_name: str):
return self._get_average_metric( return self._get_average_metric(
prometheus_names.frontend.output_sequence_tokens, prometheus_names.frontend_service.OUTPUT_SEQUENCE_TOKENS,
interval, interval,
"avg output sequence tokens", "avg output sequence tokens",
model_name, model_name,
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
[package]
name = "dynamo-codegen"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
[dependencies]
syn = { version = "2.0", features = ["full", "extra-traits"] }
quote = "1.0"
proc-macro2 = "1.0"
anyhow = "1.0"
[[bin]]
name = "gen-python-prometheus-names"
path = "src/gen_python_prometheus_names.rs"
# Dynamo Codegen
Python code generator for Dynamo Python bindings.
## gen-python-prometheus-names
Generates `prometheus_names.py` from Rust source `lib/runtime/src/metrics/prometheus_names.rs`.
### Usage
```bash
cargo run -p dynamo-codegen --bin gen-python-prometheus-names
```
### What it does
- Parses Rust AST from `lib/runtime/src/metrics/prometheus_names.rs`
- Generates Python classes with constants at `lib/bindings/python/src/dynamo/prometheus_names.py`
- Handles macro-generated constants (e.g., `kvstats_name!("active_blocks")` → `"kvstats_active_blocks"`)
### Example
**Rust input:**
```rust
pub mod kvstats {
pub const ACTIVE_BLOCKS: &str = kvstats_name!("active_blocks");
}
```
**Python output:**
```python
class kvstats:
ACTIVE_BLOCKS = "kvstats_active_blocks"
```
### When to run
Run after modifying `lib/runtime/src/metrics/prometheus_names.rs` to regenerate the Python file.
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Binary to generate Python prometheus_names from Rust source
use anyhow::{Context, Result};
use dynamo_codegen::prometheus_parser::{ModuleDef, PrometheusParser};
use std::collections::HashMap;
use std::path::PathBuf;
/// Generates Python module code from parsed Rust prometheus_names modules.
///
/// Every parsed module becomes one Python class and every constant becomes a
/// class attribute. Classes are emitted in sorted module-name order so the
/// output is deterministic; constants keep the order in which they were parsed.
struct PythonGenerator<'a> {
    modules: &'a HashMap<String, ModuleDef>,
}

impl<'a> PythonGenerator<'a> {
    /// Creates a generator that borrows the modules collected by `parser`.
    fn new(parser: &'a PrometheusParser) -> Self {
        Self {
            modules: &parser.modules,
        }
    }

    /// Reads `templates/<template_name>` relative to this crate's manifest directory.
    ///
    /// # Panics
    /// Panics (with the underlying I/O error) when the template cannot be read.
    fn load_template(template_name: &str) -> String {
        let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join("templates")
            .join(template_name);
        std::fs::read_to_string(&template_path).unwrap_or_else(|err| {
            panic!(
                "Failed to read template: {} ({})",
                template_path.display(),
                err
            )
        })
    }

    /// Returns the full Python file: the header template followed by the generated classes.
    fn generate_python_file(&self) -> String {
        let mut output = Self::load_template("prometheus_names.py.template");
        // Guard against a template without a trailing newline, which would
        // otherwise fuse its last line with the first `class` statement.
        if !output.is_empty() && !output.ends_with('\n') {
            output.push('\n');
        }
        // Append generated classes
        output.push_str(&self.generate_classes());
        output
    }

    /// Escapes `text` so it can be embedded between double quotes (or triple
    /// double quotes) in Python source without terminating the literal early.
    fn escape_python_str(text: &str) -> String {
        let mut escaped = String::with_capacity(text.len());
        for ch in text.chars() {
            match ch {
                '\\' => escaped.push_str("\\\\"),
                '"' => escaped.push_str("\\\""),
                '\n' => escaped.push_str("\\n"),
                '\r' => escaped.push_str("\\r"),
                '\t' => escaped.push_str("\\t"),
                other => escaped.push(other),
            }
        }
        escaped
    }

    /// Renders one Python class per module, in sorted module-name order.
    ///
    /// The first non-empty line of a module doc comment becomes the class
    /// docstring; constant doc comments become `#` comments above the attribute.
    fn generate_classes(&self) -> String {
        const INDENT: &str = "    ";
        let mut lines = Vec::new();

        // Sort module names to ensure deterministic output
        let mut module_names: Vec<&String> = self.modules.keys().collect();
        module_names.sort();

        for module_name in module_names {
            let module = &self.modules[module_name];
            lines.push(format!("class {}:", module_name));

            // Use the first line of the module doc comment as the docstring.
            let docstring = module
                .doc_comment
                .lines()
                .next()
                .map(str::trim)
                .filter(|line| !line.is_empty());
            if let Some(doc) = docstring {
                lines.push(format!(
                    "{}\"\"\"{}\"\"\"",
                    INDENT,
                    Self::escape_python_str(doc)
                ));
            }
            // A class body must contain at least one statement.
            if docstring.is_none() && module.constants.is_empty() {
                lines.push(format!("{}pass", INDENT));
            }
            lines.push(String::new());

            for constant in &module.constants {
                for comment_line in constant.doc_comment.lines() {
                    lines.push(format!("{}# {}", INDENT, comment_line));
                }
                lines.push(format!(
                    "{}{} = \"{}\"",
                    INDENT,
                    constant.name,
                    Self::escape_python_str(&constant.value)
                ));
            }
            lines.push(String::new());
        }
        lines.join("\n")
    }
}
fn main() -> Result<()> {
let args: Vec<String> = std::env::args().collect();
let mut source_path: Option<PathBuf> = None;
let mut output_path: Option<PathBuf> = None;
let mut i = 1;
while i < args.len() {
match args[i].as_str() {
"--source" => {
i += 1;
if i < args.len() {
source_path = Some(PathBuf::from(&args[i]));
}
}
"--output" => {
i += 1;
if i < args.len() {
output_path = Some(PathBuf::from(&args[i]));
}
}
"--help" | "-h" => {
print_usage();
return Ok(());
}
_ => {
eprintln!("Unknown argument: {}", args[i]);
print_usage();
std::process::exit(1);
}
}
i += 1;
}
// Determine paths relative to codegen directory
let codegen_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
let source = source_path.unwrap_or_else(|| {
// From: lib/bindings/python/codegen
// To: lib/runtime/src/metrics/prometheus_names.rs
codegen_dir
.join("../../../runtime/src/metrics/prometheus_names.rs")
.canonicalize()
.expect("Failed to resolve source path")
});
let output = output_path.unwrap_or_else(|| {
// From: lib/bindings/python/codegen
// To: lib/bindings/python/src/dynamo/prometheus_names.py
codegen_dir
.join("../src/dynamo/prometheus_names.py")
.canonicalize()
.unwrap_or_else(|_| {
// If file doesn't exist yet, resolve the parent directory
let dir = codegen_dir
.join("../src/dynamo")
.canonicalize()
.expect("Failed to resolve output directory");
dir.join("prometheus_names.py")
})
});
println!("Generating Python prometheus_names from Rust source");
println!("Source: {}", source.display());
println!("Output: {}", output.display());
println!();
let content = std::fs::read_to_string(&source)
.with_context(|| format!("Failed to read source file: {}", source.display()))?;
println!("Parsing Rust AST...");
let parser = PrometheusParser::parse_file(&content)?;
println!("Found {} modules:", parser.modules.len());
let mut module_names: Vec<&String> = parser.modules.keys().collect();
module_names.sort();
for name in module_names.iter() {
let module = &parser.modules[name.as_str()];
println!(
" - {}: {} constants{}",
name,
module.constants.len(),
if module.is_macro_generated {
" (macro-generated)"
} else {
""
}
);
}
println!("\nGenerating Python prometheus_names module...");
let generator = PythonGenerator::new(&parser);
let python_code = generator.generate_python_file();
// Ensure output directory exists
if let Some(parent) = output.parent() {
std::fs::create_dir_all(parent)
.with_context(|| format!("Failed to create output directory: {}", parent.display()))?;
}
std::fs::write(&output, python_code)
.with_context(|| format!("Failed to write output file: {}", output.display()))?;
println!("✓ Generated Python prometheus_names: {}", output.display());
println!("\nSuccess! Python module ready for import.");
Ok(())
}
/// Writes the command-line help text to standard output.
fn print_usage() {
    // Kept as a raw string so the embedded quotes and comment markers need no escaping.
    let usage_text = r#"
gen-python-prometheus-names - Generate Python prometheus_names from Rust source
Usage: gen-python-prometheus-names [OPTIONS]
Parses lib/runtime/src/metrics/prometheus_names.rs and generates a pure Python
module with 1:1 constant mappings at lib/bindings/python/src/dynamo/prometheus_names.py
This allows Python code to import Prometheus metric constants without Rust bindings:
from dynamo.prometheus_names import frontend_service, kvstats
OPTIONS:
--source PATH Path to Rust source file
(default: lib/runtime/src/metrics/prometheus_names.rs)
--output PATH Path to Python output file
(default: lib/bindings/python/src/dynamo/prometheus_names.py)
--help, -h Print this help message
EXAMPLES:
# Generate with default paths
cargo run -p dynamo-codegen --bin gen-python-prometheus-names
# Generate with custom output
cargo run -p dynamo-codegen --bin gen-python-prometheus-names -- --output /tmp/test.py
"#;
    println!("{}", usage_text);
}
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Code generation utilities for Dynamo project
//!
//! This crate provides tools to generate code from Rust sources to other languages.
pub mod prometheus_parser;
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Parser for prometheus_names.rs to extract constants and modules
use anyhow::{Context, Result};
use std::collections::HashMap;
use syn::{File, Item, ItemConst, ItemMacro, ItemMod};
/// One public `&str` constant extracted from a parsed module.
#[derive(Debug, Clone)]
pub struct ConstantDef {
    /// Identifier of the constant as written in the Rust source.
    pub name: String,
    /// Extracted string value; the parser may prepend a module's macro prefix to it.
    pub value: String,
    /// Non-empty `doc` attribute lines, trimmed and joined with `\n` (empty when undocumented).
    pub doc_comment: String,
}
/// One public inline module and the string constants found directly inside it.
#[derive(Debug, Clone)]
pub struct ModuleDef {
    /// Identifier of the module.
    pub name: String,
    /// Constants in the order they appear in the module body.
    pub constants: Vec<ConstantDef>,
    /// Non-empty `doc` attribute lines of the module, trimmed and joined with `\n`.
    pub doc_comment: String,
    /// True when the module contains a `*_name` macro from which a prefix was extracted.
    pub is_macro_generated: bool,
    /// The extracted prefix with trailing underscores removed, if any.
    pub macro_prefix: Option<String>,
}
/// Result of parsing a prometheus_names source file.
pub struct PrometheusParser {
    /// Parsed modules keyed by module name.
    pub modules: HashMap<String, ModuleDef>,
}
impl PrometheusParser {
/// Parses Rust source text and collects every top-level module accepted by `parse_module`.
///
/// # Errors
/// Fails when `content` is not a valid Rust file or when a module cannot be parsed.
pub fn parse_file(content: &str) -> Result<Self> {
    let syntax_tree: File = syn::parse_str(content).context("Failed to parse Rust file")?;

    let mut modules = HashMap::new();
    for item in &syntax_tree.items {
        // Only `mod` items are of interest at the top level.
        let module = match item {
            Item::Mod(module) => module,
            _ => continue,
        };
        if let Some(definition) = Self::parse_module(module)? {
            modules.insert(definition.name.clone(), definition);
        }
    }

    Ok(Self { modules })
}
fn parse_module(module: &ItemMod) -> Result<Option<ModuleDef>> {
// Only process public modules
if !matches!(module.vis, syn::Visibility::Public(_)) {
return Ok(None);
}
let module_name = module.ident.to_string();
let doc_comment = Self::extract_doc_comment(&module.attrs);
let (_, items) = match &module.content {
Some(content) => content,
None => return Ok(None),
};
let mut constants = Vec::new();
let mut is_macro_generated = false;
let mut macro_prefix = None;
for item in items {
match item {
Item::Const(const_item) => {
if let Some(const_def) = Self::parse_const(const_item)? {
constants.push(const_def);
}
}
Item::Macro(macro_item) => {
// Check if this is a macro_rules! that generates names with a prefix
if let Some(prefix) = Self::extract_macro_prefix(macro_item) {
is_macro_generated = true;
macro_prefix = Some(prefix);
}
}
_ => {}
}
}
// Apply macro prefix to constants if needed
if is_macro_generated && macro_prefix.is_some() {
let prefix = macro_prefix.as_ref().unwrap();
for constant in &mut constants {
// Only apply if the constant doesn't already have the prefix
if constant.name == "PREFIX" {
// PREFIX constant should be just the prefix with trailing underscore
continue;
}
// Check if value looks like it should have prefix applied
// (doesn't already start with the prefix)
if !constant.value.starts_with(prefix) {
constant.value = format!("{}_{}", prefix, constant.value);
}
}
}
Ok(Some(ModuleDef {
name: module_name,
constants,
doc_comment,
is_macro_generated,
macro_prefix,
}))
}
/// Converts one `const` item into a `ConstantDef`.
///
/// Returns `Ok(None)` unless the constant is `pub` and its type is a reference
/// whose target path ends in `str`.
///
/// # Errors
/// Fails when the initializer expression cannot be reduced to a string.
fn parse_const(const_item: &ItemConst) -> Result<Option<ConstantDef>> {
    // Only process public constants
    if !matches!(const_item.vis, syn::Visibility::Public(_)) {
        return Ok(None);
    }

    // Only process &str constants
    let refers_to_str = match &*const_item.ty {
        syn::Type::Reference(reference) => match &*reference.elem {
            syn::Type::Path(type_path) => type_path
                .path
                .segments
                .last()
                .map_or(false, |segment| segment.ident == "str"),
            _ => false,
        },
        _ => false,
    };
    if !refers_to_str {
        return Ok(None);
    }

    let value = Self::extract_string_value(&const_item.expr)?;
    Ok(Some(ConstantDef {
        name: const_item.ident.to_string(),
        value,
        doc_comment: Self::extract_doc_comment(&const_item.attrs),
    }))
}
/// Reduces a constant initializer to its string value.
///
/// Handles a string literal, a macro invocation (delegated to
/// `extract_from_macro_tokens`), and a method call, whose receiver is examined
/// recursively.
///
/// # Errors
/// Fails for non-string literals and for any other expression kind.
fn extract_string_value(expr: &syn::Expr) -> Result<String> {
    match expr {
        // Direct string literal: "value"
        syn::Expr::Lit(literal) => match &literal.lit {
            syn::Lit::Str(text) => Ok(text.value()),
            _ => anyhow::bail!("Expected string literal"),
        },
        // Macro invocation: some_macro!("value")
        syn::Expr::Macro(invocation) => Self::extract_from_macro_tokens(&invocation.mac.tokens),
        // Method call: "value".to_string()
        syn::Expr::MethodCall(call) => Self::extract_string_value(&call.receiver),
        _ => anyhow::bail!("Unsupported expression type for constant value"),
    }
}
fn extract_from_macro_tokens(tokens: &proc_macro2::TokenStream) -> Result<String> {
// Parse the tokens to find string literals
let tokens_str = tokens.to_string();
// Look for string literals in the token stream
// This handles cases like: concat!("prefix_", "value")
let parts: Vec<&str> = tokens_str
.split('"')
.enumerate()
.filter(|(i, _)| i % 2 == 1)
.map(|(_, s)| s)
.collect();
if parts.is_empty() {
anyhow::bail!("No string literals found in macro");
}
// Concatenate all string parts (for concat! macro)
Ok(parts.join(""))
}
/// Extracts the name prefix produced by a `*_name` macro definition.
///
/// Looks for the first string literal after a `concat!` in the macro body
/// (pattern: `concat!("prefix_", $name)`) and returns it with trailing
/// underscores removed. Returns `None` when the macro has no identifier, its
/// name does not end in `_name`, or no such literal is found.
fn extract_macro_prefix(macro_item: &ItemMacro) -> Option<String> {
    let macro_name = macro_item.ident.as_ref()?.to_string();
    if !macro_name.ends_with("_name") {
        return None;
    }

    let body = macro_item.mac.tokens.to_string();
    // The spacing produced when a token stream is stringified is not
    // guaranteed, so accept both `concat !` and `concat!`.
    let concat_at = body.find("concat !").or_else(|| body.find("concat!"))?;

    // Split into: text before the opening quote, the literal, the remainder.
    let mut pieces = body[concat_at..].splitn(3, '"');
    pieces.next()?;
    let literal = pieces.next()?;
    // A third piece exists only if the closing quote was found.
    pieces.next()?;

    Some(literal.trim_end_matches('_').to_string())
}
/// Collects the text of all `#[doc = "..."]` attributes.
///
/// Each line is trimmed, empty lines are dropped, and the rest are joined
/// with `\n`. Returns an empty string when there is nothing to collect.
fn extract_doc_comment(attrs: &[syn::Attribute]) -> String {
    attrs
        .iter()
        .filter(|attr| attr.path().is_ident("doc"))
        .filter_map(|attr| match &attr.meta {
            syn::Meta::NameValue(pair) => match &pair.value {
                syn::Expr::Lit(expr_lit) => match &expr_lit.lit {
                    syn::Lit::Str(text) => Some(text.value().trim().to_string()),
                    _ => None,
                },
                _ => None,
            },
            _ => None,
        })
        .filter(|line| !line.is_empty())
        .collect::<Vec<_>>()
        .join("\n")
}
}
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Python constants for Prometheus metric names
AUTO-GENERATED from lib/runtime/src/metrics/prometheus_names.rs
DO NOT EDIT THIS FILE MANUALLY
To regenerate this file after modifying lib/runtime/src/metrics/prometheus_names.rs:
cargo run -p dynamo-codegen --bin gen-python-prometheus-names
This module provides pure Python access to Prometheus metric name constants
without requiring Rust bindings.
Usage (both patterns supported):
# Pattern 1: Import module
from dynamo import prometheus_names
print(prometheus_names.frontend_service.REQUESTS_TOTAL) # "requests_total"
print(prometheus_names.kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks"
# Pattern 2: Import specific classes
from dynamo.prometheus_names import frontend_service, kvstats
print(frontend_service.REQUESTS_TOTAL) # "requests_total"
print(kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks"
"""
from __future__ import annotations
...@@ -56,7 +56,6 @@ mod llm; ...@@ -56,7 +56,6 @@ mod llm;
mod parsers; mod parsers;
mod planner; mod planner;
mod prometheus_metrics; mod prometheus_metrics;
mod prometheus_names;
type JsonServerStreamingIngress = type JsonServerStreamingIngress =
Ingress<SingleIn<serde_json::Value>, ManyOut<RsAnnotated<serde_json::Value>>>; Ingress<SingleIn<serde_json::Value>, ManyOut<RsAnnotated<serde_json::Value>>>;
...@@ -185,7 +184,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { ...@@ -185,7 +184,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
engine::add_to_module(m)?; engine::add_to_module(m)?;
parsers::add_to_module(m)?; parsers::add_to_module(m)?;
prometheus_names::add_to_module(m)?;
m.add_class::<prometheus_metrics::PyRuntimeMetrics>()?; m.add_class::<prometheus_metrics::PyRuntimeMetrics>()?;
let prometheus_metrics = PyModule::new(m.py(), "prometheus_metrics")?; let prometheus_metrics = PyModule::new(m.py(), "prometheus_metrics")?;
......
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Python bindings for Prometheus metric name constants
//!
//! ⚠️ **CRITICAL: SYNC WITH RUST SOURCE AND PYTHON TYPE STUBS** ⚠️
//! This file exposes constants from `lib/runtime/src/metrics/prometheus_names.rs` to Python.
//! When the source file is modified, you MUST update BOTH files to match:
//!
//! 1. **This Rust file** - Update the actual Python bindings implementation
//! 2. **Python type stubs** - Update `lib/bindings/python/src/dynamo/_core.pyi`
//! The .pyi file provides type hints for IDEs and static type checkers.
//! Without updating it, IDEs won't recognize new classes/methods for autocomplete.
//!
//! The constants here should mirror the structure and values from the Rust source.
//! Any changes to metric names in the source must be reflected here immediately.
//!
//! Files to sync:
//! - Source: `lib/runtime/src/metrics/prometheus_names.rs`
//! - This file: `lib/bindings/python/rust/prometheus_names.rs`
//! - Type stubs: `lib/bindings/python/src/dynamo/_core.pyi`
//!
//! ## Python Usage Example
//!
//! ```python
//! from dynamo._core import prometheus_names
//!
//! # Access metrics directly (no constructor call needed!)
//! frontend = prometheus_names.frontend
//! print(frontend.requests_total) # "dynamo_frontend_requests_total"
//! print(frontend.queued_requests) # "dynamo_frontend_queued_requests"
//! print(frontend.inflight_requests) # "dynamo_frontend_inflight_requests"
//! print(frontend.disconnected_clients) # "dynamo_frontend_disconnected_clients"
//! print(frontend.request_duration_seconds) # "dynamo_frontend_request_duration_seconds"
//! print(frontend.input_sequence_tokens) # "dynamo_frontend_input_sequence_tokens"
//! print(frontend.output_sequence_tokens) # "dynamo_frontend_output_sequence_tokens"
//! print(frontend.time_to_first_token_seconds) # "dynamo_frontend_time_to_first_token_seconds"
//! print(frontend.inter_token_latency_seconds) # "dynamo_frontend_inter_token_latency_seconds"
//! print(frontend.model_context_length) # "dynamo_frontend_model_context_length"
//! print(frontend.model_kv_cache_block_size) # "dynamo_frontend_model_kv_cache_block_size"
//! print(frontend.model_migration_limit) # "dynamo_frontend_model_migration_limit"
//!
//! work_handler = prometheus_names.work_handler
//! print(work_handler.requests_total) # "dynamo_component_requests_total"
//! print(work_handler.request_bytes_total) # "dynamo_component_request_bytes_total"
//! print(work_handler.response_bytes_total) # "dynamo_component_response_bytes_total"
//! print(work_handler.inflight_requests) # "dynamo_component_inflight_requests"
//! print(work_handler.request_duration_seconds) # "dynamo_component_request_duration_seconds"
//! print(work_handler.errors_total) # "dynamo_component_errors_total"
//!
//! kvstats = prometheus_names.kvstats
//! print(kvstats.active_blocks) # "kvstats_active_blocks"
//! print(kvstats.total_blocks) # "kvstats_total_blocks"
//! print(kvstats.gpu_cache_usage_percent) # "kvstats_gpu_cache_usage_percent"
//! print(kvstats.gpu_prefix_cache_hit_rate) # "kvstats_gpu_prefix_cache_hit_rate"
//!
//! # Use in Prometheus queries
//! query = f"rate({frontend.requests_total}[5m])"
//! pattern = rf'{work_handler.requests_total}\{{[^}}]*model="[^"]*"[^}}]*\}}'
//! ```
use dynamo_runtime::metrics::prometheus_names::*;
use pyo3::prelude::*;
/// Main container for all Prometheus metric name constants
// Stateless unit struct exposed to Python through `#[pyclass]`. Each getter
// below returns another unit struct whose own getters build the name strings.
#[pyclass]
pub struct PrometheusNames;

#[pymethods]
impl PrometheusNames {
    /// Frontend service metrics
    // Exposed as a `#[getter]`, so it reads as an attribute rather than a call.
    #[getter]
    fn frontend(&self) -> FrontendService {
        FrontendService
    }

    /// Work handler metrics
    #[getter]
    fn work_handler(&self) -> WorkHandler {
        WorkHandler
    }

    /// KV stats metrics
    #[getter]
    fn kvstats(&self) -> KvStatsMetrics {
        KvStatsMetrics
    }
}
/// Frontend service metrics (LLM HTTP service)
/// These methods return the full metric names with the "dynamo_frontend_" prefix
///
/// Note: We use instance methods instead of static methods for better Python ergonomics
/// - The `concat!` macro only accepts string literals, not const references
/// - We need to combine `name_prefix::FRONTEND` + `frontend_service::*` constants at runtime
/// - This ensures we use actual Rust constants rather than hardcoded literals
#[pyclass]
pub struct FrontendService;

// Joins `name_prefix::FRONTEND` and `suffix` with an underscore. Shared by every
// getter below so the formatting rule lives in one place.
fn frontend_metric_name(suffix: impl std::fmt::Display) -> String {
    format!("{}_{}", name_prefix::FRONTEND, suffix)
}

// The `///` lines are kept verbatim: pyo3 surfaces them as Python docstrings.
#[pymethods]
impl FrontendService {
    /// Total number of LLM requests processed
    #[getter]
    fn requests_total(&self) -> String {
        frontend_metric_name(frontend_service::REQUESTS_TOTAL)
    }

    /// Number of requests waiting in HTTP queue before receiving the first response
    #[getter]
    fn queued_requests(&self) -> String {
        frontend_metric_name(frontend_service::QUEUED_REQUESTS)
    }

    /// Number of inflight requests going to the engine (vLLM, SGLang, ...)
    #[getter]
    fn inflight_requests(&self) -> String {
        frontend_metric_name(frontend_service::INFLIGHT_REQUESTS)
    }

    /// Duration of LLM requests
    #[getter]
    fn request_duration_seconds(&self) -> String {
        frontend_metric_name(frontend_service::REQUEST_DURATION_SECONDS)
    }

    /// Input sequence length in tokens
    #[getter]
    fn input_sequence_tokens(&self) -> String {
        frontend_metric_name(frontend_service::INPUT_SEQUENCE_TOKENS)
    }

    /// Output sequence length in tokens
    #[getter]
    fn output_sequence_tokens(&self) -> String {
        frontend_metric_name(frontend_service::OUTPUT_SEQUENCE_TOKENS)
    }

    /// Time to first token in seconds
    #[getter]
    fn time_to_first_token_seconds(&self) -> String {
        frontend_metric_name(frontend_service::TIME_TO_FIRST_TOKEN_SECONDS)
    }

    /// Inter-token latency in seconds
    #[getter]
    fn inter_token_latency_seconds(&self) -> String {
        frontend_metric_name(frontend_service::INTER_TOKEN_LATENCY_SECONDS)
    }

    /// Number of disconnected clients
    #[getter]
    fn disconnected_clients(&self) -> String {
        frontend_metric_name(frontend_service::DISCONNECTED_CLIENTS)
    }

    /// Model total KV blocks
    #[getter]
    fn model_total_kv_blocks(&self) -> String {
        frontend_metric_name(frontend_service::MODEL_TOTAL_KV_BLOCKS)
    }

    /// Model max number of sequences
    #[getter]
    fn model_max_num_seqs(&self) -> String {
        frontend_metric_name(frontend_service::MODEL_MAX_NUM_SEQS)
    }

    /// Model max number of batched tokens
    #[getter]
    fn model_max_num_batched_tokens(&self) -> String {
        frontend_metric_name(frontend_service::MODEL_MAX_NUM_BATCHED_TOKENS)
    }

    /// Model context length
    #[getter]
    fn model_context_length(&self) -> String {
        frontend_metric_name(frontend_service::MODEL_CONTEXT_LENGTH)
    }

    /// Model KV cache block size
    #[getter]
    fn model_kv_cache_block_size(&self) -> String {
        frontend_metric_name(frontend_service::MODEL_KV_CACHE_BLOCK_SIZE)
    }

    /// Model migration limit
    #[getter]
    fn model_migration_limit(&self) -> String {
        frontend_metric_name(frontend_service::MODEL_MIGRATION_LIMIT)
    }
}
/// Work handler metrics (component request processing)
/// These methods return the full metric names with the "dynamo_component_" prefix
#[pyclass]
pub struct WorkHandler;

// Joins `name_prefix::COMPONENT` and `suffix` with an underscore. Shared by
// every getter below so the formatting rule lives in one place.
fn component_metric_name(suffix: impl std::fmt::Display) -> String {
    format!("{}_{}", name_prefix::COMPONENT, suffix)
}

// The `///` lines are kept verbatim: pyo3 surfaces them as Python docstrings.
#[pymethods]
impl WorkHandler {
    /// Total number of requests processed by work handler
    #[getter]
    fn requests_total(&self) -> String {
        component_metric_name(work_handler::REQUESTS_TOTAL)
    }

    /// Total number of bytes received in requests by work handler
    #[getter]
    fn request_bytes_total(&self) -> String {
        component_metric_name(work_handler::REQUEST_BYTES_TOTAL)
    }

    /// Total number of bytes sent in responses by work handler
    #[getter]
    fn response_bytes_total(&self) -> String {
        component_metric_name(work_handler::RESPONSE_BYTES_TOTAL)
    }

    /// Number of requests currently being processed by work handler
    #[getter]
    fn inflight_requests(&self) -> String {
        component_metric_name(work_handler::INFLIGHT_REQUESTS)
    }

    /// Time spent processing requests by work handler (histogram)
    #[getter]
    fn request_duration_seconds(&self) -> String {
        component_metric_name(work_handler::REQUEST_DURATION_SECONDS)
    }

    /// Total number of errors in work handler processing
    #[getter]
    fn errors_total(&self) -> String {
        component_metric_name(work_handler::ERRORS_TOTAL)
    }
}
/// KV stats metrics (KV cache statistics)
/// These methods return the metric names with the "kvstats_" prefix
#[pyclass]
pub struct KvStatsMetrics;

// Each getter returns an owned copy of the matching `kvstats::*` constant; no
// prefix is added here.
#[pymethods]
impl KvStatsMetrics {
    /// Number of active KV cache blocks currently in use
    #[getter]
    fn active_blocks(&self) -> String {
        String::from(kvstats::ACTIVE_BLOCKS)
    }

    /// Total number of KV cache blocks available
    #[getter]
    fn total_blocks(&self) -> String {
        String::from(kvstats::TOTAL_BLOCKS)
    }

    /// GPU cache usage as a percentage (0.0-1.0)
    #[getter]
    fn gpu_cache_usage_percent(&self) -> String {
        String::from(kvstats::GPU_CACHE_USAGE_PERCENT)
    }

    /// GPU prefix cache hit rate as a percentage (0.0-1.0)
    #[getter]
    fn gpu_prefix_cache_hit_rate(&self) -> String {
        String::from(kvstats::GPU_PREFIX_CACHE_HIT_RATE)
    }
}
/// Registers the metric-name classes on `m` and adds a ready-made
/// `prometheus_names` instance as a module attribute.
pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<PrometheusNames>()?;
    m.add_class::<FrontendService>()?;
    m.add_class::<WorkHandler>()?;
    m.add_class::<KvStatsMetrics>()?;

    // Module-level singleton instance for convenience; the result of `add`
    // is the function's result.
    m.add("prometheus_names", PrometheusNames)
}
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Python type stubs for Prometheus metric name constants
⚠️ **CRITICAL: SYNC WITH RUST SOURCE** ⚠️
This file must stay in sync with:
- Source: `lib/runtime/src/metrics/prometheus_names.rs`
- Bindings: `lib/bindings/python/rust/prometheus_names.rs`
When the Rust source is modified, update all three files immediately.
"""
class PrometheusNames:
    """Main container for all Prometheus metric name constants.

    Each property returns the object that groups one family of metric names.
    """

    @property
    def frontend(self) -> FrontendService:
        """Frontend service metrics."""
        ...

    @property
    def work_handler(self) -> WorkHandler:
        """Work handler metrics."""
        ...

    @property
    def kvstats(self) -> KvStatsMetrics:
        """KV stats metrics."""
        ...
class FrontendService:
    """Frontend service metrics (LLM HTTP service).

    Each property returns the full metric name with the "dynamo_frontend_" prefix.
    """

    @property
    def requests_total(self) -> str:
        """Total number of LLM requests processed."""
        ...

    @property
    def queued_requests(self) -> str:
        """Number of requests waiting in HTTP queue before receiving the first response."""
        ...

    @property
    def inflight_requests(self) -> str:
        """Number of inflight requests going to the engine (vLLM, SGLang, ...)."""
        ...

    @property
    def request_duration_seconds(self) -> str:
        """Duration of LLM requests."""
        ...

    @property
    def input_sequence_tokens(self) -> str:
        """Input sequence length in tokens."""
        ...

    @property
    def output_sequence_tokens(self) -> str:
        """Output sequence length in tokens."""
        ...

    @property
    def time_to_first_token_seconds(self) -> str:
        """Time to first token in seconds."""
        ...

    @property
    def inter_token_latency_seconds(self) -> str:
        """Inter-token latency in seconds."""
        ...

    @property
    def disconnected_clients(self) -> str:
        """Number of disconnected clients."""
        ...

    @property
    def model_total_kv_blocks(self) -> str:
        """Model total KV blocks."""
        ...

    @property
    def model_max_num_seqs(self) -> str:
        """Model max number of sequences."""
        ...

    @property
    def model_max_num_batched_tokens(self) -> str:
        """Model max number of batched tokens."""
        ...

    @property
    def model_context_length(self) -> str:
        """Model context length."""
        ...

    @property
    def model_kv_cache_block_size(self) -> str:
        """Model KV cache block size."""
        ...

    @property
    def model_migration_limit(self) -> str:
        """Model migration limit."""
        ...
class WorkHandler:
    """Metric names for the work handler (component request processing).

    Each property yields the full metric name, including the
    "dynamo_component_" prefix.
    """

    @property
    def requests_total(self) -> str:
        """Total requests processed by the work handler."""
        ...

    @property
    def request_bytes_total(self) -> str:
        """Total bytes received in requests by the work handler."""
        ...

    @property
    def response_bytes_total(self) -> str:
        """Total bytes sent in responses by the work handler."""
        ...

    @property
    def inflight_requests(self) -> str:
        """Requests currently being processed by the work handler."""
        ...

    @property
    def request_duration_seconds(self) -> str:
        """Time spent processing requests by the work handler (histogram)."""
        ...

    @property
    def errors_total(self) -> str:
        """Total errors raised during work handler processing."""
        ...
class KvStatsMetrics:
    """Metric names for KV stats (KV cache statistics).

    Each property yields the metric name, including the "kvstats_" prefix.
    """

    @property
    def active_blocks(self) -> str:
        """Active KV cache blocks currently in use."""
        ...

    @property
    def total_blocks(self) -> str:
        """Total KV cache blocks available."""
        ...

    @property
    def gpu_cache_usage_percent(self) -> str:
        """GPU cache usage as a percentage (0.0-1.0)."""
        ...

    @property
    def gpu_prefix_cache_hit_rate(self) -> str:
        """GPU prefix cache hit rate as a percentage (0.0-1.0)."""
        ...
# Module-level singleton instance for convenient access.
# This line only declares the attribute's type (PrometheusNames); no value is
# assigned here.
prometheus_names: PrometheusNames
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""
Python constants for Prometheus metric names
AUTO-GENERATED from lib/runtime/src/metrics/prometheus_names.rs
DO NOT EDIT THIS FILE MANUALLY
To regenerate this file after modifying lib/runtime/src/metrics/prometheus_names.rs:
cargo run -p dynamo-codegen --bin gen-python-prometheus-names
This module provides pure Python access to Prometheus metric name constants
without requiring Rust bindings.
Usage (both patterns supported):
# Pattern 1: Import module
from dynamo import prometheus_names
print(prometheus_names.frontend_service.REQUESTS_TOTAL) # "requests_total"
print(prometheus_names.kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks"
# Pattern 2: Import specific classes
from dynamo.prometheus_names import frontend_service, kvstats
print(frontend_service.REQUESTS_TOTAL) # "requests_total"
print(kvstats.ACTIVE_BLOCKS) # "kvstats_active_blocks"
"""
from __future__ import annotations
class distributed_runtime:
    """Metric names for the DistributedRuntime core."""

    # Uptime of the DistributedRuntime, in seconds (total)
    UPTIME_SECONDS = "uptime_seconds"
class frontend_service:
    """Metric names for the frontend service (LLM HTTP service)."""

    # Name of the environment variable that overrides the default metric prefix
    METRICS_PREFIX_ENV = "DYN_METRICS_PREFIX"

    # --- Request-level metrics ---

    # LLM requests processed, in total
    REQUESTS_TOTAL = "requests_total"
    # Requests waiting in the HTTP queue before receiving the first response (gauge)
    QUEUED_REQUESTS = "queued_requests"
    # Inflight/concurrent requests going to the engine (vLLM, SGLang, ...).
    # Gauge (current state, can go up and down), hence no _total suffix.
    INFLIGHT_REQUESTS = "inflight_requests"
    # Disconnected clients (gauge that can go up and down)
    DISCONNECTED_CLIENTS = "disconnected_clients"
    # Duration of LLM requests
    REQUEST_DURATION_SECONDS = "request_duration_seconds"
    # Input sequence length, in tokens
    INPUT_SEQUENCE_TOKENS = "input_sequence_tokens"
    # Output sequence length, in tokens
    OUTPUT_SEQUENCE_TOKENS = "output_sequence_tokens"
    # Time to first token, in seconds
    TIME_TO_FIRST_TOKEN_SECONDS = "time_to_first_token_seconds"
    # Inter-token latency, in seconds
    INTER_TOKEN_LATENCY_SECONDS = "inter_token_latency_seconds"

    # --- Model configuration metrics: runtime config (from ModelRuntimeConfig) ---

    # Total KV blocks available for a worker serving the model
    MODEL_TOTAL_KV_BLOCKS = "model_total_kv_blocks"
    # Maximum number of sequences for a worker serving the model (runtime config)
    MODEL_MAX_NUM_SEQS = "model_max_num_seqs"
    # Maximum number of batched tokens for a worker serving the model (runtime config)
    MODEL_MAX_NUM_BATCHED_TOKENS = "model_max_num_batched_tokens"

    # --- Model configuration metrics: MDC (from ModelDeploymentCard) ---

    # Maximum context length for a worker serving the model (MDC)
    MODEL_CONTEXT_LENGTH = "model_context_length"
    # KV cache block size for a worker serving the model (MDC)
    MODEL_KV_CACHE_BLOCK_SIZE = "model_kv_cache_block_size"
    # Request migration limit for a worker serving the model (MDC)
    MODEL_MIGRATION_LIMIT = "model_migration_limit"
class kvbm_connector:
    """Names used for the KVBM connector."""

    # Leader side of the KVBM connector
    KVBM_CONNECTOR_LEADER = "kvbm_connector_leader"
    # Worker side of the KVBM connector
    KVBM_CONNECTOR_WORKER = "kvbm_connector_worker"
class kvrouter:
    """KV router metric names."""

    # KV cache events applied to the index (including status)
    KV_CACHE_EVENTS_APPLIED = "kv_cache_events_applied"
class kvstats:
    """KvStats metrics from LLM workers.

    Every metric constant below holds the complete metric name, which already
    starts with ``PREFIX``.
    """

    # Prefix for all KvStats metrics.
    # The previous value was an empty string, which contradicted both this
    # description and every metric name defined in this class.
    # NOTE(review): this module is generated from prometheus_names.rs; the
    # generator must be fixed as well, or regeneration will reintroduce "".
    PREFIX = "kvstats_"
    # Number of active KV cache blocks currently in use
    ACTIVE_BLOCKS = "kvstats_active_blocks"
    # Total number of KV cache blocks available
    TOTAL_BLOCKS = "kvstats_total_blocks"
    # GPU cache usage as a percentage (0.0-1.0)
    GPU_CACHE_USAGE_PERCENT = "kvstats_gpu_cache_usage_percent"
    # GPU prefix cache hit rate as a percentage (0.0-1.0)
    GPU_PREFIX_CACHE_HIT_RATE = "kvstats_gpu_prefix_cache_hit_rate"
class labels:
    """Prometheus label names that the metrics system inserts automatically."""

    # Identifies the component
    COMPONENT = "dynamo_component"
    # Identifies the namespace
    NAMESPACE = "dynamo_namespace"
    # Identifies the endpoint
    ENDPOINT = "dynamo_endpoint"
class name_prefix:
    """Prefixes for metric names, shared across the metrics system."""

    # Prefix shared by all Prometheus metric names
    COMPONENT = "dynamo_component"
    # Prefix used by frontend service metrics
    FRONTEND = "dynamo_frontend"
class nats_client:
    """NATS client metrics (DistributedRuntime contains a NATS client shared by all children).

    Every metric constant below holds the complete metric name, which already
    starts with ``PREFIX``.
    """

    # Prefix for all NATS client metrics.
    # The previous value was an empty string, which contradicted both this
    # description and every metric name defined in this class.
    # NOTE(review): this module is generated from prometheus_names.rs; the
    # generator must be fixed as well, or regeneration will reintroduce "".
    PREFIX = "nats_client_"
    # Total number of bytes received by NATS client
    IN_TOTAL_BYTES = "nats_client_in_total_bytes"
    # Total number of bytes sent by NATS client
    OUT_OVERHEAD_BYTES = "nats_client_out_overhead_bytes"
    # Total number of messages received by NATS client
    IN_MESSAGES = "nats_client_in_messages"
    # Total number of messages sent by NATS client
    OUT_MESSAGES = "nats_client_out_messages"
    # Current number of active connections for NATS client
    # Note: Gauge metric measuring current connections, not cumulative total
    CURRENT_CONNECTIONS = "nats_client_current_connections"
    # Current connection state of NATS client (0=disconnected, 1=connected, 2=reconnecting)
    CONNECTION_STATE = "nats_client_connection_state"
class nats_service:
    """NATS service metrics, from the $SRV.STATS.<service_name> requests on NATS server.

    Every metric constant below holds the complete metric name, which already
    starts with ``PREFIX``.
    """

    # Prefix for all NATS service metrics.
    # The previous value was an empty string, which contradicted both this
    # description and every metric name defined in this class.
    # NOTE(review): this module is generated from prometheus_names.rs; the
    # generator must be fixed as well, or regeneration will reintroduce "".
    PREFIX = "nats_service_"
    # Average processing time in milliseconds (maps to: average_processing_time in ms)
    PROCESSING_MS_AVG = "nats_service_processing_ms_avg"
    # Total errors across all endpoints (maps to: num_errors)
    ERRORS_TOTAL = "nats_service_errors_total"
    # Total requests across all endpoints (maps to: num_requests)
    REQUESTS_TOTAL = "nats_service_requests_total"
    # Total processing time in milliseconds (maps to: processing_time in ms)
    PROCESSING_MS_TOTAL = "nats_service_processing_ms_total"
    # Number of active services (derived from ServiceSet.services)
    ACTIVE_SERVICES = "nats_service_active_services"
    # Number of active endpoints (derived from ServiceInfo.endpoints)
    ACTIVE_ENDPOINTS = "nats_service_active_endpoints"
class task_tracker:
    """Prometheus metric name suffixes for the task tracker."""

    # Tasks issued/submitted, in total
    TASKS_ISSUED_TOTAL = "tasks_issued_total"
    # Tasks started, in total
    TASKS_STARTED_TOTAL = "tasks_started_total"
    # Tasks that completed successfully, in total
    TASKS_SUCCESS_TOTAL = "tasks_success_total"
    # Tasks that were cancelled, in total
    TASKS_CANCELLED_TOTAL = "tasks_cancelled_total"
    # Tasks that failed, in total
    TASKS_FAILED_TOTAL = "tasks_failed_total"
    # Tasks that were rejected, in total
    TASKS_REJECTED_TOTAL = "tasks_rejected_total"
class work_handler:
    """Prometheus metric names for the work handler."""

    # Requests processed by the work handler, in total
    REQUESTS_TOTAL = "requests_total"
    # Bytes received in requests by the work handler, in total
    REQUEST_BYTES_TOTAL = "request_bytes_total"
    # Bytes sent in responses by the work handler, in total
    RESPONSE_BYTES_TOTAL = "response_bytes_total"
    # Requests currently being processed by the work handler.
    # Gauge (current state, can go up and down), hence no _total suffix.
    INFLIGHT_REQUESTS = "inflight_requests"
    # Time spent processing requests by the work handler (histogram)
    REQUEST_DURATION_SECONDS = "request_duration_seconds"
    # Errors in work handler processing, in total
    ERRORS_TOTAL = "errors_total"
    # Name of the label used to classify the error type
    ERROR_TYPE_LABEL = "error_type"
...@@ -6,12 +6,12 @@ ...@@ -6,12 +6,12 @@
//! This module provides centralized Prometheus metric name constants and sanitization functions //! This module provides centralized Prometheus metric name constants and sanitization functions
//! for various components to ensure consistency and avoid duplication across the codebase. //! for various components to ensure consistency and avoid duplication across the codebase.
//! //!
//! ⚠️ **CRITICAL: SYNC WITH PYTHON BINDINGS** ⚠️ //! ⚠️ **CRITICAL: REGENERATE PYTHON FILE AFTER CHANGES** ⚠️
//! When modifying constants in this file, you MUST also update: //! When modifying constants in this file, regenerate the Python module:
//! `lib/bindings/python/rust/prometheus_names.rs` //! cargo run -p dynamo-codegen --bin gen-python-prometheus-names
//! //!
//! The Python bindings expose these constants to Python code and must stay in sync. //! This generates `lib/bindings/python/src/dynamo/prometheus_names.py`
//! Any changes here should be reflected in the Python bindings immediately. //! with pure Python constants (no Rust bindings needed).
//! //!
//! ## Naming Conventions //! ## Naming Conventions
//! //!
...@@ -84,8 +84,7 @@ pub mod labels { ...@@ -84,8 +84,7 @@ pub mod labels {
/// Frontend service metrics (LLM HTTP service) /// Frontend service metrics (LLM HTTP service)
/// ///
/// ⚠️ SYNC ALERT: These constants are exposed to Python via: /// ⚠️ Python codegen: Run gen-python-prometheus-names after changes
/// `lib/bindings/python/rust/prometheus_names.rs` - FrontendService class
pub mod frontend_service { pub mod frontend_service {
// TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs // TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs
// for centralized environment variable constant management across the codebase // for centralized environment variable constant management across the codebase
......
...@@ -20,7 +20,7 @@ from copy import deepcopy ...@@ -20,7 +20,7 @@ from copy import deepcopy
from dataclasses import dataclass from dataclasses import dataclass
from typing import Any, Dict, List from typing import Any, Dict, List
from dynamo._core import prometheus_names from dynamo import prometheus_names
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -206,7 +206,7 @@ class MetricsPayload(BasePayload): ...@@ -206,7 +206,7 @@ class MetricsPayload(BasePayload):
return response.text return response.text
def validate(self, response: Any, content: str) -> None: def validate(self, response: Any, content: str) -> None:
requests_total_name = prometheus_names.work_handler.requests_total requests_total_name = prometheus_names.work_handler.REQUESTS_TOTAL
pattern = ( pattern = (
rf'{re.escape(requests_total_name)}\{{[^}}]*model="[^"]*"[^}}]*\}}\s+(\d+)' rf'{re.escape(requests_total_name)}\{{[^}}]*model="[^"]*"[^}}]*\}}\s+(\d+)'
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment