feat: add Rust-to-Python const code generator for prometheus_names.py (#3425)

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>

feat: add Rust-to-Python const code generator for prometheus_names.py (#3425)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
f2ba58e5 · Keiven C · GitHub · ca674098 · f2ba58e5 · f2ba58e5
Unverified Commit f2ba58e5 authored Oct 10, 2025 by Keiven C Committed by GitHub Oct 10, 2025
15 changed files
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2091,6 +2091,28 @@ dependencies = [
 "uuid 1.18.1",
 ]
+[[package]]
+name = "dynamo-codegen"
+version = "0.1.0"
+dependencies = [
+ "anyhow",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.106",
+]
+[[package]]
+name = "dynamo-engine-llamacpp"
+version = "0.5.1"
+dependencies = [
+ "async-stream",
+ "dynamo-llm",
+ "dynamo-runtime",
+ "llama-cpp-2",
+ "tokio",
+ "tracing",
+]
 [[package]]
 name = "dynamo-engine-mistralrs"
 version = "0.5.1"

--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,7 @@ members = [
    "lib/async-openai",
    "lib/parsers",
    "lib/bindings/c",
+    "lib/bindings/python/codegen",
    "lib/engines/*",
 ]
 # Exclude certain packages that are slow to build and we don't ship as flagship

--- a/components/src/dynamo/planner/utils/prometheus.py
+++ b/components/src/dynamo/planner/utils/prometheus.py
@@ -19,7 +19,7 @@ import typing
 from prometheus_api_client import PrometheusConnect
 from pydantic import BaseModel, ValidationError
-from dynamo._core import prometheus_names
+from dynamo import prometheus_names
 from dynamo.runtime.logging import configure_dynamo_logging
 configure_dynamo_logging()
@@ -94,7 +94,7 @@ class PrometheusAPIClient:
    def get_avg_inter_token_latency(self, interval: str, model_name: str):
        return self._get_average_metric(
-            prometheus_names.frontend.inter_token_latency_seconds,
+            prometheus_names.frontend_service.INTER_TOKEN_LATENCY_SECONDS,
            interval,
            "avg inter token latency",
            model_name,
@@ -102,7 +102,7 @@ class PrometheusAPIClient:
    def get_avg_time_to_first_token(self, interval: str, model_name: str):
        return self._get_average_metric(
-            prometheus_names.frontend.time_to_first_token_seconds,
+            prometheus_names.frontend_service.TIME_TO_FIRST_TOKEN_SECONDS,
            interval,
            "avg time to first token",
            model_name,
@@ -110,7 +110,7 @@ class PrometheusAPIClient:
    def get_avg_request_duration(self, interval: str, model_name: str):
        return self._get_average_metric(
-            prometheus_names.frontend.request_duration_seconds,
+            prometheus_names.frontend_service.REQUEST_DURATION_SECONDS,
            interval,
            "avg request duration",
            model_name,
@@ -119,7 +119,7 @@ class PrometheusAPIClient:
    def get_avg_request_count(self, interval: str, model_name: str):
        # This function follows a different query pattern than the other metrics
        try:
-            requests_total_metric = prometheus_names.frontend.requests_total
+            requests_total_metric = prometheus_names.frontend_service.REQUESTS_TOTAL
            raw_res = self.prom.custom_query(
                query=f"increase({requests_total_metric}[{interval}])"
            )
@@ -138,7 +138,7 @@ class PrometheusAPIClient:
    def get_avg_input_sequence_tokens(self, interval: str, model_name: str):
        return self._get_average_metric(
-            prometheus_names.frontend.input_sequence_tokens,
+            prometheus_names.frontend_service.INPUT_SEQUENCE_TOKENS,
            interval,
            "avg input sequence tokens",
            model_name,
@@ -146,7 +146,7 @@ class PrometheusAPIClient:
    def get_avg_output_sequence_tokens(self, interval: str, model_name: str):
        return self._get_average_metric(
-            prometheus_names.frontend.output_sequence_tokens,
+            prometheus_names.frontend_service.OUTPUT_SEQUENCE_TOKENS,
            interval,
            "avg output sequence tokens",
            model_name,

--- a/lib/bindings/python/codegen/Cargo.toml
+++ b/lib/bindings/python/codegen/Cargo.toml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+[package]
+name = "dynamo-codegen"
+version = "0.1.0"
+edition = "2021"
+license = "Apache-2.0"
+[dependencies]
+syn = { version = "2.0", features = ["full", "extra-traits"] }
+quote = "1.0"
+proc-macro2 = "1.0"
+anyhow = "1.0"
+[[bin]]
+name = "gen-python-prometheus-names"
+path = "src/gen_python_prometheus_names.rs"
--- a/lib/bindings/python/codegen/README.md
+++ b/lib/bindings/python/codegen/README.md
+# Dynamo Codegen
+Python code generator for Dynamo Python bindings.
+## gen-python-prometheus-names
+Generates `prometheus_names.py` from Rust source `lib/runtime/src/metrics/prometheus_names.rs`.
+### Usage
+```bash
+cargo run -p dynamo-codegen --bin gen-python-prometheus-names
+```
+### What it does
+- Parses Rust AST from `lib/runtime/src/metrics/prometheus_names.rs`
+- Generates Python classes with constants at `lib/bindings/python/src/dynamo/prometheus_names.py`
+- Handles macro-generated constants (e.g., `kvstats_name!("active_blocks")` → `"kvstats_active_blocks"`)
+### Example
+**Rust input:**
+```rust
+pub mod kvstats {
+    pub const ACTIVE_BLOCKS: &str = kvstats_name!("active_blocks");
+}
+```
+**Python output:**
+```python
+class kvstats:
+    ACTIVE_BLOCKS = "kvstats_active_blocks"
+```
+### When to run
+Run after modifying `lib/runtime/src/metrics/prometheus_names.rs` to regenerate the Python file.
--- a/lib/bindings/python/codegen/src/gen_python_prometheus_names.rs
+++ b/lib/bindings/python/codegen/src/gen_python_prometheus_names.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//! Binary to generate Python prometheus_names from Rust source
+use anyhow::{Context, Result};
+use dynamo_codegen::prometheus_parser::{ModuleDef, PrometheusParser};
+use std::collections::HashMap;
+use std::path::PathBuf;
+/// Generates Python module code from parsed Rust prometheus_names modules.
+/// Converts Rust const declarations into Python class attributes with deterministic ordering.
+///
+/// The generator only borrows the parsed modules; it does not own or modify them.
+struct PythonGenerator<'a> {
+    /// Parsed modules keyed by Rust module name, borrowed from the parser.
+    modules: &'a HashMap<String, ModuleDef>,
+}
+impl<'a> PythonGenerator<'a> {
+    /// Creates a generator that borrows the modules collected by `parser`.
+    fn new(parser: &'a PrometheusParser) -> Self {
+        Self {
+            modules: &parser.modules,
+        }
+    }
+    /// Reads `templates/<template_name>` relative to this crate's manifest directory.
+    ///
+    /// # Panics
+    /// Panics if the template file cannot be read.
+    fn load_template(template_name: &str) -> String {
+        let template_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
+            .join("templates")
+            .join(template_name);
+        std::fs::read_to_string(&template_path)
+            .unwrap_or_else(|_| panic!("Failed to read template: {}", template_path.display()))
+    }
+    /// Returns the complete generated file: the static template header followed by
+    /// one Python class per parsed module.
+    fn generate_python_file(&self) -> String {
+        let mut output = Self::load_template("prometheus_names.py.template");
+        // Append generated classes
+        output.push_str(&self.generate_classes());
+        output
+    }
+    /// Escapes `text` so it can be placed between double quotes in Python source
+    /// without terminating the literal or being reinterpreted as an escape sequence.
+    fn escape_python_string(text: &str) -> String {
+        let mut escaped = String::with_capacity(text.len());
+        for ch in text.chars() {
+            match ch {
+                '\\' => escaped.push_str("\\\\"),
+                '"' => escaped.push_str("\\\""),
+                '\n' => escaped.push_str("\\n"),
+                '\r' => escaped.push_str("\\r"),
+                other => escaped.push(other),
+            }
+        }
+        escaped
+    }
+    /// Renders every module as a Python class whose attributes are the module's constants.
+    ///
+    /// Modules are emitted sorted by name so the output is deterministic. A module with
+    /// neither a docstring nor constants gets a `pass` body, because an empty class body
+    /// is a syntax error in Python.
+    fn generate_classes(&self) -> String {
+        let mut lines = Vec::new();
+        // Sort module names to ensure deterministic output
+        let mut module_names: Vec<&String> = self.modules.keys().collect();
+        module_names.sort();
+        for module_name in module_names {
+            let module = &self.modules[module_name];
+            lines.push(format!("class {}:", module_name));
+            // Only the first line of the module doc comment becomes the class docstring.
+            let docstring = module.doc_comment.lines().next().unwrap_or("").trim();
+            if !docstring.is_empty() {
+                lines.push(format!(
+                    "    \"\"\"{}\"\"\"",
+                    Self::escape_python_string(docstring)
+                ));
+            }
+            lines.push(String::new());
+            for constant in &module.constants {
+                // Constant doc comments are carried over as `#` comments above the attribute.
+                for comment_line in constant.doc_comment.lines() {
+                    lines.push(format!("    # {}", comment_line));
+                }
+                lines.push(format!(
+                    "    {} = \"{}\"",
+                    constant.name,
+                    Self::escape_python_string(&constant.value)
+                ));
+            }
+            // Keep the class syntactically valid when nothing else was emitted in its body.
+            if docstring.is_empty() && module.constants.is_empty() {
+                lines.push("    pass".to_string());
+            }
+            lines.push(String::new());
+        }
+        lines.join("\n")
+    }
+}
+/// Entry point: parses CLI flags, reads the Rust source, and writes the generated Python file.
+///
+/// Recognized flags are `--source PATH`, `--output PATH` and `--help`/`-h`. When a path flag
+/// is omitted, the default is resolved relative to this crate's manifest directory.
+///
+/// # Errors
+/// Returns an error if a path flag is given without a value, if a default path cannot be
+/// resolved, if the source cannot be read or parsed, or if the output cannot be written.
+fn main() -> Result<()> {
+    let mut source_path: Option<PathBuf> = None;
+    let mut output_path: Option<PathBuf> = None;
+    // Skip argv[0]; each path flag consumes the argument that follows it.
+    let mut args = std::env::args().skip(1);
+    while let Some(arg) = args.next() {
+        match arg.as_str() {
+            "--source" => {
+                // A flag without its value is reported instead of being silently dropped.
+                let value = args.next().context("Missing value for --source")?;
+                source_path = Some(PathBuf::from(value));
+            }
+            "--output" => {
+                let value = args.next().context("Missing value for --output")?;
+                output_path = Some(PathBuf::from(value));
+            }
+            "--help" | "-h" => {
+                print_usage();
+                return Ok(());
+            }
+            _ => {
+                eprintln!("Unknown argument: {}", arg);
+                print_usage();
+                std::process::exit(1);
+            }
+        }
+    }
+    // Determine paths relative to codegen directory
+    let codegen_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    let source = match source_path {
+        Some(path) => path,
+        None => {
+            // From: lib/bindings/python/codegen
+            // To:   lib/runtime/src/metrics/prometheus_names.rs
+            let default_source = codegen_dir.join("../../../runtime/src/metrics/prometheus_names.rs");
+            default_source.canonicalize().with_context(|| {
+                format!("Failed to resolve source path: {}", default_source.display())
+            })?
+        }
+    };
+    let output = match output_path {
+        Some(path) => path,
+        None => {
+            // From: lib/bindings/python/codegen
+            // To:   lib/bindings/python/src/dynamo/prometheus_names.py
+            let default_file = codegen_dir.join("../src/dynamo/prometheus_names.py");
+            match default_file.canonicalize() {
+                Ok(resolved) => resolved,
+                Err(_) => {
+                    // If file doesn't exist yet, resolve the parent directory
+                    let default_dir = codegen_dir.join("../src/dynamo");
+                    default_dir
+                        .canonicalize()
+                        .with_context(|| {
+                            format!(
+                                "Failed to resolve output directory: {}",
+                                default_dir.display()
+                            )
+                        })?
+                        .join("prometheus_names.py")
+                }
+            }
+        }
+    };
+    println!("Generating Python prometheus_names from Rust source");
+    println!("Source: {}", source.display());
+    println!("Output: {}", output.display());
+    println!();
+    let content = std::fs::read_to_string(&source)
+        .with_context(|| format!("Failed to read source file: {}", source.display()))?;
+    println!("Parsing Rust AST...");
+    let parser = PrometheusParser::parse_file(&content)?;
+    println!("Found {} modules:", parser.modules.len());
+    // Sorted so the summary is stable between runs.
+    let mut module_names: Vec<&String> = parser.modules.keys().collect();
+    module_names.sort();
+    for name in module_names.iter() {
+        let module = &parser.modules[name.as_str()];
+        println!(
+            "  - {}: {} constants{}",
+            name,
+            module.constants.len(),
+            if module.is_macro_generated {
+                " (macro-generated)"
+            } else {
+                ""
+            }
+        );
+    }
+    println!("\nGenerating Python prometheus_names module...");
+    let generator = PythonGenerator::new(&parser);
+    let python_code = generator.generate_python_file();
+    // Ensure output directory exists
+    if let Some(parent) = output.parent() {
+        std::fs::create_dir_all(parent)
+            .with_context(|| format!("Failed to create output directory: {}", parent.display()))?;
+    }
+    std::fs::write(&output, python_code)
+        .with_context(|| format!("Failed to write output file: {}", output.display()))?;
+    println!("✓ Generated Python prometheus_names: {}", output.display());
+    println!("\nSuccess! Python module ready for import.");
+    Ok(())
+}
+/// Prints the command-line help text to standard output.
+fn print_usage() {
+    // The help text is kept as one raw string so it is printed exactly as written.
+    let usage = r#"
+gen-python-prometheus-names - Generate Python prometheus_names from Rust source
+Usage: gen-python-prometheus-names [OPTIONS]
+Parses lib/runtime/src/metrics/prometheus_names.rs and generates a pure Python
+module with 1:1 constant mappings at lib/bindings/python/src/dynamo/prometheus_names.py
+This allows Python code to import Prometheus metric constants without Rust bindings:
+    from dynamo.prometheus_names import frontend_service, kvstats
+OPTIONS:
+    --source PATH    Path to Rust source file
+                     (default: lib/runtime/src/metrics/prometheus_names.rs)
+    --output PATH    Path to Python output file
+                     (default: lib/bindings/python/src/dynamo/prometheus_names.py)
+    --help, -h       Print this help message
+EXAMPLES:
+    # Generate with default paths
+    cargo run -p dynamo-codegen --bin gen-python-prometheus-names
+    # Generate with custom output
+    cargo run -p dynamo-codegen --bin gen-python-prometheus-names -- --output /tmp/test.py
+"#;
+    println!("{}", usage);
+}
--- a/lib/bindings/python/codegen/src/lib.rs
+++ b/lib/bindings/python/codegen/src/lib.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//! Code generation utilities for the Dynamo project
+//!
+//! This crate provides tools to generate code from Rust sources to other languages.
+pub mod prometheus_parser;
--- a/lib/bindings/python/codegen/src/prometheus_parser.rs
+++ b/lib/bindings/python/codegen/src/prometheus_parser.rs
+// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//! Parser for prometheus_names.rs to extract constants and modules
+use anyhow::{Context, Result};
+use std::collections::HashMap;
+use syn::{File, Item, ItemConst, ItemMacro, ItemMod};
+/// One public `&str` constant extracted from a Rust module.
+#[derive(Debug, Clone)]
+pub struct ConstantDef {
+    /// Identifier of the constant as written in the Rust source.
+    pub name: String,
+    /// String value of the constant; the parser may prepend a macro prefix to it.
+    pub value: String,
+    /// Non-empty `doc` attribute lines, trimmed and joined with `\n`; empty when there are none.
+    pub doc_comment: String,
+}
+/// One public inline Rust module and the string constants found directly inside it.
+#[derive(Debug, Clone)]
+pub struct ModuleDef {
+    /// Identifier of the module.
+    pub name: String,
+    /// Public `&str` constants, in the order they appear in the module body.
+    pub constants: Vec<ConstantDef>,
+    /// Non-empty `doc` attribute lines, trimmed and joined with `\n`; empty when there are none.
+    pub doc_comment: String,
+    /// True when the module contains a `*_name` macro from which a prefix could be extracted.
+    pub is_macro_generated: bool,
+    /// Prefix taken from that macro's `concat!` call, with trailing underscores removed.
+    pub macro_prefix: Option<String>,
+}
+/// Result of parsing a Rust source file: its public top-level modules.
+pub struct PrometheusParser {
+    /// Parsed modules keyed by module name.
+    pub modules: HashMap<String, ModuleDef>,
+}
+impl PrometheusParser {
+    /// Parses Rust source text and collects every top-level module accepted by `parse_module`.
+    ///
+    /// Returns an error if the text is not valid Rust or if a module fails to parse.
+    /// When two modules share a name, the later one replaces the earlier one.
+    pub fn parse_file(content: &str) -> Result<Self> {
+        let ast: File = syn::parse_str(content).context("Failed to parse Rust file")?;
+        // Only `mod` items are of interest at the top level; everything else is ignored.
+        let top_level_modules = ast.items.iter().filter_map(|item| match item {
+            Item::Mod(module) => Some(module),
+            _ => None,
+        });
+        let mut modules = HashMap::new();
+        for module in top_level_modules {
+            if let Some(definition) = Self::parse_module(module)? {
+                modules.insert(definition.name.clone(), definition);
+            }
+        }
+        Ok(Self { modules })
+    }
+    /// Converts one module item into a `ModuleDef`.
+    ///
+    /// Returns `Ok(None)` for non-public modules and for modules declared without an inline
+    /// body. Only constants and macro definitions directly inside the module are inspected.
+    /// If a prefix macro is found, the prefix is prepended to constant values that do not
+    /// already start with it.
+    fn parse_module(module: &ItemMod) -> Result<Option<ModuleDef>> {
+        // Only process public modules
+        if !matches!(module.vis, syn::Visibility::Public(_)) {
+            return Ok(None);
+        }
+        let (_, items) = match &module.content {
+            Some(content) => content,
+            None => return Ok(None),
+        };
+        let mut constants = Vec::new();
+        // Prefix from the last matching macro in the module, if any.
+        let mut macro_prefix = None;
+        for item in items {
+            match item {
+                Item::Const(const_item) => {
+                    if let Some(const_def) = Self::parse_const(const_item)? {
+                        constants.push(const_def);
+                    }
+                }
+                Item::Macro(macro_item) => {
+                    // Check if this is a macro_rules! that generates names with a prefix
+                    if let Some(prefix) = Self::extract_macro_prefix(macro_item) {
+                        macro_prefix = Some(prefix);
+                    }
+                }
+                _ => {}
+            }
+        }
+        // Apply macro prefix to constants if needed
+        if let Some(prefix) = &macro_prefix {
+            for constant in &mut constants {
+                // The PREFIX constant holds the prefix itself and is left untouched.
+                if constant.name == "PREFIX" {
+                    continue;
+                }
+                // Only apply if the value doesn't already start with the prefix
+                if !constant.value.starts_with(prefix.as_str()) {
+                    constant.value = format!("{}_{}", prefix, constant.value);
+                }
+            }
+        }
+        Ok(Some(ModuleDef {
+            name: module.ident.to_string(),
+            constants,
+            doc_comment: Self::extract_doc_comment(&module.attrs),
+            // A module counts as macro-generated exactly when a prefix was extracted.
+            is_macro_generated: macro_prefix.is_some(),
+            macro_prefix,
+        }))
+    }
+    /// Converts one const item into a `ConstantDef`.
+    ///
+    /// Returns `Ok(None)` unless the constant is public and its type is a reference whose
+    /// target path ends in `str`. Returns an error if the value expression is unsupported.
+    fn parse_const(const_item: &ItemConst) -> Result<Option<ConstantDef>> {
+        // Only process public constants
+        if !matches!(const_item.vis, syn::Visibility::Public(_)) {
+            return Ok(None);
+        }
+        // Only process &str constants
+        let points_to_str = match &*const_item.ty {
+            syn::Type::Reference(reference) => match &*reference.elem {
+                syn::Type::Path(type_path) => type_path
+                    .path
+                    .segments
+                    .last()
+                    .map_or(false, |segment| segment.ident == "str"),
+                _ => false,
+            },
+            _ => false,
+        };
+        if !points_to_str {
+            return Ok(None);
+        }
+        Ok(Some(ConstantDef {
+            name: const_item.ident.to_string(),
+            value: Self::extract_string_value(&const_item.expr)?,
+            doc_comment: Self::extract_doc_comment(&const_item.attrs),
+        }))
+    }
+    /// Extracts the string carried by a constant's initializer expression.
+    ///
+    /// Supports a string literal, a macro invocation containing string literals, and a
+    /// method call whose receiver is itself supported. Anything else is an error.
+    fn extract_string_value(expr: &syn::Expr) -> Result<String> {
+        match expr {
+            // Direct string literal: "value"
+            syn::Expr::Lit(syn::ExprLit {
+                lit: syn::Lit::Str(text),
+                ..
+            }) => Ok(text.value()),
+            // Any other kind of literal (number, bool, ...) is rejected.
+            syn::Expr::Lit(_) => anyhow::bail!("Expected string literal"),
+            // Macro invocation: some_macro!("value")
+            syn::Expr::Macro(invocation) => {
+                Self::extract_from_macro_tokens(&invocation.mac.tokens)
+            }
+            // Method call: "value".to_string()
+            syn::Expr::MethodCall(call) => Self::extract_string_value(&call.receiver),
+            _ => anyhow::bail!("Unsupported expression type for constant value"),
+        }
+    }
+    /// Concatenates, in source order, the values of all string literals in a macro's arguments.
+    ///
+    /// This handles cases like `concat!("prefix_", "value")`. Literals are read from the
+    /// token tree rather than from its printed form, so escaped quotes and other escape
+    /// sequences inside a literal are decoded correctly. Nested groups are searched too.
+    ///
+    /// Returns an error if the arguments contain no string literal.
+    fn extract_from_macro_tokens(tokens: &proc_macro2::TokenStream) -> Result<String> {
+        let mut combined = String::new();
+        let mut found_literal = false;
+        // Explicit stack whose top is the next token in source order.
+        let mut pending: Vec<proc_macro2::TokenTree> = tokens.clone().into_iter().collect();
+        pending.reverse();
+        while let Some(token) = pending.pop() {
+            match token {
+                proc_macro2::TokenTree::Literal(literal) => {
+                    // Non-string literals (numbers, chars, ...) are skipped.
+                    if let syn::Lit::Str(lit_str) = syn::Lit::new(literal) {
+                        combined.push_str(&lit_str.value());
+                        found_literal = true;
+                    }
+                }
+                proc_macro2::TokenTree::Group(group) => {
+                    // Push the group's tokens reversed so its first token is popped next.
+                    let inner: Vec<proc_macro2::TokenTree> = group.stream().into_iter().collect();
+                    pending.extend(inner.into_iter().rev());
+                }
+                _ => {}
+            }
+        }
+        if !found_literal {
+            anyhow::bail!("No string literals found in macro");
+        }
+        Ok(combined)
+    }
+    /// Extracts the name prefix from a macro definition whose name ends in `_name`.
+    ///
+    /// Looks for the first `concat!` in the macro body and returns the first quoted string
+    /// after it, with trailing underscores removed. Returns `None` if the macro has no name,
+    /// the name does not end in `_name`, or no such quoted string exists.
+    fn extract_macro_prefix(macro_item: &ItemMacro) -> Option<String> {
+        let macro_name = macro_item.ident.as_ref()?.to_string();
+        if !macro_name.ends_with("_name") {
+            return None;
+        }
+        // Pattern searched in the printed body: concat ! ( "prefix_" , ...
+        // NOTE(review): this depends on the token stream being printed with a space
+        // between `concat` and `!` — confirm that rendering is stable.
+        let body_text = macro_item.mac.tokens.to_string();
+        let concat_at = body_text.find("concat !")?;
+        // Split into: text before the opening quote, the literal body, and the remainder.
+        let mut pieces = body_text[concat_at..].splitn(3, '"');
+        pieces.next()?;
+        let literal_body = pieces.next()?;
+        // A third piece exists only when the closing quote was found.
+        pieces.next()?;
+        // Remove trailing underscore if present
+        Some(literal_body.trim_end_matches('_').to_string())
+    }
+    /// Collects the text of all `doc` attributes into one string.
+    ///
+    /// Each line is trimmed, empty lines are dropped, and the rest are joined with `\n`.
+    /// Returns an empty string when there are no doc attributes.
+    fn extract_doc_comment(attrs: &[syn::Attribute]) -> String {
+        attrs
+            .iter()
+            .filter(|attr| attr.path().is_ident("doc"))
+            .filter_map(|attr| match &attr.meta {
+                // Only `doc = "text"` attributes with a string literal carry a line.
+                syn::Meta::NameValue(syn::MetaNameValue {
+                    value:
+                        syn::Expr::Lit(syn::ExprLit {
+                            lit: syn::Lit::Str(text),
+                            ..
+                        }),
+                    ..
+                }) => Some(text.value().trim().to_string()),
+                _ => None,
+            })
+            .filter(|line| !line.is_empty())
+            .collect::<Vec<String>>()
+            .join("\n")
+    }
+}
--- a/lib/bindings/python/codegen/templates/prometheus_names.py.template
+++ b/lib/bindings/python/codegen/templates/prometheus_names.py.template
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Python constants for Prometheus metric names
+AUTO-GENERATED from lib/runtime/src/metrics/prometheus_names.rs
+DO NOT EDIT THIS FILE MANUALLY
+To regenerate this file after modifying lib/runtime/src/metrics/prometheus_names.rs:
+    cargo run -p dynamo-codegen --bin gen-python-prometheus-names
+This module provides pure Python access to Prometheus metric name constants
+without requiring Rust bindings.
+Usage (both patterns supported):
+    # Pattern 1: Import module
+    from dynamo import prometheus_names
+    print(prometheus_names.frontend_service.REQUESTS_TOTAL)  # "requests_total"
+    print(prometheus_names.kvstats.ACTIVE_BLOCKS)  # "kvstats_active_blocks"
+    # Pattern 2: Import specific classes
+    from dynamo.prometheus_names import frontend_service, kvstats
+    print(frontend_service.REQUESTS_TOTAL)  # "requests_total"
+    print(kvstats.ACTIVE_BLOCKS)  # "kvstats_active_blocks"
+"""
+from __future__ import annotations
--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -56,7 +56,6 @@ mod llm;
 mod parsers;
 mod planner;
 mod prometheus_metrics;
-mod prometheus_names;
 type JsonServerStreamingIngress =
    Ingress<SingleIn<serde_json::Value>, ManyOut<RsAnnotated<serde_json::Value>>>;
@@ -185,7 +184,6 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
    engine::add_to_module(m)?;
    parsers::add_to_module(m)?;
-    prometheus_names::add_to_module(m)?;
    m.add_class::<prometheus_metrics::PyRuntimeMetrics>()?;
    let prometheus_metrics = PyModule::new(m.py(), "prometheus_metrics")?;

--- a/lib/bindings/python/rust/prometheus_names.rs
+++ b/lib/bindings/python/rust/prometheus_names.rs
-// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//! Python bindings for Prometheus metric name constants
-//!
-//! ⚠️  **CRITICAL: SYNC WITH RUST SOURCE AND PYTHON TYPE STUBS** ⚠️
-//! This file exposes constants from `lib/runtime/src/metrics/prometheus_names.rs` to Python.
-//! When the source file is modified, you MUST update BOTH files to match:
-//!
-//! 1. **This Rust file** - Update the actual Python bindings implementation
-//! 2. **Python type stubs** - Update `lib/bindings/python/src/dynamo/_core.pyi`
-//!    The .pyi file provides type hints for IDEs and static type checkers.
-//!    Without updating it, IDEs won't recognize new classes/methods for autocomplete.
-//!
-//! The constants here should mirror the structure and values from the Rust source.
-//! Any changes to metric names in the source must be reflected here immediately.
-//!
-//! Files to sync:
-//! - Source:      `lib/runtime/src/metrics/prometheus_names.rs`
-//! - This file:   `lib/bindings/python/rust/prometheus_names.rs`
-//! - Type stubs:  `lib/bindings/python/src/dynamo/_core.pyi`
-//!
-//! ## Python Usage Example
-//!
-//! ```python
-//! from dynamo._core import prometheus_names
-//!
-//! # Access metrics directly (no constructor call needed!)
-//! frontend = prometheus_names.frontend
-//! print(frontend.requests_total)           # "dynamo_frontend_requests_total"
-//! print(frontend.queued_requests)          # "dynamo_frontend_queued_requests"
-//! print(frontend.inflight_requests)        # "dynamo_frontend_inflight_requests"
-//! print(frontend.disconnected_clients)     # "dynamo_frontend_disconnected_clients"
-//! print(frontend.request_duration_seconds) # "dynamo_frontend_request_duration_seconds"
-//! print(frontend.input_sequence_tokens)    # "dynamo_frontend_input_sequence_tokens"
-//! print(frontend.output_sequence_tokens)   # "dynamo_frontend_output_sequence_tokens"
-//! print(frontend.time_to_first_token_seconds) # "dynamo_frontend_time_to_first_token_seconds"
-//! print(frontend.inter_token_latency_seconds) # "dynamo_frontend_inter_token_latency_seconds"
-//! print(frontend.model_context_length)     # "dynamo_frontend_model_context_length"
-//! print(frontend.model_kv_cache_block_size) # "dynamo_frontend_model_kv_cache_block_size"
-//! print(frontend.model_migration_limit)    # "dynamo_frontend_model_migration_limit"
-//!
-//! work_handler = prometheus_names.work_handler
-//! print(work_handler.requests_total)       # "dynamo_component_requests_total"
-//! print(work_handler.request_bytes_total)  # "dynamo_component_request_bytes_total"
-//! print(work_handler.response_bytes_total) # "dynamo_component_response_bytes_total"
-//! print(work_handler.inflight_requests)    # "dynamo_component_inflight_requests"
-//! print(work_handler.request_duration_seconds) # "dynamo_component_request_duration_seconds"
-//! print(work_handler.errors_total)         # "dynamo_component_errors_total"
-//!
-//! kvstats = prometheus_names.kvstats
-//! print(kvstats.active_blocks)             # "kvstats_active_blocks"
-//! print(kvstats.total_blocks)              # "kvstats_total_blocks"
-//! print(kvstats.gpu_cache_usage_percent)   # "kvstats_gpu_cache_usage_percent"
-//! print(kvstats.gpu_prefix_cache_hit_rate) # "kvstats_gpu_prefix_cache_hit_rate"
-//!
-//! # Use in Prometheus queries
-//! query = f"rate({frontend.requests_total}[5m])"
-//! pattern = rf'{work_handler.requests_total}\{{[^}}]*model="[^"]*"[^}}]*\}}'
-//! ```
-use dynamo_runtime::metrics::prometheus_names::*;
-use pyo3::prelude::*;
-/// Main container for all Prometheus metric name constants
-#[pyclass]
-pub struct PrometheusNames;
-#[pymethods]
-impl PrometheusNames {
-    /// Frontend service metrics
-    #[getter]
-    fn frontend(&self) -> FrontendService {
-        FrontendService
-    }
-    /// Work handler metrics
-    #[getter]
-    fn work_handler(&self) -> WorkHandler {
-        WorkHandler
-    }
-    /// KV stats metrics
-    #[getter]
-    fn kvstats(&self) -> KvStatsMetrics {
-        KvStatsMetrics
-    }
-}
-/// Frontend service metrics (LLM HTTP service)
-/// These methods return the full metric names with the "dynamo_frontend_" prefix
-///
-/// Note: We use instance methods instead of static methods for better Python ergonomics
-/// - The `concat!` macro only accepts string literals, not const references
-/// - We need to combine `name_prefix::FRONTEND` + `frontend_service::*` constants at runtime
-/// - This ensures we use actual Rust constants rather than hardcoded literals
-#[pyclass]
-pub struct FrontendService;
-#[pymethods]
-impl FrontendService {
-    /// Total number of LLM requests processed
-    #[getter]
-    fn requests_total(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::REQUESTS_TOTAL
-        )
-    }
-    /// Number of requests waiting in HTTP queue before receiving the first response
-    #[getter]
-    fn queued_requests(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::QUEUED_REQUESTS
-        )
-    }
-    /// Number of inflight requests going to the engine (vLLM, SGLang, ...)
-    #[getter]
-    fn inflight_requests(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::INFLIGHT_REQUESTS
-        )
-    }
-    /// Duration of LLM requests
-    #[getter]
-    fn request_duration_seconds(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::REQUEST_DURATION_SECONDS
-        )
-    }
-    /// Input sequence length in tokens
-    #[getter]
-    fn input_sequence_tokens(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::INPUT_SEQUENCE_TOKENS
-        )
-    }
-    /// Output sequence length in tokens
-    #[getter]
-    fn output_sequence_tokens(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::OUTPUT_SEQUENCE_TOKENS
-        )
-    }
-    /// Time to first token in seconds
-    #[getter]
-    fn time_to_first_token_seconds(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::TIME_TO_FIRST_TOKEN_SECONDS
-        )
-    }
-    /// Inter-token latency in seconds
-    #[getter]
-    fn inter_token_latency_seconds(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::INTER_TOKEN_LATENCY_SECONDS
-        )
-    }
-    /// Number of disconnected clients
-    #[getter]
-    fn disconnected_clients(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::DISCONNECTED_CLIENTS
-        )
-    }
-    /// Model total KV blocks
-    #[getter]
-    fn model_total_kv_blocks(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::MODEL_TOTAL_KV_BLOCKS
-        )
-    }
-    /// Model max number of sequences
-    #[getter]
-    fn model_max_num_seqs(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::MODEL_MAX_NUM_SEQS
-        )
-    }
-    /// Model max number of batched tokens
-    #[getter]
-    fn model_max_num_batched_tokens(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::MODEL_MAX_NUM_BATCHED_TOKENS
-        )
-    }
-    /// Model context length
-    #[getter]
-    fn model_context_length(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::MODEL_CONTEXT_LENGTH
-        )
-    }
-    /// Model KV cache block size
-    #[getter]
-    fn model_kv_cache_block_size(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::MODEL_KV_CACHE_BLOCK_SIZE
-        )
-    }
-    /// Model migration limit
-    #[getter]
-    fn model_migration_limit(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::FRONTEND,
-            frontend_service::MODEL_MIGRATION_LIMIT
-        )
-    }
-}
-/// Work handler metrics (component request processing)
-/// These methods return the full metric names with the "dynamo_component_" prefix
-#[pyclass]
-pub struct WorkHandler;
-#[pymethods]
-impl WorkHandler {
-    /// Total number of requests processed by work handler
-    #[getter]
-    fn requests_total(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::COMPONENT,
-            work_handler::REQUESTS_TOTAL
-        )
-    }
-    /// Total number of bytes received in requests by work handler
-    #[getter]
-    fn request_bytes_total(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::COMPONENT,
-            work_handler::REQUEST_BYTES_TOTAL
-        )
-    }
-    /// Total number of bytes sent in responses by work handler
-    #[getter]
-    fn response_bytes_total(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::COMPONENT,
-            work_handler::RESPONSE_BYTES_TOTAL
-        )
-    }
-    /// Number of requests currently being processed by work handler
-    #[getter]
-    fn inflight_requests(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::COMPONENT,
-            work_handler::INFLIGHT_REQUESTS
-        )
-    }
-    /// Time spent processing requests by work handler (histogram)
-    #[getter]
-    fn request_duration_seconds(&self) -> String {
-        format!(
-            "{}_{}",
-            name_prefix::COMPONENT,
-            work_handler::REQUEST_DURATION_SECONDS
-        )
-    }
-    /// Total number of errors in work handler processing
-    #[getter]
-    fn errors_total(&self) -> String {
-        format!("{}_{}", name_prefix::COMPONENT, work_handler::ERRORS_TOTAL)
-    }
-}
-/// KV stats metrics (KV cache statistics)
-/// These methods return the metric names with the "kvstats_" prefix
-#[pyclass]
-pub struct KvStatsMetrics;
-#[pymethods]
-impl KvStatsMetrics {
-    /// Number of active KV cache blocks currently in use
-    #[getter]
-    fn active_blocks(&self) -> String {
-        kvstats::ACTIVE_BLOCKS.to_string()
-    }
-    /// Total number of KV cache blocks available
-    #[getter]
-    fn total_blocks(&self) -> String {
-        kvstats::TOTAL_BLOCKS.to_string()
-    }
-    /// GPU cache usage as a percentage (0.0-1.0)
-    #[getter]
-    fn gpu_cache_usage_percent(&self) -> String {
-        kvstats::GPU_CACHE_USAGE_PERCENT.to_string()
-    }
-    /// GPU prefix cache hit rate as a percentage (0.0-1.0)
-    #[getter]
-    fn gpu_prefix_cache_hit_rate(&self) -> String {
-        kvstats::GPU_PREFIX_CACHE_HIT_RATE.to_string()
-    }
-}
-/// Add prometheus_names module to the Python bindings
-pub fn add_to_module(m: &Bound<'_, PyModule>) -> PyResult<()> {
-    m.add_class::<PrometheusNames>()?;
-    m.add_class::<FrontendService>()?;
-    m.add_class::<WorkHandler>()?;
-    m.add_class::<KvStatsMetrics>()?;
-    // Add a module-level singleton instance for convenience
-    let prometheus_names_instance = PrometheusNames;
-    m.add("prometheus_names", prometheus_names_instance)?;
-    Ok(())
-}
--- a/lib/bindings/python/src/dynamo/_prometheus_names.pyi
+++ b/lib/bindings/python/src/dynamo/_prometheus_names.pyi
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-"""
-Python type stubs for Prometheus metric name constants
-⚠️  **CRITICAL: SYNC WITH RUST SOURCE** ⚠️
-This file must stay in sync with:
- Source: `lib/runtime/src/metrics/prometheus_names.rs`
- Bindings: `lib/bindings/python/rust/prometheus_names.rs`
-When the Rust source is modified, update all three files immediately.
-"""
-class PrometheusNames:
-    """
-    Main container for all Prometheus metric name constants
-    """
-    @property
-    def frontend(self) -> FrontendService:
-        """
-        Frontend service metrics
-        """
-        ...
-    @property
-    def work_handler(self) -> WorkHandler:
-        """
-        Work handler metrics
-        """
-        ...
-    @property
-    def kvstats(self) -> KvStatsMetrics:
-        """
-        KV stats metrics
-        """
-        ...
-class FrontendService:
-    """
-    Frontend service metrics (LLM HTTP service)
-    These methods return the full metric names with the "dynamo_frontend_" prefix
-    """
-    @property
-    def requests_total(self) -> str:
-        """
-        Total number of LLM requests processed
-        """
-        ...
-    @property
-    def queued_requests(self) -> str:
-        """
-        Number of requests waiting in HTTP queue before receiving the first response
-        """
-        ...
-    @property
-    def inflight_requests(self) -> str:
-        """
-        Number of inflight requests going to the engine (vLLM, SGLang, ...)
-        """
-        ...
-    @property
-    def request_duration_seconds(self) -> str:
-        """
-        Duration of LLM requests
-        """
-        ...
-    @property
-    def input_sequence_tokens(self) -> str:
-        """
-        Input sequence length in tokens
-        """
-        ...
-    @property
-    def output_sequence_tokens(self) -> str:
-        """
-        Output sequence length in tokens
-        """
-        ...
-    @property
-    def time_to_first_token_seconds(self) -> str:
-        """
-        Time to first token in seconds
-        """
-        ...
-    @property
-    def inter_token_latency_seconds(self) -> str:
-        """
-        Inter-token latency in seconds
-        """
-        ...
-    @property
-    def disconnected_clients(self) -> str:
-        """
-        Number of disconnected clients
-        """
-        ...
-    @property
-    def model_total_kv_blocks(self) -> str:
-        """
-        Model total KV blocks
-        """
-        ...
-    @property
-    def model_max_num_seqs(self) -> str:
-        """
-        Model max number of sequences
-        """
-        ...
-    @property
-    def model_max_num_batched_tokens(self) -> str:
-        """
-        Model max number of batched tokens
-        """
-        ...
-    @property
-    def model_context_length(self) -> str:
-        """
-        Model context length
-        """
-        ...
-    @property
-    def model_kv_cache_block_size(self) -> str:
-        """
-        Model KV cache block size
-        """
-        ...
-    @property
-    def model_migration_limit(self) -> str:
-        """
-        Model migration limit
-        """
-        ...
-class WorkHandler:
-    """
-    Work handler metrics (component request processing)
-    These methods return the full metric names with the "dynamo_component_" prefix
-    """
-    @property
-    def requests_total(self) -> str:
-        """
-        Total number of requests processed by work handler
-        """
-        ...
-    @property
-    def request_bytes_total(self) -> str:
-        """
-        Total number of bytes received in requests by work handler
-        """
-        ...
-    @property
-    def response_bytes_total(self) -> str:
-        """
-        Total number of bytes sent in responses by work handler
-        """
-        ...
-    @property
-    def inflight_requests(self) -> str:
-        """
-        Number of requests currently being processed by work handler
-        """
-        ...
-    @property
-    def request_duration_seconds(self) -> str:
-        """
-        Time spent processing requests by work handler (histogram)
-        """
-        ...
-    @property
-    def errors_total(self) -> str:
-        """
-        Total number of errors in work handler processing
-        """
-        ...
-class KvStatsMetrics:
-    """
-    KV stats metrics (KV cache statistics)
-    These methods return the metric names with the "kvstats_" prefix
-    """
-    @property
-    def active_blocks(self) -> str:
-        """
-        Number of active KV cache blocks currently in use
-        """
-        ...
-    @property
-    def total_blocks(self) -> str:
-        """
-        Total number of KV cache blocks available
-        """
-        ...
-    @property
-    def gpu_cache_usage_percent(self) -> str:
-        """
-        GPU cache usage as a percentage (0.0-1.0)
-        """
-        ...
-    @property
-    def gpu_prefix_cache_hit_rate(self) -> str:
-        """
-        GPU prefix cache hit rate as a percentage (0.0-1.0)
-        """
-        ...
-# Module-level singleton instance for convenient access
-prometheus_names: PrometheusNames
--- a/lib/bindings/python/src/dynamo/prometheus_names.py
+++ b/lib/bindings/python/src/dynamo/prometheus_names.py
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""
+Python constants for Prometheus metric names
+AUTO-GENERATED from lib/runtime/src/metrics/prometheus_names.rs
+DO NOT EDIT THIS FILE MANUALLY
+To regenerate this file after modifying lib/runtime/src/metrics/prometheus_names.rs:
+    cargo run -p dynamo-codegen --bin gen-python-prometheus-names
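+This module provides pure Python access to Prometheus metric name constants
+without requiring Rust bindings.
+Note: frontend_service and work_handler constants are bare metric names without
+a prefix. To build the default full metric name, join them with the matching
+name_prefix constant, e.g. f"{name_prefix.FRONTEND}_{frontend_service.REQUESTS_TOTAL}"
+gives "dynamo_frontend_requests_total".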
+Usage (both patterns supported):
+    # Pattern 1: Import module
+    from dynamo import prometheus_names
+    print(prometheus_names.frontend_service.REQUESTS_TOTAL)  # "requests_total"
+    print(prometheus_names.kvstats.ACTIVE_BLOCKS)  # "kvstats_active_blocks"
+    # Pattern 2: Import specific classes
+    from dynamo.prometheus_names import frontend_service, kvstats
+    print(frontend_service.REQUESTS_TOTAL)  # "requests_total"
+    print(kvstats.ACTIVE_BLOCKS)  # "kvstats_active_blocks"
+"""
+from __future__ import annotations
+class distributed_runtime:
+    """DistributedRuntime core metrics"""
+    # Total uptime of the DistributedRuntime in seconds
+    UPTIME_SECONDS = "uptime_seconds"
+class frontend_service:
+    """Frontend service metrics (LLM HTTP service)"""
+    # Environment variable that overrides the default metric prefix
+    METRICS_PREFIX_ENV = "DYN_METRICS_PREFIX"
+    # Total number of LLM requests processed
+    REQUESTS_TOTAL = "requests_total"
+    # Number of requests waiting in HTTP queue before receiving the first response (gauge)
+    QUEUED_REQUESTS = "queued_requests"
+    # Number of inflight/concurrent requests going to the engine (vLLM, SGLang, ...)
+    # Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
+    INFLIGHT_REQUESTS = "inflight_requests"
+    # Number of disconnected clients (gauge that can go up and down)
+    DISCONNECTED_CLIENTS = "disconnected_clients"
+    # Duration of LLM requests
+    REQUEST_DURATION_SECONDS = "request_duration_seconds"
+    # Input sequence length in tokens
+    INPUT_SEQUENCE_TOKENS = "input_sequence_tokens"
+    # Output sequence length in tokens
+    OUTPUT_SEQUENCE_TOKENS = "output_sequence_tokens"
+    # Time to first token in seconds
+    TIME_TO_FIRST_TOKEN_SECONDS = "time_to_first_token_seconds"
+    # Inter-token latency in seconds
+    INTER_TOKEN_LATENCY_SECONDS = "inter_token_latency_seconds"
+    # Model configuration metrics
+    # Runtime config metrics (from ModelRuntimeConfig):
+    # Total KV blocks available for a worker serving the model
+    MODEL_TOTAL_KV_BLOCKS = "model_total_kv_blocks"
+    # Maximum number of sequences for a worker serving the model (runtime config)
+    MODEL_MAX_NUM_SEQS = "model_max_num_seqs"
+    # Maximum number of batched tokens for a worker serving the model (runtime config)
+    MODEL_MAX_NUM_BATCHED_TOKENS = "model_max_num_batched_tokens"
+    # MDC metrics (from ModelDeploymentCard):
+    # Maximum context length for a worker serving the model (MDC)
+    MODEL_CONTEXT_LENGTH = "model_context_length"
+    # KV cache block size for a worker serving the model (MDC)
+    MODEL_KV_CACHE_BLOCK_SIZE = "model_kv_cache_block_size"
+    # Request migration limit for a worker serving the model (MDC)
+    MODEL_MIGRATION_LIMIT = "model_migration_limit"
+class kvbm_connector:
+    """KVBM connector"""
+    # KVBM connector leader
+    KVBM_CONNECTOR_LEADER = "kvbm_connector_leader"
+    # KVBM connector worker
+    KVBM_CONNECTOR_WORKER = "kvbm_connector_worker"
+class kvrouter:
+    # Number of KV cache events applied to the index (including status)
+    KV_CACHE_EVENTS_APPLIED = "kv_cache_events_applied"
+class kvstats:
+    """KvStats metrics from LLM workers"""
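+    # Prefix for all KvStats metrics
+    # NOTE(review): generated as an empty string although every name below starts
+    # with "kvstats_" -- verify the generator output against the Rust source.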
+    PREFIX = ""
+    # Number of active KV cache blocks currently in use
+    ACTIVE_BLOCKS = "kvstats_active_blocks"
+    # Total number of KV cache blocks available
+    TOTAL_BLOCKS = "kvstats_total_blocks"
+    # GPU cache usage as a percentage (0.0-1.0)
+    GPU_CACHE_USAGE_PERCENT = "kvstats_gpu_cache_usage_percent"
+    # GPU prefix cache hit rate as a percentage (0.0-1.0)
+    GPU_PREFIX_CACHE_HIT_RATE = "kvstats_gpu_prefix_cache_hit_rate"
+class labels:
+    """Automatically inserted Prometheus label names used across the metrics system"""
+    # Label for component identification
+    COMPONENT = "dynamo_component"
+    # Label for namespace identification
+    NAMESPACE = "dynamo_namespace"
+    # Label for endpoint identification
+    ENDPOINT = "dynamo_endpoint"
+class name_prefix:
+    """Metric name prefixes used across the metrics system"""
+    # Prefix for component-level metric names (e.g. work handler metrics)
+    COMPONENT = "dynamo_component"
+    # Prefix for frontend service metrics
+    FRONTEND = "dynamo_frontend"
+class nats_client:
+    """NATS client metrics (DistributedRuntime contains a NATS client shared by all children)"""
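+    # Prefix for all NATS client metrics
+    # NOTE(review): generated as an empty string although every name below starts
+    # with "nats_client_" -- verify the generator output against the Rust source.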
+    PREFIX = ""
+    # Total number of bytes received by NATS client
+    IN_TOTAL_BYTES = "nats_client_in_total_bytes"
+    # Total number of bytes sent by NATS client
+    # NOTE(review): the constant name says "overhead" bytes -- confirm which quantity is meant.
+    OUT_OVERHEAD_BYTES = "nats_client_out_overhead_bytes"
+    # Total number of messages received by NATS client
+    IN_MESSAGES = "nats_client_in_messages"
+    # Total number of messages sent by NATS client
+    OUT_MESSAGES = "nats_client_out_messages"
+    # Current number of active connections for NATS client
+    # Note: Gauge metric measuring current connections, not cumulative total
+    CURRENT_CONNECTIONS = "nats_client_current_connections"
+    # Current connection state of NATS client (0=disconnected, 1=connected, 2=reconnecting)
+    CONNECTION_STATE = "nats_client_connection_state"
+class nats_service:
+    """NATS service metrics, from the $SRV.STATS.<service_name> requests on NATS server"""
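+    # Prefix for all NATS service metrics
+    # NOTE(review): generated as an empty string although every name below starts
+    # with "nats_service_" -- verify the generator output against the Rust source.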
+    PREFIX = ""
+    # Average processing time in milliseconds (maps to: average_processing_time in ms)
+    PROCESSING_MS_AVG = "nats_service_processing_ms_avg"
+    # Total errors across all endpoints (maps to: num_errors)
+    ERRORS_TOTAL = "nats_service_errors_total"
+    # Total requests across all endpoints (maps to: num_requests)
+    REQUESTS_TOTAL = "nats_service_requests_total"
+    # Total processing time in milliseconds (maps to: processing_time in ms)
+    PROCESSING_MS_TOTAL = "nats_service_processing_ms_total"
+    # Number of active services (derived from ServiceSet.services)
+    ACTIVE_SERVICES = "nats_service_active_services"
+    # Number of active endpoints (derived from ServiceInfo.endpoints)
+    ACTIVE_ENDPOINTS = "nats_service_active_endpoints"
+class task_tracker:
+    """Task tracker Prometheus metric name suffixes"""
+    # Total number of tasks issued/submitted
+    TASKS_ISSUED_TOTAL = "tasks_issued_total"
+    # Total number of tasks started
+    TASKS_STARTED_TOTAL = "tasks_started_total"
+    # Total number of successfully completed tasks
+    TASKS_SUCCESS_TOTAL = "tasks_success_total"
+    # Total number of cancelled tasks
+    TASKS_CANCELLED_TOTAL = "tasks_cancelled_total"
+    # Total number of failed tasks
+    TASKS_FAILED_TOTAL = "tasks_failed_total"
+    # Total number of rejected tasks
+    TASKS_REJECTED_TOTAL = "tasks_rejected_total"
+class work_handler:
+    """Work handler Prometheus metric names"""
+    # Total number of requests processed by work handler
+    REQUESTS_TOTAL = "requests_total"
+    # Total number of bytes received in requests by work handler
+    REQUEST_BYTES_TOTAL = "request_bytes_total"
+    # Total number of bytes sent in responses by work handler
+    RESPONSE_BYTES_TOTAL = "response_bytes_total"
+    # Number of requests currently being processed by work handler
+    # Note: This is a gauge metric (current state) that can go up and down, so no _total suffix
+    INFLIGHT_REQUESTS = "inflight_requests"
+    # Time spent processing requests by work handler (histogram)
+    REQUEST_DURATION_SECONDS = "request_duration_seconds"
+    # Total number of errors in work handler processing
+    ERRORS_TOTAL = "errors_total"
+    # Label name for error type classification
+    ERROR_TYPE_LABEL = "error_type"
--- a/lib/runtime/src/metrics/prometheus_names.rs
+++ b/lib/runtime/src/metrics/prometheus_names.rs
@@ -6,12 +6,12 @@
 //! This module provides centralized Prometheus metric name constants and sanitization functions
 //! for various components to ensure consistency and avoid duplication across the codebase.
 //!
-//! ⚠️  **CRITICAL: SYNC WITH PYTHON BINDINGS** ⚠️
+//! ⚠️  **CRITICAL: REGENERATE PYTHON FILE AFTER CHANGES** ⚠️
-//! When modifying constants in this file, you MUST also update:
+//! When modifying constants in this file, regenerate the Python module:
-//! `lib/bindings/python/rust/prometheus_names.rs`
+//!     cargo run -p dynamo-codegen --bin gen-python-prometheus-names
 //!
-//! The Python bindings expose these constants to Python code and must stay in sync.
+//! This generates `lib/bindings/python/src/dynamo/prometheus_names.py`
-//! Any changes here should be reflected in the Python bindings immediately.
+//! with pure Python constants (no Rust bindings needed).
 //!
 //! ## Naming Conventions
 //!
@@ -84,8 +84,7 @@ pub mod labels {
 /// Frontend service metrics (LLM HTTP service)
 ///
-/// ⚠️  SYNC ALERT: These constants are exposed to Python via:
+/// ⚠️  Python codegen: Run gen-python-prometheus-names after changes
-/// `lib/bindings/python/rust/prometheus_names.rs` - FrontendService class
 pub mod frontend_service {
    // TODO: Move DYN_METRICS_PREFIX and other environment variable names to environment_names.rs
    // for centralized environment variable constant management across the codebase

--- a/tests/utils/payloads.py
+++ b/tests/utils/payloads.py
@@ -20,7 +20,7 @@ from copy import deepcopy
 from dataclasses import dataclass
 from typing import Any, Dict, List
-from dynamo._core import prometheus_names
+from dynamo import prometheus_names
 logger = logging.getLogger(__name__)
@@ -206,7 +206,7 @@ class MetricsPayload(BasePayload):
        return response.text
    def validate(self, response: Any, content: str) -> None:
-        requests_total_name = prometheus_names.work_handler.requests_total
+        requests_total_name = prometheus_names.work_handler.REQUESTS_TOTAL
        pattern = (
            rf'{re.escape(requests_total_name)}\{{[^}}]*model="[^"]*"[^}}]*\}}\s+(\d+)'
        )