feat: add a hierarchical Prometheus MetricsRegistry trait for...

feat: add a hierarchical Prometheus MetricsRegistry trait for DistributedRuntime, Namespace, Components, and Endpoint (#2008) Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com> Co-authored-by: Ryan Olson <rolson@nvidia.com>

feat: add a hierarchical Prometheus MetricsRegistry trait for...
feat: add a hierarchical Prometheus MetricsRegistry trait for DistributedRuntime, Namespace, Components, and Endpoint (#2008) Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com> Co-authored-by: Ryan Olson <rolson@nvidia.com>
e5a8628f · Keiven C · GitHub · 20c5daf3 · e5a8628f · e5a8628f
Unverified Commit e5a8628f authored Jul 22, 2025 by Keiven C Committed by GitHub Jul 22, 2025
18 changed files
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -49,7 +49,6 @@ hf-hub = { version = "0.4.2", default-features = false, features = ["tokio", "ru
 humantime = { version = "2.2.0" }
 libc = { version = "0.2" }
 oneshot = { version = "0.1.11", features = ["std", "async"] }
-opentelemetry = { version = "0.27" }
 prometheus = { version = "0.14" }
 rand = { version = "0.9.0" }
 reqwest = { version = "0.12.22", default-features = false, features = ["json", "stream", "rustls-tls"] }

--- a/components/metrics/src/main.rs
+++ b/components/metrics/src/main.rs
@@ -173,6 +173,7 @@ async fn app(runtime: Runtime) -> Result<()> {
    let namespace_clone = namespace.clone();
    let metrics_collector_clone = metrics_collector.clone();

+    // Note: Subscribing to KVHitRateEvent for illustration purposes. They're not used in production.
    // Spawn a task to handle KV hit rate events
    tokio::spawn(async move {
        match namespace_clone.subscribe(kv_hit_rate_subject).await {

--- a/lib/runtime/examples/Cargo.lock
+++ b/lib/runtime/examples/Cargo.lock
@@ -2567,6 +2567,18 @@ dependencies = [
 "version-compare",
 ]

+[[package]]
+name = "system_metrics"
+version = "0.3.2"
+dependencies = [
+ "dynamo-runtime",
+ "futures",
+ "prometheus",
+ "serde",
+ "serde_json",
+ "tokio",
+]
+
 [[package]]
 name = "target-lexicon"
 version = "0.12.16"

--- a/lib/runtime/examples/Cargo.toml
+++ b/lib/runtime/examples/Cargo.toml
@@ -17,6 +17,7 @@
 members = [
    "hello_world",
    "service_metrics",
+    "system_metrics",
 ]
 resolver = "3"

@@ -32,3 +33,4 @@ repository = "https://github.com/ai-dynamo/dynamo.git"
 [workspace.dependencies]
 # local or crates.io
 dynamo-runtime = { path = "../" }
+prometheus = { workspace = true }
--- a/lib/runtime/examples/service_metrics/src/bin/service_client.rs
+++ b/lib/runtime/examples/service_metrics/src/bin/service_client.rs
@@ -45,6 +45,9 @@ async fn app(runtime: Runtime) -> Result<()> {
        println!("{:?}", resp);
    }

+    // This is just an illustration to invoke the server's stats_registry(<action>), where
+    // the action currently increments the `service_requests_total` metric. You can validate
+    // the result by running `curl http://localhost:8000/metrics`
    let service_set = component.scrape_stats(Duration::from_millis(100)).await?;
    println!("{:?}", service_set);


--- a/lib/runtime/examples/system_metrics/Cargo.toml
+++ b/lib/runtime/examples/system_metrics/Cargo.toml
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+[package]
+name = "system_metrics"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+
+[dependencies]
+dynamo-runtime = { workspace = true }
+
+# third-party
+futures = "0.3"
+serde = { version = "1", features = ["derive"] }
+serde_json = { version = "1" }
+tokio = { version = "1", features = ["full"] }
+prometheus = { version = "0.14" }
--- a/lib/runtime/examples/system_metrics/README.md
+++ b/lib/runtime/examples/system_metrics/README.md
+# System Metrics Example
+
+Demonstrates custom metrics and monitoring in Dynamo Runtime using Prometheus.
+
+## Overview
+
+- Automatic hierarchical labeling: Runtime automatically adds `namespace` → `component` → `endpoint` labels
+- Uses existing Prometheus implementations
+- HTTP metrics endpoint automatically added
+
+## Quick Start
+
+### Build
+```bash
+cd lib/runtime/examples/system_metrics
+cargo build
+```
+
+### Run Server
+```bash
+export DYN_LOG=1 DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8000
+cargo run --bin system_server
+```
+
+### Run Client
+```bash
+cargo run --bin system_client
+```
+
+Note: Running the client will increment `service_requests_total`.
+
+### View Metrics
+```bash
+curl http://localhost:8000/metrics
+```
+
+Example output:
+```
+# HELP service_request_duration_seconds Time spent processing requests
+# TYPE service_request_duration_seconds histogram
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.005"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.01"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.025"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.05"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.1"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.25"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.5"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="1"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="2.5"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="5"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="10"} 2
+service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="+Inf"} 2
+service_request_duration_seconds_sum{component="component",endpoint="endpoint",namespace="system",service="backend"} 0.000022239000000000002
+service_request_duration_seconds_count{component="component",endpoint="endpoint",namespace="system",service="backend"} 2
+# HELP service_requests_total Total number of requests processed
+# TYPE service_requests_total counter
+service_requests_total{component="component",endpoint="endpoint",namespace="system",service="backend"} 2
+# HELP uptime_seconds Total uptime of the DistributedRuntime in seconds
+# TYPE uptime_seconds gauge
+uptime_seconds{namespace="http_server"} 725.997013676
+```
+
+## Configuration
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `DYN_LOG` | Enable logging | `0` |
+| `DYN_SYSTEM_ENABLED` | Enable system metrics | `false` |
+| `DYN_SYSTEM_PORT` | HTTP server port | `8000` |
+
+## Metrics
+
+- `service_requests_total`: Request counter
+- `service_request_duration_seconds`: Request duration histogram
+- `uptime_seconds`: Server uptime gauge
+
+This provides automatic context and grouping for all metrics without manual configuration.
+
+## Troubleshooting
+
+- **Port in use**: Change `DYN_SYSTEM_PORT`
+- **Connection refused**: Ensure server is running first
+- **No metrics**: Verify `DYN_SYSTEM_ENABLED=true`
\ No newline at end of file
--- a/lib/runtime/examples/system_metrics/src/bin/system_client.rs
+++ b/lib/runtime/examples/system_metrics/src/bin/system_client.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use futures::StreamExt;
+use system_metrics::DEFAULT_NAMESPACE;
+
+use dynamo_runtime::{
+    logging, pipeline::PushRouter, protocols::annotated::Annotated, utils::Duration,
+    DistributedRuntime, Result, Runtime, Worker,
+};
+
+fn main() -> Result<()> {
+    logging::init();
+    let worker = Worker::from_settings()?;
+    worker.execute(app)
+}
+
+async fn app(runtime: Runtime) -> Result<()> {
+    let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;
+
+    let namespace = distributed.namespace(DEFAULT_NAMESPACE)?;
+    let component = namespace.component("component")?;
+
+    let client = component.endpoint("endpoint").client().await?;
+
+    client.wait_for_instances().await?;
+    let router =
+        PushRouter::<String, Annotated<String>>::from_client(client, Default::default()).await?;
+
+    let mut stream = router.random("hello world".to_string().into()).await?;
+
+    while let Some(resp) = stream.next().await {
+        println!("{:?}", resp);
+    }
+
+    let service_set = component.scrape_stats(Duration::from_millis(100)).await?;
+    println!("{:?}", service_set);
+
+    runtime.shutdown();
+
+    Ok(())
+}
--- a/lib/runtime/examples/system_metrics/src/bin/system_server.rs
+++ b/lib/runtime/examples/system_metrics/src/bin/system_server.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use system_metrics::{MyStats, DEFAULT_NAMESPACE};
+
+use dynamo_runtime::{
+    logging,
+    metrics::MetricsRegistry,
+    pipeline::{
+        async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
+        ResponseStream, SingleIn,
+    },
+    protocols::annotated::Annotated,
+    stream, DistributedRuntime, Result, Runtime, Worker,
+};
+
+use prometheus::{Counter, Histogram};
+use std::sync::Arc;
+
+/// Service metrics struct using the metric classes from metrics.rs
+pub struct MySystemStatsMetrics {
+    pub request_counter: Arc<Counter>,
+    pub request_duration: Arc<Histogram>,
+}
+
+impl MySystemStatsMetrics {
+    /// Create a new ServiceMetrics instance using the metric backend
+    pub fn new<R: MetricsRegistry>(
+        metrics_registry: Arc<R>,
+    ) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
+        let request_counter = metrics_registry.create_counter(
+            "service_requests_total",
+            "Total number of requests processed",
+            &[("service", "backend")],
+        )?;
+        let request_duration = metrics_registry.create_histogram(
+            "service_request_duration_seconds",
+            "Time spent processing requests",
+            &[("service", "backend")],
+            None,
+        )?;
+        Ok(Self {
+            request_counter,
+            request_duration,
+        })
+    }
+}
+
+fn main() -> Result<()> {
+    logging::init();
+    let worker = Worker::from_settings()?;
+    worker.execute(app)
+}
+
+async fn app(runtime: Runtime) -> Result<()> {
+    let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;
+    backend(distributed).await
+}
+
+struct RequestHandler {
+    metrics: Arc<MySystemStatsMetrics>,
+}
+
+impl RequestHandler {
+    fn new(metrics: Arc<MySystemStatsMetrics>) -> Arc<Self> {
+        Arc::new(Self { metrics })
+    }
+}
+
+#[async_trait]
+impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for RequestHandler {
+    async fn generate(&self, input: SingleIn<String>) -> Result<ManyOut<Annotated<String>>> {
+        let start_time = std::time::Instant::now();
+
+        // Record request start
+        self.metrics.request_counter.inc();
+
+        let (data, ctx) = input.into_parts();
+
+        let chars = data
+            .chars()
+            .map(|c| Annotated::from_data(c.to_string()))
+            .collect::<Vec<_>>();
+
+        let stream = stream::iter(chars);
+
+        // Record request duration
+        let duration = start_time.elapsed();
+        self.metrics
+            .request_duration
+            .observe(duration.as_secs_f64());
+
+        Ok(ResponseStream::new(Box::pin(stream), ctx.context()))
+    }
+}
+
+async fn backend(drt: DistributedRuntime) -> Result<()> {
+    let endpoint = drt
+        .namespace(DEFAULT_NAMESPACE)?
+        .component("component")?
+        .service_builder()
+        .create()
+        .await?
+        .endpoint("endpoint");
+
+    // make the ingress discoverable via a component service
+    // we must first create a service, then we can attach one more more endpoints
+    // attach an ingress to an engine, with the RequestHandler using the metrics struct
+    let endpoint_metrics = Arc::new(
+        MySystemStatsMetrics::new(Arc::new(endpoint.clone()))
+            .map_err(|e| Error::msg(e.to_string()))?,
+    );
+    let ingress = Ingress::for_engine(RequestHandler::new(endpoint_metrics.clone()))?;
+
+    endpoint
+        .endpoint_builder()
+        .stats_handler(|_stats| {
+            println!("Stats handler called with stats: {:?}", _stats);
+            let stats = MyStats { val: 10 };
+            serde_json::to_value(stats).unwrap()
+        })
+        .handler(ingress)
+        .start()
+        .await?;
+
+    Ok(())
+}
--- a/lib/runtime/examples/system_metrics/src/lib.rs
+++ b/lib/runtime/examples/system_metrics/src/lib.rs
+// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use serde::{Deserialize, Serialize};
+
+pub const DEFAULT_NAMESPACE: &str = "system";
+
+#[derive(Serialize, Deserialize)]
+// Dummy Stats object to demonstrate how to attach a custom stats handler
+pub struct MyStats {
+    pub val: u32,
+}
--- a/lib/runtime/src/component.rs
+++ b/lib/runtime/src/component.rs
@@ -29,7 +29,9 @@
 //!
 //! TODO: Top-level Overview of Endpoints/Functions

-use crate::{discovery::Lease, service::ServiceSet, transports::etcd::EtcdPath};
+use crate::{
+    discovery::Lease, metrics::MetricsRegistry, service::ServiceSet, transports::etcd::EtcdPath,
+};

 use super::{
    error,
@@ -168,6 +170,20 @@ impl RuntimeProvider for Component {
    }
 }

+impl MetricsRegistry for Component {
+    fn basename(&self) -> String {
+        self.name.clone()
+    }
+
+    fn parent_hierarchy(&self) -> Vec<String> {
+        [
+            self.namespace.parent_hierarchy(),
+            vec![self.namespace.basename()],
+        ]
+        .concat()
+    }
+}
+
 impl Component {
    /// The component part of an instance path in etcd.
    pub fn etcd_root(&self) -> String {
@@ -300,6 +316,20 @@ impl RuntimeProvider for Endpoint {
    }
 }

+impl MetricsRegistry for Endpoint {
+    fn basename(&self) -> String {
+        self.name.clone()
+    }
+
+    fn parent_hierarchy(&self) -> Vec<String> {
+        [
+            self.component.parent_hierarchy(),
+            vec![self.component.basename()],
+        ]
+        .concat()
+    }
+}
+
 impl Endpoint {
    pub fn id(&self) -> EndpointId {
        EndpointId {

--- a/lib/runtime/src/component/namespace.rs
+++ b/lib/runtime/src/component/namespace.rs
@@ -19,7 +19,7 @@ use futures::stream::StreamExt;
 use futures::{Stream, TryStreamExt};

 use super::*;
-
+use crate::metrics::MetricsRegistry;
 use crate::traits::events::{EventPublisher, EventSubscriber};

 #[async_trait]
@@ -78,6 +78,16 @@ impl EventSubscriber for Namespace {
    }
 }

+impl MetricsRegistry for Namespace {
+    fn basename(&self) -> String {
+        self.name.clone()
+    }
+
+    fn parent_hierarchy(&self) -> Vec<String> {
+        vec![self.drt().basename()]
+    }
+}
+
 #[cfg(feature = "integration")]
 #[cfg(test)]
 mod tests {

--- a/lib/runtime/src/distributed.rs
+++ b/lib/runtime/src/distributed.rs
@@ -17,6 +17,7 @@ pub use crate::component::Component;
 use crate::{
    component::{self, ComponentBuilder, Endpoint, InstanceSource, Namespace},
    discovery::DiscoveryClient,
+    metrics::MetricsRegistry,
    service::ServiceClient,
    transports::{etcd, nats, tcp},
    ErrorContext,
@@ -30,6 +31,16 @@ use std::collections::HashMap;
 use tokio::sync::Mutex;
 use tokio_util::sync::CancellationToken;

+impl MetricsRegistry for DistributedRuntime {
+    fn basename(&self) -> String {
+        "".to_string() // drt has no basename. Basename only begins with the Namespace.
+    }
+
+    fn parent_hierarchy(&self) -> Vec<String> {
+        vec![] // drt is the root, so no parent hierarchy
+    }
+}
+
 impl DistributedRuntime {
    pub async fn new(runtime: Runtime, config: DistributedConfig) -> Result<Self> {
        let secondary = runtime.secondary();
@@ -65,6 +76,16 @@ impl DistributedRuntime {
            })
            .await??;

+        // Start HTTP server for health and metrics if enabled in configuration
+        let config = crate::config::RuntimeConfig::from_settings().unwrap_or_default();
+        // IMPORTANT: We must extract cancel_token from runtime BEFORE moving runtime into the struct below.
+        // This is because after moving, runtime is no longer accessible in this scope (ownership rules).
+        let cancel_token = if config.system_server_enabled() {
+            Some(runtime.clone().child_token())
+        } else {
+            None
+        };
+
        let distributed_runtime = Self {
            runtime,
            etcd_client,
@@ -73,24 +94,27 @@ impl DistributedRuntime {
            component_registry: component::Registry::new(),
            is_static,
            instance_sources: Arc::new(Mutex::new(HashMap::new())),
-            start_time: std::time::Instant::now(),
+            prometheus_registries_by_prefix: Arc::new(std::sync::Mutex::new(HashMap::<
+                String,
+                prometheus::Registry,
+            >::new())),
        };

-        // Start HTTP server for health and metrics (if enabled)
-        let config = crate::config::RuntimeConfig::from_settings().unwrap_or_default();
-        if config.system_server_enabled() {
-            let drt_arc = Arc::new(distributed_runtime.clone());
-            let runtime_clone = distributed_runtime.runtime.clone();
-            // spawn_http_server spawns its own background task:
+        // Start HTTP server if enabled
+        if let Some(cancel_token) = cancel_token {
+            let host = config.system_host.clone();
+            let port = config.system_port;
+
+            // Start HTTP server (it spawns its own task internally)
            match crate::http_server::spawn_http_server(
-                &config.system_host,
-                config.system_port,
-                runtime_clone.child_token(),
-                drt_arc,
+                &host,
+                port,
+                cancel_token,
+                Arc::new(distributed_runtime.clone()),
            )
            .await
            {
-                Ok((addr, _handle)) => {
+                Ok((addr, _)) => {
                    tracing::info!("HTTP server started successfully on {}", addr);
                }
                Err(e) => {
@@ -191,11 +215,6 @@ impl DistributedRuntime {
    pub fn instance_sources(&self) -> Arc<Mutex<HashMap<Endpoint, Weak<InstanceSource>>>> {
        self.instance_sources.clone()
    }
-
-    /// Get the uptime of this DistributedRuntime in seconds
-    pub fn uptime(&self) -> std::time::Duration {
-        self.start_time.elapsed()
-    }
 }

 #[derive(Dissolve)]

--- a/lib/runtime/src/http_server.rs
+++ b/lib/runtime/src/http_server.rs
@@ -13,77 +13,106 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+use crate::metrics::MetricsRegistry;
+use crate::traits::DistributedRuntimeProvider;
 use axum::{body, http::StatusCode, response::IntoResponse, routing::get, Router};
-use prometheus::{
-    proto, register_gauge_with_registry, Encoder, Gauge, Opts, Registry, TextEncoder,
-};
 use std::sync::Arc;
+use std::sync::OnceLock;
+use std::time::Instant;
 use tokio::net::TcpListener;
 use tokio_util::sync::CancellationToken;
 use tracing;

-/// Runtime metrics for HTTP server
-pub struct RuntimeMetrics {
-    uptime_gauge: Gauge,
+pub struct HttpMetricsRegistry {
+    pub drt: Arc<crate::DistributedRuntime>,
 }

-impl RuntimeMetrics {
-    pub fn new(metrics_registry: &Arc<Registry>) -> anyhow::Result<Arc<Self>> {
-        let uptime_opts = Opts::new(
-            "uptime_seconds",
-            "Total uptime of the DistributedRuntime in seconds",
-        )
-        .namespace("dynamo")
-        .subsystem("runtime");
-
-        let uptime_gauge = register_gauge_with_registry!(uptime_opts, metrics_registry)?;
+impl crate::traits::DistributedRuntimeProvider for HttpMetricsRegistry {
+    fn drt(&self) -> &crate::DistributedRuntime {
+        &self.drt
+    }
+}

-        Ok(Arc::new(Self { uptime_gauge }))
+impl MetricsRegistry for HttpMetricsRegistry {
+    fn basename(&self) -> String {
+        "http_server".to_string()
    }

-    pub fn update_uptime(&self, uptime_seconds: f64) {
-        self.uptime_gauge.set(uptime_seconds);
+    fn parent_hierarchy(&self) -> Vec<String> {
+        [self.drt().parent_hierarchy(), vec![self.drt().basename()]].concat()
    }
 }

-/// HTTP server state containing pre-created metrics
+/// HTTP server state containing metrics and uptime tracking
 pub struct HttpServerState {
-    drt: Arc<crate::DistributedRuntime>,
-    registry: Arc<Registry>,
-    runtime_metrics: Arc<RuntimeMetrics>,
+    // global drt registry is for printing out the entire Prometheus format output
+    root_drt: Arc<crate::DistributedRuntime>,
+    start_time: OnceLock<Instant>,
+    uptime_gauge: Arc<prometheus::Gauge>,
 }

 impl HttpServerState {
-    /// Create new HTTP server state with pre-created metrics
+    /// Create new HTTP server state with the provided metrics registry
    pub fn new(drt: Arc<crate::DistributedRuntime>) -> anyhow::Result<Self> {
-        let registry = Arc::new(Registry::new());
+        let http_metrics_registry = Arc::new(HttpMetricsRegistry { drt: drt.clone() });
+        let uptime_gauge = http_metrics_registry.as_ref().create_gauge(
+            "uptime_seconds",
+            "Total uptime of the DistributedRuntime in seconds",
+            &[],
+        )?;
+        let state = Self {
+            root_drt: drt,
+            start_time: OnceLock::new(),
+            uptime_gauge,
+        };
+        Ok(state)
+    }

-        // Create runtime metrics
-        let runtime_metrics = RuntimeMetrics::new(&registry)?;
+    /// Initialize the start time (can only be called once)
+    pub fn initialize_start_time(&self) -> Result<(), &'static str> {
+        self.start_time
+            .set(Instant::now())
+            .map_err(|_| "Start time already initialized")
+    }

-        Ok(Self {
-            drt,
-            registry,
-            runtime_metrics,
-        })
+    pub fn uptime(&self) -> Result<std::time::Duration, &'static str> {
+        self.start_time
+            .get()
+            .ok_or("Start time not initialized")
+            .map(|start_time| start_time.elapsed())
+    }
+
+    /// Get a reference to the distributed runtime
+    pub fn drt(&self) -> &crate::DistributedRuntime {
+        &self.root_drt
+    }
+
+    /// Update the uptime gauge with current value
+    pub fn update_uptime_gauge(&self) {
+        if let Ok(uptime) = self.uptime() {
+            let uptime_seconds = uptime.as_secs_f64();
+            self.uptime_gauge.set(uptime_seconds);
+        } else {
+            tracing::warn!("Failed to update uptime gauge: start time not initialized");
+        }
    }
 }

-/// Start HTTP server with DistributedRuntime support
+/// Start HTTP server with metrics support
 pub async fn spawn_http_server(
    host: &str,
    port: u16,
    cancel_token: CancellationToken,
    drt: Arc<crate::DistributedRuntime>,
 ) -> anyhow::Result<(std::net::SocketAddr, tokio::task::JoinHandle<()>)> {
-    tracing::info!(
-        "[spawn_http_server] called with host={}, port={}",
-        host,
-        port
-    );
-    // Create HTTP server state with pre-created metrics
+    // Create HTTP server state with the provided metrics registry
    let server_state = Arc::new(HttpServerState::new(drt)?);

+    // Initialize the start time
+    server_state
+        .initialize_start_time()
+        .map_err(|e| anyhow::anyhow!("Failed to initialize start time: {}", e))?;
+
    let app = Router::new()
        .route(
            "/health",
@@ -146,48 +175,57 @@ pub async fn spawn_http_server(

 /// Health handler
 async fn health_handler(state: Arc<HttpServerState>) -> impl IntoResponse {
-    tracing::info!("[health_handler] called");
-    let uptime = state.drt.uptime();
-    let response = format!("OK\nUptime: {} seconds\n", uptime.as_secs());
-    (StatusCode::OK, response)
+    match state.uptime() {
+        Ok(uptime) => {
+            let response = format!("OK\nUptime: {} seconds\n", uptime.as_secs());
+            (StatusCode::OK, response)
+        }
+        Err(e) => {
+            tracing::error!("Failed to get uptime: {}", e);
+            (
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "Failed to get uptime".to_string(),
+            )
+        }
+    }
 }

 /// Metrics handler with DistributedRuntime uptime
 async fn metrics_handler(state: Arc<HttpServerState>) -> impl IntoResponse {
    // Update the uptime gauge with current value
-    let uptime_seconds = state.drt.uptime().as_secs_f64();
-    state.runtime_metrics.update_uptime(uptime_seconds);
-
-    // Gather metrics from the registry
-    let metric_families = state.registry.gather();
+    state.update_uptime_gauge();

-    let encoder = TextEncoder::new();
-    let mut buffer = Vec::new();
-
-    match encoder.encode(&metric_families, &mut buffer) {
-        Ok(()) => match String::from_utf8(buffer) {
-            Ok(response) => (StatusCode::OK, response),
-            Err(e) => {
-                tracing::error!("Failed to encode metrics as UTF-8: {}", e);
-                (
-                    StatusCode::INTERNAL_SERVER_ERROR,
-                    "Failed to encode metrics as UTF-8".to_string(),
-                )
-            }
-        },
+    // Get metrics from the registry
+    match state.drt().prometheus_metrics_fmt() {
+        Ok(response) => (StatusCode::OK, response),
        Err(e) => {
-            tracing::error!("Failed to encode metrics: {}", e);
+            tracing::error!("Failed to get metrics from registry: {}", e);
            (
                StatusCode::INTERNAL_SERVER_ERROR,
-                "Failed to encode metrics".to_string(),
+                "Failed to get metrics".to_string(),
            )
        }
    }
 }

+// Regular tests: cargo test http_server --lib
+// Integration tests: cargo test http_server --lib --features integration
+
+#[cfg(test)]
+/// Helper function to create a DRT instance for async testing
+/// Uses the test-friendly constructor without discovery
+async fn create_test_drt_async() -> crate::DistributedRuntime {
+    let rt = crate::Runtime::from_current().unwrap();
+    crate::DistributedRuntime::from_settings_without_discovery(rt)
+        .await
+        .unwrap()
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
+    use crate::metrics::MetricsRegistry;
+    use std::sync::Arc;
    use tokio::time::{sleep, Duration};

    #[tokio::test]
@@ -220,68 +258,70 @@ mod tests {
        );
    }

+    #[cfg(feature = "integration")]
    #[tokio::test]
-    async fn test_runtime_metrics_creation() {
-        // Test RuntimeMetrics creation and functionality
-        let registry = Arc::new(Registry::new());
-        let runtime_metrics = RuntimeMetrics::new(&registry).unwrap();
-
-        // Wait a bit to ensure uptime is measurable
-        tokio::time::sleep(Duration::from_millis(10)).await;
+    async fn test_runtime_metrics_initialization_and_namespace() {
+        // Test that metrics have correct namespace
+        let drt = create_test_drt_async().await;
+        let runtime_metrics = HttpServerState::new(Arc::new(drt)).unwrap();

-        // Test updating uptime
-        let uptime_seconds = 123.456;
-        runtime_metrics.update_uptime(uptime_seconds);
+        // Initialize start time
+        runtime_metrics.initialize_start_time().unwrap();

-        // Gather metrics from the registry
-        let metric_families = registry.gather();
+        runtime_metrics.uptime_gauge.set(42.0);

-        let encoder = TextEncoder::new();
-        let mut buffer = Vec::new();
-        encoder.encode(&metric_families, &mut buffer).unwrap();
+        let response = runtime_metrics.drt().prometheus_metrics_fmt().unwrap();
+        println!("Full metrics response:\n{}", response);

-        let response = String::from_utf8(buffer).unwrap();
-        assert!(response.contains("dynamo_runtime_uptime_seconds"));
-        assert!(response.contains("123.456"));
+        let expected = "\
+# HELP uptime_seconds Total uptime of the DistributedRuntime in seconds
+# TYPE uptime_seconds gauge
+uptime_seconds{namespace=\"http_server\"} 42
+";
+        assert_eq!(response, expected);
    }

+    #[cfg(feature = "integration")]
    #[tokio::test]
-    async fn test_runtime_metrics_namespace() {
-        // Test that metrics have correct namespace
-        let registry = Arc::new(Registry::new());
-        let runtime_metrics = RuntimeMetrics::new(&registry).unwrap();
+    async fn test_start_time_initialization() {
+        // Test that start time can only be initialized once
+        let drt = create_test_drt_async().await;
+        let runtime_metrics = HttpServerState::new(Arc::new(drt)).unwrap();

-        runtime_metrics.update_uptime(42.0);
+        // First initialization should succeed
+        assert!(runtime_metrics.initialize_start_time().is_ok());

-        let metric_families = registry.gather();
-        let encoder = TextEncoder::new();
-        let mut buffer = Vec::new();
-        encoder.encode(&metric_families, &mut buffer).unwrap();
+        // Second initialization should fail
+        assert!(runtime_metrics.initialize_start_time().is_err());

-        let response = String::from_utf8(buffer).unwrap();
-        // Check for the full metric name with namespace and subsystem
-        assert!(response.contains("dynamo_runtime_uptime_seconds"));
-        assert!(response.contains("Total uptime of the DistributedRuntime in seconds"));
+        // Uptime should work after initialization
+        let _uptime = runtime_metrics.uptime().unwrap();
+        // If we get here, uptime calculation works correctly
    }

-    /*
+    #[cfg(feature = "integration")]
+    #[tokio::test]
+    async fn test_uptime_without_initialization() {
+        // Test that uptime returns an error if start time is not initialized
+        let drt = create_test_drt_async().await;
+        let runtime_metrics = HttpServerState::new(Arc::new(drt)).unwrap();
+
+        // This should return an error because start time is not initialized
+        let result = runtime_metrics.uptime();
+        assert!(result.is_err());
+        assert_eq!(result.unwrap_err(), "Start time not initialized");
+    }
+
+    #[cfg(feature = "integration")]
    #[tokio::test]
    async fn test_spawn_http_server_endpoints() {
-        use std::sync::Arc;
-        use tokio::time::sleep;
-        use tokio_util::sync::CancellationToken;
-        // use tokio::io::{AsyncReadExt, AsyncWriteExt};
        // use reqwest for HTTP requests
-        let runtime = crate::Runtime::from_settings().unwrap();
-        let drt = Arc::new(
-            crate::DistributedRuntime::from_settings_without_discovery(runtime)
-                .await
-                .unwrap(),
-        );
        let cancel_token = CancellationToken::new();
-        let (addr, server_handle) = spawn_http_server("127.0.0.1", 0, cancel_token.clone(), drt)
-            .await
-            .unwrap();
+        let drt = create_test_drt_async().await;
+        let (addr, server_handle) =
+            spawn_http_server("127.0.0.1", 0, cancel_token.clone(), Arc::new(drt))
+                .await
+                .unwrap();
        println!("[test] Waiting for server to start...");
        sleep(std::time::Duration::from_millis(1000)).await;
        println!("[test] Server should be up, starting requests...");
@@ -324,5 +364,36 @@ mod tests {
            }
        }
    }
-    */
+
+    #[cfg(feature = "integration")]
+    #[tokio::test]
+    async fn test_http_server_basic_functionality() {
+        // Test basic HTTP server functionality without requiring etcd
+        let cancel_token = CancellationToken::new();
+        let cancel_token_for_server = cancel_token.clone();
+
+        // Test basic HTTP server lifecycle
+        let app = Router::new().route("/test", get(|| async { (StatusCode::OK, "test") }));
+
+        // start HTTP server
+        let server_handle = tokio::spawn(async move {
+            let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
+            let _ = axum::serve(listener, app)
+                .with_graceful_shutdown(cancel_token_for_server.cancelled_owned())
+                .await;
+        });
+
+        // wait for a while to let the server start
+        sleep(Duration::from_millis(100)).await;
+
+        // cancel token
+        cancel_token.cancel();
+
+        // wait for the server to shut down
+        let result = tokio::time::timeout(Duration::from_secs(5), server_handle).await;
+        assert!(
+            result.is_ok(),
+            "HTTP server should shut down when cancel token is cancelled"
+        );
+    }
 }
--- a/lib/runtime/src/lib.rs
+++ b/lib/runtime/src/lib.rs
@@ -38,6 +38,7 @@ pub mod discovery;
 pub mod engine;
 pub mod http_server;
 pub mod logging;
+pub mod metrics;
 pub mod pipeline;
 pub mod prelude;
 pub mod protocols;
@@ -99,6 +100,6 @@ pub struct DistributedRuntime {

    instance_sources: Arc<Mutex<HashMap<Endpoint, Weak<InstanceSource>>>>,

-    // Start time for tracking uptime
-    start_time: std::time::Instant,
+    // This map associates metric prefixes with their corresponding Prometheus registries.
+    prometheus_registries_by_prefix: Arc<std::sync::Mutex<HashMap<String, prometheus::Registry>>>,
 }
--- a/lib/runtime/src/metrics.rs
+++ b/lib/runtime/src/metrics.rs
--- a/lib/runtime/src/traits.rs
+++ b/lib/runtime/src/traits.rs
@@ -31,3 +31,14 @@ impl RuntimeProvider for DistributedRuntime {
        &self.runtime
    }
 }
+
+// This implementation is required because:
+// 1. MetricsRegistry has a supertrait bound: `MetricsRegistry: Send + Sync + DistributedRuntimeProvider`
+// 2. DistributedRuntime implements MetricsRegistry (in distributed.rs)
+// 3. Therefore, DistributedRuntime must implement DistributedRuntimeProvider to satisfy the trait bound
+// 4. This enables DistributedRuntime to serve as both a provider (of itself) and a metrics registry
+impl DistributedRuntimeProvider for DistributedRuntime {
+    fn drt(&self) -> &DistributedRuntime {
+        self
+    }
+}
--- a/lib/runtime/src/transports/etcd.rs
+++ b/lib/runtime/src/transports/etcd.rs
@@ -616,7 +616,7 @@ mod tests {
    fn test_ectd_client() {
        let rt = Runtime::from_settings().unwrap();
        let rt_clone = rt.clone();
-        let config = DistributedConfig::from_settings();
+        let config = DistributedConfig::from_settings(false);

        rt_clone.primary().block_on(async move {
            let drt = DistributedRuntime::new(rt, config).await.unwrap();
@@ -628,8 +628,11 @@ mod tests {
        let key = "__integration_test_key";
        let value = b"test_value";

-        let client = drt.etcd_client();
-        let lease_id = drt.primary_lease().id();
+        let client = drt.etcd_client().expect("etcd client should be available");
+        let lease_id = drt
+            .primary_lease()
+            .expect("primary lease should be available")
+            .id();

        // Create the key
        let result = client