chore: Remove deprecated components/metrics and references (#3475)

Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>

chore: Remove deprecated components/metrics and references (#3475)
Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
be001a58 · Keiven C · GitHub · f712653e · be001a58 · be001a58
Unverified Commit be001a58 authored Oct 09, 2025 by Keiven C Committed by GitHub Oct 09, 2025
11 changed files
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4551,25 +4551,6 @@ dependencies = [
 "paste",
 ]
-[[package]]
-name = "metrics"
-version = "0.5.1"
-dependencies = [
- "axum 0.8.4",
- "clap 4.5.48",
- "dynamo-llm",
- "dynamo-runtime",
- "futures",
- "prometheus",
- "rand 0.9.2",
- "reqwest 0.12.23",
- "serde",
- "serde_json",
- "thiserror 2.0.16",
- "tokio",
- "tracing",
-]
 [[package]]
 name = "mime"
 version = "0.3.17"

--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,6 @@
 [workspace]
 members = [
-    "components/metrics",
    "launch/dynamo-run",
    "lib/llm",
    "lib/runtime",
@@ -18,7 +17,6 @@ members = [
 # - launch/dynamo-run
 # - lib/engines/*
 default-members = [
-    "components/metrics",
    "lib/llm",
    "lib/runtime",
    "lib/tokens",

--- a/components/metrics/Cargo.toml
+++ b/components/metrics/Cargo.toml
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-# SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-[package]
-name = "metrics"
-version.workspace = true
-edition.workspace = true
-authors.workspace = true
-license.workspace = true
-homepage.workspace = true
-repository.workspace = true
-[dependencies]
-dynamo-llm = { workspace = true }
-dynamo-runtime = { workspace = true }
-futures = { workspace = true }
-prometheus = { workspace = true }
-rand = { workspace = true }
-serde = { workspace = true }
-serde_json = { workspace = true }
-thiserror = { workspace = true }
-tokio = { workspace = true }
-tracing = { workspace = true }
-axum = { version = "0.8" }
-clap = { version = "4.5", features = ["derive", "env"] }
-reqwest = { version = "0.12.22", default-features = false, features = ["json", "rustls-tls"] }
--- a/components/metrics/README.md
+++ b/components/metrics/README.md
-# Metrics
-⚠️ **DEPRECATION NOTICE** ⚠️
-**This `metrics` component is unmaintained and being deprecated.**
-The deprecated `metrics` component is being replaced by the **`MetricsRegistry`** built-in functionality that is now available directly in the `DistributedRuntime` framework. The `MetricsRegistry` provides:
-**For new projects and existing deployments, please migrate to using `MetricsRegistry` instead of this component.**
-This component may be migrated to the MetricsRegistry in the future.
-**📖 See the [Dynamo MetricsRegistry Guide](../../docs/guides/metrics.md) for detailed information on using the new metrics system.**
---
-The deprecated `metrics` component is a utility for collecting, aggregating, and publishing metrics from a Dynamo deployment, but it is unmaintained and being deprecated in favor of `MetricsRegistry`.
-**Note**: This is a demo implementation. The deprecated `metrics` component is no longer under active development.
- In this demo the metrics names use the prefix "llm", but in production they will be prefixed with "dynamo" (e.g., the HTTP `/metrics` endpoint will serve metrics with "dynamo" prefixes)
-<div align="center">
-  <img src="images/dynamo_metrics_grafana.png" alt="Dynamo Metrics Dashboard"/>
-</div>
-## Quickstart
-To start the deprecated `metrics` component, simply point it at the `namespace/component/endpoint`
-trio for the Dynamo workers that you're interested in monitoring metrics on.
-This will:
-1. Collect statistics from workers associated with that `namespace/component/endpoint`
-2. Postprocess and aggregate those statistics across the workers
-3. Publish them on a Prometheus-compatible metrics endpoint
-For example:
-```bash
-# Default namespace is "dynamo", but can be configured with --namespace
-# For more detailed output, try setting the env var: DYN_LOG=debug
-metrics --component MyComponent --endpoint my_endpoint
-# 2025-03-17T00:07:05.202558Z  INFO metrics: Scraping endpoint dynamo/MyComponent/my_endpoint for stats
-# 2025-03-17T00:07:05.202955Z  INFO metrics: Prometheus metrics server started at 0.0.0.0:9091/metrics
-# ...
-```
-With no matching endpoints running to collect stats from, you should see warnings in the logs:
-```bash
-2025-03-17T00:07:06.204756Z  WARN metrics: No endpoints found matching dynamo/MyComponent/my_endpoint
-```
-After a worker with a matching endpoint gets started, the endpoint
-will get automatically discovered and the warnings will stop.
-## Workers
-The deprecated `metrics` component needs running workers to gather metrics from,
-so below are some examples of workers and how they can be monitored.
-### Mock Worker
-To try out how the deprecated `metrics` component works, there is a demo Rust-based
-[mock worker](src/bin/mock_worker.rs) that provides sample data through two mechanisms:
-1. Exposes a stats handler at `dynamo/MyComponent/my_endpoint` that responds to polling requests (from the deprecated `metrics` component) with randomly generated `ForwardPassMetrics` data
-2. Publishes mock `KVHitRateEvent` data every second to demonstrate event-based metrics
-Step 1: Launch a mock workers via the following command (if already built):
-```bash
-# or build/run from source: DYN_LOG=DEBUG cargo run --bin mock_worker
-mock_worker
-# 2025-03-16T23:49:28.101668Z  INFO mock_worker: Starting Mock Worker on Endpoint: dynamo/MyComponent/my_endpoint
-```
-Step 2: Monitor the metrics of these mock workers, and prepare its own Prometheus endpoint at
-port 9091 (a default, when --port is not specified) on /metrics:
-```bash
-metrics --component MyComponent --endpoint my_endpoint
-```
-### Real Worker
-To run a more realistic deployment to gather metrics:
-```bash
-python -m dynamo.frontend &
-python -m dynamo.vllm --model-path <your-model-checkout>
-```
-Then, to monitor the metrics of these VllmWorkers, run:
-```bash
-metrics --component backend --endpoint load_metrics
-```
-**NOTE**: `load_metrics` is currently a
-[hard-coded](https://github.com/ai-dynamo/dynamo/blob/d5220c7b1151372ba3d2a061c7d0a7ed72724789/lib/llm/src/kv_router/publisher.rs#L108)
-endpoint name used for python-based workers that register a `WorkerMetricsPublisher`.
-## Visualization
-To visualize the metrics being exposed on the Prometheus endpoint,
-see the Prometheus and Grafana configurations in
-[deploy/metrics](../../deploy/metrics):
-```bash
-docker compose -f deploy/docker-compose.yml --profile metrics up -d
-```
-## Metrics Collection Modes
-The deprecated `metrics` component supports two modes for exposing metrics in a Prometheus format:
-### Pull Mode (Default)
-When running in pull mode (the default), the deprecated `metrics` component will expose a
-Prometheus metrics endpoint on the specified host and port that a
-Prometheus server or curl client can pull from:
-```bash
-# Start metrics server on default host (0.0.0.0) and port (9091)
-metrics --component MyComponent --endpoint my_endpoint
-# Or specify a custom port
-metrics --component MyComponent --endpoint my_endpoint --port 9092
-```
-In pull mode:
- The `--host` parameter must be a valid IPv4 or IPv6 address (e.g., "0.0.0.0", "127.0.0.1")
- The `--port` parameter specifies which port the HTTP server will listen on
-You can then query the metrics using:
-```bash
-curl localhost:9091/metrics
-# # HELP llm_kv_blocks_active Active KV cache blocks
-# # TYPE llm_kv_blocks_active gauge
-# llm_kv_blocks_active{component="MyComponent",endpoint="my_endpoint",worker_id="7587884888253033398"} 40
-# llm_kv_blocks_active{component="MyComponent",endpoint="my_endpoint",worker_id="7587884888253033401"} 2
-# # HELP llm_kv_blocks_total Total KV cache blocks
-# # TYPE llm_kv_blocks_total gauge
-# llm_kv_blocks_total{component="MyComponent",endpoint="my_endpoint",worker_id="7587884888253033398"} 100
-# llm_kv_blocks_total{component="MyComponent",endpoint="my_endpoint",worker_id="7587884888253033401"} 100
-```
-### Push Mode
-For ephemeral or batch jobs, or when metrics need to be pushed through a firewall,
-you can use Push mode. In this mode, the deprecated `metrics` component will periodically push
-metrics to an externally hosted
-[Prometheus PushGateway](https://prometheus.io/docs/instrumenting/pushing/):
-Start a prometheus push gateway service via docker:
-```bash
-docker run --rm -d -p 9091:9091 --name pushgateway prom/pushgateway
-```
-Start the deprecated `metrics` component in `--push` mode, specifying the host and port of your PushGateway:
-```bash
-# Push metrics to a Prometheus PushGateway every --push-interval seconds
-metrics \
-    --component MyComponent \
-    --endpoint my_endpoint \
-    --host 127.0.0.1 \
-    --port 9091 \
-    --push
-```
-When using Push mode:
- The `--host` parameter must be a valid IPv4 or IPv6 address (e.g., "0.0.0.0", "127.0.0.1")
-  that the Prometheus PushGateway is running on
- The `--port` parameter specifies the port of the Prometheus PushGateway
- The push interval can be configured with `--push-interval` (default: 2 seconds)
- A default job name of "dynamo_metrics" is used for the Prometheus job label
- Metrics persist in the PushGateway until explicitly deleted
- Prometheus should be configured to scrape the PushGateway with `honor_labels: true`
-To view the metrics hosted on the PushGateway:
-```bash
-# View all metrics
-# curl http://<pushgateway_ip>:<pushgateway_port>/metrics
-curl 127.0.0.1:9091/metrics
-```
-## Building/Running from Source
-For easy iteration while making edits to the deprecated `metrics` component, you can use `cargo run`
-to build and run with your local changes:
-```bash
-cargo run --bin metrics -- --component MyComponent --endpoint my_endpoint
-```
--- a/components/metrics/images/dynamo_metrics_grafana.png
+++ b/components/metrics/images/dynamo_metrics_grafana.png
--- a/components/metrics/src/bin/mock_worker.rs
+++ b/components/metrics/src/bin/mock_worker.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-use dynamo_llm::kv_router::{
-    KV_HIT_RATE_SUBJECT,
-    protocols::{ForwardPassMetrics, KvStats, WorkerStats},
-    scheduler::KVHitRateEvent,
-};
-use dynamo_runtime::{
-    DistributedRuntime, Result, Runtime, Worker,
-    component::{Namespace, service::EndpointStats},
-    logging,
-    pipeline::{
-        AsyncEngine, AsyncEngineContextProvider, Error, ManyOut, ResponseStream, SingleIn,
-        async_trait, network::Ingress,
-    },
-    protocols::annotated::Annotated,
-    stream,
-    traits::events::EventPublisher,
-};
-use rand::Rng;
-use std::sync::Arc;
-use tokio::time::{Duration, interval};
-fn main() -> Result<()> {
-    logging::init();
-    let worker = Worker::from_settings()?;
-    worker.execute(app)
-}
-async fn app(runtime: Runtime) -> Result<()> {
-    let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;
-    backend(distributed).await
-}
-struct MockRequestHandler {}
-impl MockRequestHandler {
-    fn new() -> Arc<Self> {
-        Arc::new(Self {})
-    }
-}
-#[async_trait]
-impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for MockRequestHandler {
-    async fn generate(&self, input: SingleIn<String>) -> Result<ManyOut<Annotated<String>>> {
-        let (data, ctx) = input.into_parts();
-        let chars = data
-            .chars()
-            .map(|c| Annotated::from_data(c.to_string()))
-            .collect::<Vec<_>>();
-        let stream = stream::iter(chars);
-        Ok(ResponseStream::new(Box::pin(stream), ctx.context()))
-    }
-}
-// FIXME: These events are just for testing and may not currently be used.
-/// Spawns a background task that periodically publishes mock KV hit rate events
-async fn mock_event_publisher(namespace: Namespace) {
-    // NOTE: These events are just for testing, and shouldn't be interpreted
-    // in correlation with the stats handler's data:
-    // 1. The worker ID associated with the events here won't match the
-    // worker ID of the endpoint's service stats handler.
-    // 2. These events aren't coming through the KV Router, so the metrics won't
-    // be reflective of the KV Router's performance.
-    // 3. The data in these events aren't in sync with the stats handler's
-    // ForwardPassMetrics data, so they may not correlate well.
-    let worker_id = rand::rng().random_range(1..=1000);
-    let mut interval = interval(Duration::from_secs(1));
-    loop {
-        interval.tick().await;
-        // Generate random KV hit rate event using a new thread_rng each time
-        let isl_blocks = rand::rng().random_range(0..=100);
-        let overlap_blocks = rand::rng().random_range(0..=isl_blocks);
-        let event = KVHitRateEvent {
-            worker_id,
-            isl_blocks,
-            overlap_blocks: overlap_blocks as u32,
-        };
-        if let Err(e) = namespace.publish(KV_HIT_RATE_SUBJECT, &event).await {
-            tracing::warn!("Failed to publish KV hit rate event: {e}");
-        } else {
-            tracing::debug!(
-                "Published KV hit rate event: worker_id={worker_id}, isl_blocks={isl_blocks}, overlap_blocks={overlap_blocks}, hit_rate={:.2}%",
-                (overlap_blocks as f64 / isl_blocks as f64) * 100.0
-            );
-        }
-    }
-}
-/// Generates mock forward pass metrics for stats handler
-fn mock_stats_handler(_stats: EndpointStats) -> serde_json::Value {
-    let request_total_slots = 100;
-    let request_active_slots = rand::rng().random_range(0..=request_total_slots);
-    let kv_total_blocks = 100;
-    let kv_active_blocks = rand::rng().random_range(0..=kv_total_blocks);
-    let num_requests_waiting = rand::rng().random_range(0..=100);
-    let gpu_cache_usage_perc = rand::rng().random_range(0.0..=1.0);
-    let gpu_prefix_cache_hit_rate = rand::rng().random_range(0.0..=1.0);
-    let worker_stats = WorkerStats {
-        data_parallel_rank: None, // Default for backwards compatibility
-        request_active_slots,
-        request_total_slots,
-        num_requests_waiting,
-    };
-    let kv_stats = KvStats {
-        kv_active_blocks,
-        kv_total_blocks,
-        gpu_cache_usage_perc,
-        gpu_prefix_cache_hit_rate,
-    };
-    let spec_decode_stats = None;
-    let stats = ForwardPassMetrics {
-        worker_stats,
-        kv_stats,
-        spec_decode_stats,
-    };
-    tracing::info!("Stats: {stats:?}");
-    serde_json::to_value(stats).unwrap()
-}
-async fn backend(runtime: DistributedRuntime) -> Result<()> {
-    let namespace = runtime.namespace("dynamo")?;
-    // we must first create a service, then we can attach one more more endpoints
-    let component = namespace
-        .component("MyComponent")?
-        .service_builder()
-        .create()
-        .await?;
-    let endpoint = component.endpoint("my_endpoint");
-    tracing::info!("Starting Mock Worker on Endpoint: {}", endpoint.path());
-    // Spawn background task for publishing KV hit rate events
-    let namespace_clone = namespace.clone();
-    tokio::spawn(async move {
-        mock_event_publisher(namespace_clone).await;
-    });
-    // Attach an ingress to the engine
-    let ingress = Ingress::for_engine(MockRequestHandler::new())?;
-    // Make the ingress discoverable via a component service
-    endpoint
-        .endpoint_builder()
-        // Dummy stats handler to demonstrate how to attach a custom stats handler
-        .stats_handler(mock_stats_handler)
-        .handler(ingress)
-        .start()
-        .await
-}
--- a/components/metrics/src/lib.rs
+++ b/components/metrics/src/lib.rs
--- a/components/metrics/src/main.rs
+++ b/components/metrics/src/main.rs
-// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
-// SPDX-License-Identifier: Apache-2.0
-//! Metrics is a metrics aggregator designed to operate within a namespace and collect
-//! metrics from all workers.
-//!
-//! Metrics will collect for now:
-//!
-//! - LLM Worker Load:Capacity
-//!   - These metrics will be scraped by the LLM NATS Service API's stats request
-//!   - Request Slots: [Active, Total]
-//!   - KV Cache Blocks: [Active, Total]
-//! - KV Hit Rate:
-//!   - These metrics will be collected from KV hit rate events published by the KV router
-//!   - ISL Blocks: Cumulative count of total blocks in all KV hit rate events
-//!   - Overlap Blocks: Cumulative count of blocks that were already in the KV cache
-use clap::Parser;
-use dynamo_llm::kv_router::KV_HIT_RATE_SUBJECT;
-use dynamo_llm::kv_router::scheduler::KVHitRateEvent;
-use dynamo_runtime::{
-    DistributedRuntime, ErrorContext, Result, Runtime, Worker, error, logging,
-    traits::events::{EventPublisher, EventSubscriber},
-    utils::{Duration, Instant},
-};
-use futures::stream::StreamExt;
-use std::sync::Arc;
-// Import from our library
-use metrics::{
-    LLMWorkerLoadCapacityConfig, MetricsMode, PrometheusMetricsCollector, collect_endpoints,
-    extract_metrics, postprocess_metrics,
-};
-/// CLI arguments for the metrics application
-#[derive(Parser, Debug)]
-#[command(author, version, about, long_about = None)]
-struct Args {
-    /// Namespace to operate in and subscribe to events on
-    #[arg(long, env = "DYN_NAMESPACE", default_value = "dynamo")]
-    namespace: String,
-    /// Component to scrape metrics from
-    #[arg(long)]
-    component: String,
-    /// Endpoint to scrape metrics from
-    #[arg(long)]
-    endpoint: String,
-    /// Model name for the target component (optional)
-    #[arg(long)]
-    model_name: Option<String>,
-    /// Polling interval in seconds for scraping dynamo endpoint stats (minimum 1 second)
-    #[arg(long, default_value = "1")]
-    poll_interval: u64,
-    /// Host for serving or pushing prometheus metrics (default: 0.0.0.0)
-    #[arg(
-        long,
-        default_value = "0.0.0.0",
-        help_heading = "Prometheus Metrics Config"
-    )]
-    host: String,
-    /// Port to run the Prometheus metrics server on (default: 9091)
-    #[arg(
-        long,
-        default_value = "9091",
-        help_heading = "Prometheus Metrics Config"
-    )]
-    port: u16,
-    /// Push metrics to an external Prometheus Pushgateway instead of hosting them in-process
-    #[arg(long, help_heading = "Prometheus Metrics Config")]
-    push: bool,
-    /// Push interval in seconds, when using push mode (minimum 1 second, default: 2)
-    #[arg(long, default_value = "2", help_heading = "Prometheus Metrics Config")]
-    push_interval: u64,
-}
-fn get_config(args: &Args) -> Result<LLMWorkerLoadCapacityConfig> {
-    if args.component.is_empty() {
-        return Err(error!("Component name cannot be empty"));
-    }
-    if args.endpoint.is_empty() {
-        return Err(error!("Endpoint name cannot be empty"));
-    }
-    if args.poll_interval < 1 {
-        return Err(error!("Polling interval must be at least 1 second"));
-    }
-    if args.push && args.push_interval < 1 {
-        return Err(error!("Push interval must be at least 1 second"));
-    }
-    Ok(LLMWorkerLoadCapacityConfig {
-        component_name: args.component.clone(),
-        endpoint_name: args.endpoint.clone(),
-        model_name: args.model_name.clone(),
-    })
-}
-async fn app(runtime: Runtime) -> Result<()> {
-    let args = Args::parse();
-    let config = get_config(&args)?;
-    tracing::debug!("Config: {config:?}");
-    let drt = DistributedRuntime::from_settings(runtime.clone()).await?;
-    let namespace = drt.namespace(args.namespace)?;
-    let component = namespace.component("count")?;
-    // Create unique instance of Count
-    let key = format!("{}/instance", component.etcd_root());
-    tracing::debug!("Creating unique instance of Count at {key}");
-    drt.etcd_client()
-        .expect("Unreachable because of DistributedRuntime::from_settings above")
-        .kv_create(&key, serde_json::to_vec_pretty(&config)?, None)
-        .await
-        .context("Unable to create unique instance of Count; possibly one already exists")?;
-    let target_component = namespace.component(&config.component_name)?;
-    let target_endpoint = target_component.endpoint(&config.endpoint_name);
-    let service_path = target_endpoint.path();
-    let service_subject = target_endpoint.subject();
-    tracing::info!("Scraping endpoint {service_path} for stats");
-    // Safety: DistributedRuntime::from_settings ensures this is Some
-    let token = drt.primary_lease().unwrap().child_token();
-    let event_name = format!("l2c.{}.{}", config.component_name, config.endpoint_name);
-    // Initialize Prometheus metrics with the selected mode
-    let metrics_collector = PrometheusMetricsCollector::new()?;
-    let metrics_collector = Arc::new(tokio::sync::Mutex::new(metrics_collector));
-    // Start metrics collection in the selected mode
-    let metrics_mode = if args.push {
-        MetricsMode::Push {
-            host: args.host,
-            port: args.port,
-            job: "dynamo_push_metrics".to_string(),
-            interval: args.push_interval,
-        }
-    } else {
-        MetricsMode::Pull {
-            host: args.host,
-            port: args.port,
-        }
-    };
-    metrics_collector.lock().await.start(metrics_mode)?;
-    // TODO: Consider removing event subscription until metrics are more standardized
-    // Subscribe to KV hit rate events
-    let kv_hit_rate_subject = KV_HIT_RATE_SUBJECT;
-    tracing::debug!("Subscribing to KV hit rate events on subject: {kv_hit_rate_subject}");
-    // Clone fields for the event subscription task
-    let config_clone = config.clone();
-    let namespace_clone = namespace.clone();
-    let metrics_collector_clone = metrics_collector.clone();
-    // Note: Subscribing to KVHitRateEvent for illustration purposes. They're not used in production.
-    // Spawn a task to handle KV hit rate events
-    tokio::spawn(async move {
-        match namespace_clone.subscribe(kv_hit_rate_subject).await {
-            Ok(mut subscriber) => {
-                tracing::debug!("Successfully subscribed to KV hit rate events");
-                while let Some(msg) = subscriber.next().await {
-                    match serde_json::from_slice::<KVHitRateEvent>(&msg.payload) {
-                        Ok(event) => {
-                            // TODO: Lower to debug
-                            let cache_hit_pct =
-                                (event.overlap_blocks as f64 / event.isl_blocks as f64) * 100.0;
-                            tracing::debug!(
-                                "Received KV hit rate event: worker_id={}, isl_blocks={}, overlap_blocks={}, cache_hit_pct={:.2}%",
-                                event.worker_id,
-                                event.isl_blocks,
-                                event.overlap_blocks,
-                                cache_hit_pct
-                            );
-                            // Update metrics with the event data
-                            let mut metrics = metrics_collector_clone.lock().await;
-                            metrics.update_kv_hit_rate(
-                                &config_clone,
-                                event.worker_id,
-                                event.isl_blocks,
-                                event.overlap_blocks as usize,
-                            );
-                        }
-                        Err(e) => {
-                            tracing::warn!("Failed to deserialize KV hit rate event: {e}");
-                        }
-                    }
-                }
-                tracing::warn!("KV hit rate event subscription stream ended");
-            }
-            Err(e) => {
-                tracing::error!("Failed to subscribe to KV hit rate events: {:?}", e);
-            }
-        }
-    });
-    loop {
-        let next = Instant::now() + Duration::from_secs(args.poll_interval);
-        // Collect and process metrics
-        let scrape_timeout = Duration::from_secs(1);
-        let endpoints =
-            collect_endpoints(&target_component, &service_subject, scrape_timeout).await?;
-        if endpoints.is_empty() {
-            tracing::warn!("No endpoints found matching {service_path}");
-            continue;
-        }
-        let metrics = extract_metrics(&endpoints);
-        let processed = postprocess_metrics(&metrics, &endpoints);
-        if processed.endpoints.is_empty() {
-            tracing::warn!("No metrics found matching {service_path}");
-        } else {
-            tracing::info!("Aggregated metrics: {processed:?}");
-        }
-        // Update Prometheus metrics
-        metrics_collector.lock().await.update(&config, &processed);
-        // TODO: Enable KV Routers to subscribe to metrics events published here
-        // for a single view of the aggregated metrics, as opposed to the current
-        // approach where each KV Router computes and published its own metrics.
-        // Publish metrics event
-        namespace.publish(&event_name, &processed).await?;
-        // Wait until cancelled or the next tick
-        match tokio::time::timeout_at(next, token.cancelled()).await {
-            Ok(_) => break,
-            Err(_) => continue,
-        }
-    }
-    Ok(())
-}
-fn main() -> Result<()> {
-    logging::init();
-    let worker = Worker::from_settings()?;
-    worker.execute(app)
-}
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use std::env;
-    #[test]
-    fn test_namespace_from_env() {
-        unsafe { env::set_var("DYN_NAMESPACE", "test-namespace") };
-        let args = Args::parse_from(["count", "--component", "comp", "--endpoint", "end"]);
-        assert_eq!(args.namespace, "test-namespace");
-    }
-}
--- a/deploy/metrics/README.md
+++ b/deploy/metrics/README.md
@@ -174,7 +174,7 @@ The following configuration files should be present in this directory:
 - [grafana_dashboards/grafana-dashboard-providers.yml](./grafana_dashboards/grafana-dashboard-providers.yml): Contains Grafana dashboard provider configuration
 - [grafana_dashboards/grafana-dynamo-dashboard.json](./grafana_dashboards/grafana-dynamo-dashboard.json): A general Dynamo Dashboard for both SW and HW metrics.
 - [grafana_dashboards/grafana-dcgm-metrics.json](./grafana_dashboards/grafana-dcgm-metrics.json): Contains Grafana dashboard configuration for DCGM GPU metrics
- [grafana_dashboards/grafana-llm-metrics.json](./grafana_dashboards/grafana-llm-metrics.json): This file, which is being phased out, contains the Grafana dashboard configuration for LLM-specific metrics. It requires an additional `metrics` component to operate concurrently. A new version is under development.
+- [grafana_dashboards/grafana-kvbm-dashboard.json](./grafana_dashboards/grafana-kvbm-dashboard.json): Contains Grafana dashboard configuration for KVBM metrics
 ### Metric Name Constants
@@ -237,8 +237,6 @@ This centralized approach ensures all Dynamo components use consistent, valid Pr
   - DCGM Exporter: `http://localhost:9401/metrics`
-   - Start the [components/metrics](../../components/metrics/README.md) application to begin monitoring for metric events from dynamo workers and aggregating them on a Prometheus metrics endpoint: `http://localhost:9091/metrics`.
-   - Uncomment the appropriate lines in prometheus.yml to poll port 9091.
   - Start worker(s) that publishes KV Cache metrics: [lib/runtime/examples/service_metrics/README.md](../../lib/runtime/examples/service_metrics/README.md) can populate dummy KV Cache metrics.
 ### Configuration
@@ -275,7 +273,7 @@ Grafana is pre-configured with:
  docker compose logs grafana
  ```
-3. For issues with the legacy metrics component (being phased out), see [components/metrics/README.md](../../components/metrics/README.md) for details on the exposed metrics and troubleshooting steps.
+3. Check Prometheus targets at `http://localhost:9090/targets` to verify metric collection.
 ## Developer Guide
@@ -477,21 +475,6 @@ let requests_total = namespace.create_counter(
 )?;
 ```
-## Running the deprecated `components/metrics` program
-⚠️ **DEPRECATION NOTICE** ⚠️
-When you run the example [components/metrics](../../components/metrics/README.md) program, it exposes a Prometheus /metrics endpoint with the following metrics (defined in [components/metrics/src/lib.rs](../../components/metrics/src/lib.rs)):
-**⚠️ The following `llm_kv_*` metrics are deprecated:**
- `llm_requests_active_slots`: Active request slots per worker
- `llm_requests_total_slots`: Total available request slots per worker
- `llm_kv_blocks_active`: Active KV blocks per worker
- `llm_kv_blocks_total`: Total KV blocks available per worker
- `llm_kv_hit_rate_percent`: KV Cache hit percent per worker
- `llm_load_avg`: Average load across workers
- `llm_load_std`: Load standard deviation across workers
 ## Troubleshooting
@@ -506,4 +489,4 @@ When you run the example [components/metrics](../../components/metrics/README.md
  docker compose logs grafana
  ```
-3. For issues with the legacy metrics component (being phased out), see [components/metrics/README.md](../../components/metrics/README.md) for details on the exposed metrics and troubleshooting steps.
+3. Check Prometheus targets at `http://localhost:9090/targets` to verify metric collection.
--- a/deploy/metrics/grafana_dashboards/grafana-llm-metrics.json
+++ b/deploy/metrics/grafana_dashboards/grafana-llm-metrics.json
--- a/deploy/metrics/prometheus.yml
+++ b/deploy/metrics/prometheus.yml
@@ -33,13 +33,13 @@ scrape_configs:
    static_configs:
      - targets: ['dcgm-exporter:9401']  # on the "monitoring" network
-  # This is a demo service that needs to be launched manually. See components/metrics/README.md
+  # This is a demo service that needs to be launched manually
-  # Note that you may need to disable the firewall on your host. On Ubuntu: sudo ufw allow 8080/tcp
+  # Note that you may need to open this port in your host's firewall. On Ubuntu: sudo ufw allow 8000/tcp
-  # You can also force the port, if the default is different: python -m dynamo.frontend --http-port 8080
+  # You can also force the port, if the default is different: python -m dynamo.frontend --http-port 8000
  - job_name: 'dynamo-frontend'
    scrape_interval: 10s
    static_configs:
-      - targets: ['host.docker.internal:8080']  # on the "monitoring" network
+      - targets: ['host.docker.internal:8000']  # on the "monitoring" network
  # Launch via: DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 dynamo.<backend> ...
  # If you want to update the scrape_interval, you may want to also update component.rs's MAX_DELAY
@@ -48,15 +48,6 @@ scrape_configs:
    static_configs:
      - targets: ['host.docker.internal:8081']
-  # DEPRECATED: This metrics aggregation service is being deprecated in favor of MetricsRegistry
-  # The new system uses the 'dynamo-backend' job above instead of this separate service
-  # This is another demo aggregator that needs to be launched manually. See components/metrics/README.md
-  # Note that you may need to disable the firewall on your host. On Ubuntu: sudo ufw allow 9091/tcp
-  - job_name: 'metrics-aggregation-service'
-    scrape_interval: 2s
-    static_configs:
-      # - targets: ['localhost:9091']  # metrics aggregation service on host
-      - targets: ['host.docker.internal:9091']  # metrics aggregation service on host
  # KVBM leader related metrics
  - job_name: 'kvbm-metrics'