Commit ac13ed06 authored by Neelay Shah, committed by GitHub

refactor: rename count to metrics and move location (#21)

parent 1b96c2c4
@@ -40,7 +40,7 @@ jobs:
pre-merge-rust:
runs-on: ubuntu-latest
strategy:
matrix: { dir: ['lib/runtime', 'lib/llm', 'lib/bindings/c', 'lib/bindings/python', 'launch/dynemo-run', 'applications/llm/count', 'examples/rust'] }
matrix: { dir: ['lib/runtime', 'lib/llm', 'lib/bindings/c', 'lib/bindings/python', 'launch/dynemo-run', 'components/metrics', 'examples/rust'] }
permissions:
contents: read
steps:
......
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
services:
prometheus:
image: prom/prometheus:latest
container_name: prometheus
volumes:
- ./prometheus.yml:/etc/prometheus/prometheus.yml
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
# These provide the web console functionality
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--web.enable-lifecycle'
restart: unless-stopped
# TODO: Use more explicit networking setup when count is containerized
#ports:
# - "9090:9090"
#networks:
# - monitoring
network_mode: "host"
grafana:
image: grafana/grafana-enterprise:latest
container_name: grafana
volumes:
- ./grafana.json:/etc/grafana/provisioning/dashboards/llm-worker-dashboard.json
- ./grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml
- ./grafana-dashboard-providers.yml:/etc/grafana/provisioning/dashboards/dashboard-providers.yml
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_USERS_ALLOW_SIGN_UP=false
- GF_INSTALL_PLUGINS=grafana-piechart-panel
# Default min interval is 5s, but can be configured lower
- GF_DASHBOARDS_MIN_REFRESH_INTERVAL=2s
restart: unless-stopped
# TODO: Use more explicit networking setup when count is containerized
#ports:
# - "3000:3000"
#networks:
# - monitoring
network_mode: "host"
depends_on:
- prometheus
networks:
monitoring:
driver: bridge
@@ -731,27 +731,6 @@ version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
[[package]]
name = "count"
version = "0.1.0"
dependencies = [
"axum 0.6.20",
"clap",
"dynemo-llm",
"dynemo-runtime",
"futures",
"opentelemetry",
"opentelemetry-prometheus",
"prometheus",
"rand",
"reqwest 0.11.27",
"serde",
"serde_json",
"thiserror 1.0.69",
"tokio",
"tracing",
]
[[package]]
name = "cpufeatures"
version = "0.2.17"
@@ -2216,6 +2195,27 @@ dependencies = [
"autocfg",
]
[[package]]
name = "metrics"
version = "0.1.0"
dependencies = [
"axum 0.6.20",
"clap",
"dynemo-llm",
"dynemo-runtime",
"futures",
"opentelemetry",
"opentelemetry-prometheus",
"prometheus",
"rand",
"reqwest 0.11.27",
"serde",
"serde_json",
"thiserror 1.0.69",
"tokio",
"tracing",
]
[[package]]
name = "mime"
version = "0.3.17"
......
@@ -14,15 +14,16 @@
# limitations under the License.
[package]
name = "count"
name = "metrics"
version = "0.1.0"
edition = "2021"
license = "Apache-2.0"
[dependencies]
# local
dynemo-runtime = { path = "../../../lib/runtime" }
dynemo-llm = { path = "../../../lib/llm" }
dynemo-runtime = { path = "../../lib/runtime" }
dynemo-llm = { path = "../../lib/llm" }
# workspace - todo
......
# Count
# Metrics
## Quickstart
To start `count`, simply point it at the namespace/component/endpoint trio that
To start `metrics`, simply point it at the namespace/component/endpoint trio that
you're interested in observing metrics from. This will scrape statistics from
the services associated with that endpoint, do some postprocessing on them,
and then publish an event with the postprocessed data.
```bash
# For more details, try DYN_LOG=debug
DYN_LOG=info cargo run --bin count -- --namespace dynemo --component backend --endpoint generate
DYN_LOG=info cargo run --bin metrics -- --namespace dynemo --component backend --endpoint generate
# 2025-02-26T18:45:05.467026Z INFO count: Creating unique instance of Count at dynemo/components/count/instance
# 2025-02-26T18:45:05.472146Z INFO count: Scraping service dynemo_init_backend_720278f8 and filtering on subject dynemo_init_backend_720278f8.generate
# 2025-02-26T18:45:05.467026Z INFO metrics: Creating unique instance of Metrics at dynemo/components/metrics/instance
# 2025-02-26T18:45:05.472146Z INFO metrics: Scraping service dynemo_backend_720278f8 and filtering on subject dynemo_backend_720278f8.generate
# ...
```
With no matching endpoints running, you should see warnings in the logs:
```bash
2025-02-26T18:45:06.474161Z WARN count: No endpoints found matching subject dynemo_init_backend_720278f8.generate
2025-02-26T18:45:06.474161Z WARN metrics: No endpoints found matching subject dynemo_backend_720278f8.generate
```
To see metrics published to a matching endpoint, you can use the
@@ -32,10 +32,10 @@ cargo run --bin mock_worker
After a matching endpoint gets started, you should see the warnings go away
since the endpoint will automatically get discovered.
When stats are found from the target endpoints being listened on, count will
aggregate and publish some metrics as both an event and to a prometheus web server:
When stats are found from the target endpoints, the metrics component will
aggregate them and publish metrics both as events and as updates to a Prometheus server:
```
2025-02-28T04:05:58.077901Z INFO count: Aggregated metrics: ProcessedEndpoints { endpoints: [Endpoint { name: "worker-7587884888253033398", subject: "dynemo_init_backend_720278f8.generate-694d951a80e06bb6", data: ForwardPassMetrics { request_active_slots: 58, request_total_slots: 100, kv_active_blocks: 77, kv_total_blocks: 100 } }, Endpoint { name: "worker-7587884888253033401", subject: "dynemo_init_backend_720278f8.generate-694d951a80e06bb9", data: ForwardPassMetrics { request_active_slots: 71, request_total_slots: 100, kv_active_blocks: 29, kv_total_blocks: 100 } }], worker_ids: [7587884888253033398, 7587884888253033401], load_avg: 53.0, load_std: 24.0 }
2025-02-28T04:05:58.077901Z INFO metrics: Aggregated metrics: ProcessedEndpoints { endpoints: [Endpoint { name: "worker-7587884888253033398", subject: "dynemo_backend_720278f8.generate-694d951a80e06bb6", data: ForwardPassMetrics { request_active_slots: 58, request_total_slots: 100, kv_active_blocks: 77, kv_total_blocks: 100 } }, Endpoint { name: "worker-7587884888253033401", subject: "dynemo_backend_720278f8.generate-694d951a80e06bb9", data: ForwardPassMetrics { request_active_slots: 71, request_total_slots: 100, kv_active_blocks: 29, kv_total_blocks: 100 } }], worker_ids: [7587884888253033398, 7587884888253033401], load_avg: 53.0, load_std: 24.0 }
```
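The `load_avg` and `load_std` values in the log line above are consistent with a simple population mean and standard deviation over each worker's KV-block utilization expressed as a percentage (`kv_active_blocks / kv_total_blocks`). A minimal sketch under that assumption (the helper name and formula are inferred from the example values, not taken from the source):

```rust
// Sketch: reproduce the load_avg/load_std values from the log line above,
// assuming "load" is each worker's KV-block utilization as a percentage
// and std is the population standard deviation.
fn load_stats(kv: &[(u64, u64)]) -> (f64, f64) {
    let loads: Vec<f64> = kv
        .iter()
        .map(|&(active, total)| 100.0 * active as f64 / total as f64)
        .collect();
    let avg = loads.iter().sum::<f64>() / loads.len() as f64;
    let var = loads.iter().map(|l| (l - avg).powi(2)).sum::<f64>() / loads.len() as f64;
    (avg, var.sqrt())
}

fn main() {
    // The two workers from the example: kv_active_blocks 77/100 and 29/100.
    let (avg, std) = load_stats(&[(77, 100), (29, 100)]);
    println!("load_avg: {avg}, load_std: {std}"); // 53 and 24, matching the log
}
```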
To see the metrics being published in Prometheus format, you can run:
......
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Library functions for the count application.
//! Library functions for the metrics application.
use axum::{routing::get, Router};
use prometheus::{register_counter_vec, register_gauge_vec};
......
@@ -13,7 +13,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//! Count is a metrics aggregator designed to operate within a namespace and collect
//! Metrics is a metrics aggregator designed to operate within a namespace and collect
//! metrics from all workers.
//!
//! Metrics will collect for now:
@@ -38,12 +38,12 @@ use futures::stream::StreamExt;
use std::sync::Arc;
// Import from our library
use count::{
use metrics::{
collect_endpoints, extract_metrics, postprocess_metrics, LLMWorkerLoadCapacityConfig,
PrometheusMetricsServer,
};
/// CLI arguments for the count application
/// CLI arguments for the metrics application
#[derive(Parser, Debug)]
#[command(author, version, about, long_about = None)]
struct Args {
......
@@ -29,3 +29,50 @@ services:
ports:
- 2379:2379
- 2380:2380
prometheus:
image: prom/prometheus:latest
container_name: prometheus
volumes:
- ./metrics/prometheus.yml:/etc/prometheus/prometheus.yml
command:
- '--config.file=/etc/prometheus/prometheus.yml'
- '--storage.tsdb.path=/prometheus'
# These provide the web console functionality
- '--web.console.libraries=/etc/prometheus/console_libraries'
- '--web.console.templates=/etc/prometheus/consoles'
- '--web.enable-lifecycle'
restart: unless-stopped
# TODO: Use more explicit networking setup when metrics is containerized
#ports:
# - "9090:9090"
#networks:
# - monitoring
network_mode: "host"
profiles: [metrics]
grafana:
image: grafana/grafana-enterprise:latest
container_name: grafana
volumes:
- ./metrics/grafana.json:/etc/grafana/provisioning/dashboards/llm-worker-dashboard.json
- ./metrics/grafana-datasources.yml:/etc/grafana/provisioning/datasources/datasources.yml
- ./metrics/grafana-dashboard-providers.yml:/etc/grafana/provisioning/dashboards/dashboard-providers.yml
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_USERS_ALLOW_SIGN_UP=false
- GF_INSTALL_PLUGINS=grafana-piechart-panel
# Default min interval is 5s, but can be configured lower
- GF_DASHBOARDS_MIN_REFRESH_INTERVAL=2s
restart: unless-stopped
# TODO: Use more explicit networking setup when metrics is containerized
#ports:
# - "3000:3000"
#networks:
# - monitoring
network_mode: "host"
profiles: [metrics]
depends_on:
- prometheus
@@ -11,17 +11,23 @@ This directory contains configuration for visualizing metrics from the metrics a
1. Make sure Docker and Docker Compose are installed on your system
2. Start `count` and the corresponding `examples/rust/service_metrics/bin/server.rs` that populates dummy KV Cache metrics.
2. Start the `components/metrics` application to begin monitoring for metric events from dynemo workers
and aggregating them into a Prometheus metrics endpoint at `http://localhost:9091/metrics`.
3. Start the visualization stack:
3. Start worker(s) that publishes KV Cache metrics.
- For quick testing, `examples/rust/service_metrics/bin/server.rs` can populate dummy KV Cache metrics.
- For a real workflow with real data, see the KV Routing example in `examples/python_rs/llm/vllm`.
4. Start the visualization stack:
```bash
docker compose up -d
docker compose --profile metrics up -d
```
4. Web servers started:
- Grafana: http://localhost:3000 (default login: admin/admin)
- Prometheus: http://localhost:9090
5. Web servers started:
- Grafana: `http://localhost:3000` (default login: admin/admin) (started by docker compose)
- Prometheus Server: `http://localhost:9090` (started by docker compose)
- Prometheus Metrics Endpoint: `http://localhost:9091/metrics` (started by `components/metrics` application)
## Configuration
@@ -40,7 +46,7 @@ Grafana is pre-configured with:
## Required Files
The following configuration files should be present in this directory:
- `docker-compose.yml`: Defines the Prometheus and Grafana services
- `../docker-compose.yml`: Defines the Prometheus and Grafana services
- `prometheus.yml`: Contains Prometheus scraping configuration
- `grafana.json`: Contains Grafana dashboard configuration
- `grafana-datasources.yml`: Contains Grafana datasource configuration
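For orientation, a minimal `prometheus.yml` consistent with the endpoints described above might look like the following sketch; the job name and scrape interval are illustrative assumptions, and the actual file in this directory is authoritative:

```yaml
global:
  scrape_interval: 5s   # illustrative; choose an interval that suits your setup

scrape_configs:
  # Scrape the metrics application's endpoint (http://localhost:9091/metrics).
  - job_name: llm-worker-metrics
    static_configs:
      - targets: ['localhost:9091']
```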
@@ -55,6 +61,8 @@ The prometheus service exposes the following metrics:
- `llm_requests_total_slots`: Total available request slots
- `llm_kv_blocks_active`: Number of active KV blocks
- `llm_kv_blocks_total`: Total KV blocks available
- `llm_kv_hit_rate_isl_blocks`: Cumulative count of ISL blocks in KV hit rate events
- `llm_kv_hit_rate_overlap_blocks`: Cumulative count of overlapping blocks in KV hit rate events
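Since both hit-rate metrics are cumulative counters, a hit-rate ratio can be derived from them. The ratio definition below is an assumption (overlap blocks treated as the cache-hit subset of ISL blocks), not taken from the source:

```rust
// Sketch: derive a KV hit rate from the two cumulative counters above,
// assuming overlap blocks are the subset of ISL blocks already in the cache.
fn kv_hit_rate(isl_blocks: u64, overlap_blocks: u64) -> f64 {
    if isl_blocks == 0 {
        return 0.0; // avoid dividing by zero before any events arrive
    }
    overlap_blocks as f64 / isl_blocks as f64
}

fn main() {
    // Hypothetical counter values: 50 of 200 ISL blocks overlapped.
    println!("{}", kv_hit_rate(200, 50)); // 0.25
}
```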
## Troubleshooting
......
@@ -6,7 +6,7 @@
],
"settings": {
"rust-analyzer.linkedProjects": [
"applications/llm/count/Cargo.toml",
"components/metrics/Cargo.toml",
"lib/llm/Cargo.toml",
"lib/runtime/Cargo.toml",
"lib/bindings/python/Cargo.toml",
......