refactor: rename KvMetricsPublisher to WorkerMetricsPublisher (#1284)

2f8da9ad · Alec · GitHub · 9210a26d · 2f8da9ad · 2f8da9ad
Unverified Commit 2f8da9ad authored May 30, 2025 by Alec Committed by GitHub May 30, 2025
15 changed files
--- a/components/metrics/README.md
+++ b/components/metrics/README.md
@@ -82,7 +82,7 @@ metrics --component VllmWorker --endpoint load_metrics

 **NOTE**: `load_metrics` is currently a
 [hard-coded](https://github.com/ai-dynamo/dynamo/blob/d5220c7b1151372ba3d2a061c7d0a7ed72724789/lib/llm/src/kv_router/publisher.rs#L108)
-endpoint name used for python-based workers that register a `KvMetricsPublisher`.
+endpoint name used for python-based workers that register a `WorkerMetricsPublisher`.

 ## Visualization


--- a/docs/architecture/kv_cache_routing.md
+++ b/docs/architecture/kv_cache_routing.md
@@ -113,7 +113,7 @@ In the above image, our cost function is (KV match - Load) so we select Worker 2

 ## Events

-In Dynamo, we want to support KV Cache Routing and load balancing for many backends that have different implementations of KV Cache and record different metrics. To that end, we built a KVPublisher that can be plugged into any framework to publish KV Events and a KvMetricsPublisher that can publish Metric Events.
+In Dynamo, we want to support KV Cache Routing and load balancing for many backends that have different implementations of KV Cache and record different metrics. To that end, we built a KVPublisher that can be plugged into any framework to publish KV Events and a WorkerMetricsPublisher that can publish Metric Events.

 On the receiving side we have a KVIndexer which accepts events from the KVPublisher and puts them into a global prefix tree and a KvMetricsAggregator which aggregates metric events by worker.

@@ -174,7 +174,7 @@ Sample Output:
 This example is designed to help you understand KV cache routing; it won't run outside of the context of dynamo serve. See the examples/ directory for runnable examples.
 ```

-### KvMetricsPublisher
+### WorkerMetricsPublisher
 We added a KvMetrics Publisher which sends the following metrics to the KvMetricsAggregator:
 - num_requests_waiting
 - gpu_cache_usage_perc
@@ -184,7 +184,7 @@ We added a KvMetrics Publisher which sends the following metrics to the KvMetric
 - kv_active_blocks
 - kv_total_blocks

-Currently, the KvMetricsPublisher exists as a Python binding.
+Currently, the WorkerMetricsPublisher exists as a Python binding.

 ### KvMetricsAggregator
 The KvMetricsAggregator receives these metrics and aggregates them. It has a method `get_metrics` which returns an object of `AggregatedMetrics`.

--- a/docs/guides/backend.md
+++ b/docs/guides/backend.md
@@ -210,7 +210,7 @@ KV-aware routing is a powerful feature of Dynamo that optimizes for routing
 requests to specific workers while minimizing a specific KV-cache based cost function.

 In its simplest form, all a worker needs to do to enable KV-aware routing is to
-publish KV metrics through the `KvMetricsPublisher`, which is consumed
+publish KV metrics through the `WorkerMetricsPublisher`, which is consumed
 by a Dynamo KV Router through the `KvMetricsAggregator`:

 ```python
@@ -221,7 +221,7 @@ import logging
 import random

 from pydantic import BaseModel
-from dynamo.llm import KvMetricsPublisher
+from dynamo.llm import WorkerMetricsPublisher
 from dynamo.sdk import endpoint, service, dynamo_context

 logger = logging.getLogger(__name__)
@@ -241,7 +241,7 @@ class YourWorker:
    def __init__(self):
        # Initialize metrics publisher from Dynamo
        self.component = dynamo_context["component"]
-        self.metrics_publisher = KvMetricsPublisher()
+        self.metrics_publisher = WorkerMetricsPublisher()
        # Register an endpoint for consumers of the KV Metrics
        # (KvMetricsAggregator) to listen/gather on.
        self.metrics_publisher.create_endpoint(self.component)

--- a/examples/llm/components/worker.py
+++ b/examples/llm/components/worker.py
@@ -31,7 +31,7 @@ from vllm.entrypoints.openai.api_server import (
 from vllm.remote_prefill import RemotePrefillParams, RemotePrefillRequest
 from vllm.sampling_params import RequestOutputKind

-from dynamo.llm import KvMetricsPublisher
+from dynamo.llm import WorkerMetricsPublisher
 from dynamo.sdk import async_on_start, depends, dynamo_context, endpoint, service

 logger = logging.getLogger(__name__)
@@ -87,7 +87,7 @@ class VllmWorker:
            os.environ["VLLM_KV_NAMESPACE"] = "dynamo"
            os.environ["VLLM_KV_COMPONENT"] = class_name

-        self.metrics_publisher = KvMetricsPublisher()
+        self.metrics_publisher = WorkerMetricsPublisher()

        signal.signal(signal.SIGTERM, self.shutdown_vllm_engine)
        signal.signal(signal.SIGINT, self.shutdown_vllm_engine)

--- a/examples/tensorrt_llm/common/base_engine.py
+++ b/examples/tensorrt_llm/common/base_engine.py
@@ -42,7 +42,7 @@ from tensorrt_llm.llmapi.disagg_utils import (
 )
 from tensorrt_llm.serve.openai_protocol import DisaggregatedParams

-from dynamo.llm import KvEventPublisher, KvMetricsPublisher
+from dynamo.llm import KvEventPublisher, WorkerMetricsPublisher
 from dynamo.sdk import dynamo_context

 logger = logging.getLogger(__name__)
@@ -134,7 +134,7 @@ class BaseTensorrtLLMEngine:
            self._publish_events = False

        if self._publish_stats:
-            self._kv_metrics_publisher = KvMetricsPublisher()
+            self._kv_metrics_publisher = WorkerMetricsPublisher()

        if self._publish_events:
            if self._worker_id is None:

--- a/examples/vllm_v0/components/worker.py
+++ b/examples/vllm_v0/components/worker.py
@@ -34,7 +34,7 @@ from vllm.inputs import TokensPrompt
 from vllm.remote_prefill import RemotePrefillParams, RemotePrefillRequest
 from vllm.sampling_params import RequestOutputKind

-from dynamo.llm import KvMetricsPublisher, ModelType, register_llm
+from dynamo.llm import ModelType, WorkerMetricsPublisher, register_llm
 from dynamo.sdk import async_on_start, depends, dynamo_context, endpoint, service

 logger = logging.getLogger(__name__)
@@ -90,7 +90,7 @@ class VllmWorker:
            os.environ["VLLM_KV_NAMESPACE"] = "dynamo"
            os.environ["VLLM_KV_COMPONENT"] = class_name

-        self.metrics_publisher = KvMetricsPublisher()
+        self.metrics_publisher = WorkerMetricsPublisher()

        model_config = self.engine_args.create_model_config()
        self.default_sampling_params = model_config.get_diff_sampling_param()

--- a/launch/dynamo-run/src/subprocess/vllm_inc.py
+++ b/launch/dynamo-run/src/subprocess/vllm_inc.py
@@ -26,7 +26,7 @@ from vllm.entrypoints.openai.api_server import (
 )
 from vllm.inputs import TokensPrompt

-from dynamo.llm import KvMetricsPublisher, ModelType, register_llm
+from dynamo.llm import ModelType, WorkerMetricsPublisher, register_llm
 from dynamo.runtime import DistributedRuntime, dynamo_worker

 # Only used if you run it manually from the command line
@@ -59,7 +59,7 @@ class RequestHandler:
        self.component = component
        self.engine_client = engine
        self.default_sampling_params = default_sampling_params
-        self.metrics_publisher = KvMetricsPublisher()
+        self.metrics_publisher = WorkerMetricsPublisher()

    def setup_kv_metrics(self):
        if not hasattr(self.engine_client, "set_metrics_publisher"):

--- a/launch/dynamo-run/src/subprocess/vllm_v1_inc.py
+++ b/launch/dynamo-run/src/subprocess/vllm_v1_inc.py
@@ -33,8 +33,8 @@ from vllm.v1.metrics.loggers import StatLoggerBase
 from vllm.v1.metrics.stats import IterationStats, SchedulerStats

 from dynamo.llm import (
-    KvMetricsPublisher,
    ModelType,
+    WorkerMetricsPublisher,
    ZmqKvEventPublisher,
    ZmqKvEventPublisherConfig,
    register_llm,
@@ -64,10 +64,10 @@ class Config:


 class DynamoStatLoggerPublisher(StatLoggerBase):
-    """Stat logger publisher. Wrapper for the KvMetricsPublisher to match the StatLoggerBase interface."""
+    """Stat logger publisher. Wrapper for the WorkerMetricsPublisher to match the StatLoggerBase interface."""

    def __init__(self, component: Component, dp_rank: int) -> None:
-        self.inner = KvMetricsPublisher()
+        self.inner = WorkerMetricsPublisher()
        self.inner.create_endpoint(component)
        self.dp_rank = dp_rank


--- a/lib/bindings/python/rust/lib.rs
+++ b/lib/bindings/python/rust/lib.rs
@@ -51,7 +51,7 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<AsyncResponseStream>()?;
    m.add_class::<llm::kv::KvRouter>()?;
    m.add_class::<llm::disagg_router::DisaggregatedRouter>()?;
-    m.add_class::<llm::kv::KvMetricsPublisher>()?;
+    m.add_class::<llm::kv::WorkerMetricsPublisher>()?;
    m.add_class::<llm::model_card::ModelDeploymentCard>()?;
    m.add_class::<llm::preprocessor::OAIChatPreprocessor>()?;
    m.add_class::<llm::backend::Backend>()?;

--- a/lib/bindings/python/rust/llm/kv.rs
+++ b/lib/bindings/python/rust/llm/kv.rs
@@ -63,15 +63,16 @@ impl KvRouter {
 }

 #[pyclass]
-pub(crate) struct KvMetricsPublisher {
-    inner: Arc<llm_rs::kv_router::publisher::KvMetricsPublisher>,
+pub(crate) struct WorkerMetricsPublisher {
+    inner: Arc<llm_rs::kv_router::publisher::WorkerMetricsPublisher>,
 }

 #[pymethods]
-impl KvMetricsPublisher {
+impl WorkerMetricsPublisher {
    #[new]
    fn new() -> PyResult<Self> {
-        let inner = llm_rs::kv_router::publisher::KvMetricsPublisher::new().map_err(to_pyerr)?;
+        let inner =
+            llm_rs::kv_router::publisher::WorkerMetricsPublisher::new().map_err(to_pyerr)?;
        Ok(Self {
            inner: inner.into(),
        })

--- a/lib/bindings/python/src/dynamo/_core.pyi
+++ b/lib/bindings/python/src/dynamo/_core.pyi
@@ -344,16 +344,16 @@ class DisaggregatedRouter:
        """
        ...

-class KvMetricsPublisher:
+class WorkerMetricsPublisher:
    """
-    A metrics publisher will provide KV metrics to the router.
+    A metrics publisher will provide metrics to the router.
    """

    ...

    def __init__(self) -> None:
        """
-        Create a `KvMetricsPublisher` object
+        Create a `WorkerMetricsPublisher` object
        """

    def create_service(self, component: Component) -> None:

--- a/lib/bindings/python/src/dynamo/llm/__init__.py
+++ b/lib/bindings/python/src/dynamo/llm/__init__.py
@@ -29,11 +29,11 @@ from dynamo._core import HttpService as HttpService
 from dynamo._core import KvEventPublisher as KvEventPublisher
 from dynamo._core import KvIndexer as KvIndexer
 from dynamo._core import KvMetricsAggregator as KvMetricsAggregator
-from dynamo._core import KvMetricsPublisher as KvMetricsPublisher
 from dynamo._core import KvRecorder as KvRecorder
 from dynamo._core import KvRouter as KvRouter
 from dynamo._core import ModelType as ModelType
 from dynamo._core import OverlapScores as OverlapScores
+from dynamo._core import WorkerMetricsPublisher as WorkerMetricsPublisher
 from dynamo._core import ZmqKvEventPublisher as ZmqKvEventPublisher
 from dynamo._core import ZmqKvEventPublisherConfig as ZmqKvEventPublisherConfig
 from dynamo._core import register_llm as register_llm

--- a/lib/bindings/python/src/dynamo/llm/tensorrtllm/publisher.py
+++ b/lib/bindings/python/src/dynamo/llm/tensorrtllm/publisher.py
@@ -11,7 +11,7 @@ from contextlib import asynccontextmanager
 from queue import Queue
 from typing import Callable, Optional, Union

-from dynamo.llm import KvEventPublisher, KvMetricsPublisher
+from dynamo.llm import KvEventPublisher, WorkerMetricsPublisher

 logging.basicConfig(level=logging.DEBUG)

@@ -113,7 +113,7 @@ class Publisher:

    def initialize(self):
        # Setup the metrics publisher
-        self.metrics_publisher = KvMetricsPublisher()
+        self.metrics_publisher = WorkerMetricsPublisher()
        self._init_publish_metrics_thread()
        task = asyncio.create_task(self._create_metrics_publisher_endpoint())
        task.add_done_callback(

--- a/lib/bindings/python/tests/test_kv_bindings.py
+++ b/lib/bindings/python/tests/test_kv_bindings.py
@@ -28,7 +28,7 @@ from dynamo.llm import (
    KvEventPublisher,
    KvIndexer,
    KvMetricsAggregator,
-    KvMetricsPublisher,
+    WorkerMetricsPublisher,
 )
 from dynamo.runtime import Component, DistributedRuntime

@@ -256,7 +256,7 @@ async def test_metrics_aggregator(distributed_runtime):


 async def metrics_publisher_task(kv_listener, expected_metrics):
-    metrics_publisher = KvMetricsPublisher()
+    metrics_publisher = WorkerMetricsPublisher()
    metrics_publisher.publish(
        expected_metrics["request_active_slots"],
        expected_metrics["request_total_slots"],

--- a/lib/llm/src/kv_router/publisher.rs
+++ b/lib/llm/src/kv_router/publisher.rs
@@ -463,15 +463,15 @@ enum RawKvEvent {
 // Metrics Publishers ------------------------------------------------------
 // -------------------------------------------------------------------------

-pub struct KvMetricsPublisher {
+pub struct WorkerMetricsPublisher {
    tx: tokio::sync::watch::Sender<Arc<ForwardPassMetrics>>,
    rx: tokio::sync::watch::Receiver<Arc<ForwardPassMetrics>>,
 }

-impl KvMetricsPublisher {
+impl WorkerMetricsPublisher {
    pub fn new() -> Result<Self> {
        let (tx, rx) = tokio::sync::watch::channel(Arc::new(ForwardPassMetrics::default()));
-        Ok(KvMetricsPublisher { tx, rx })
+        Ok(WorkerMetricsPublisher { tx, rx })
    }

    pub fn publish(