Unverified Commit f67dc38b authored by Alec's avatar Alec Committed by GitHub
Browse files

fix: Renamed event publisher classes and configuration (#1273)

parent d3a7587a
...@@ -5,11 +5,10 @@ ...@@ -5,11 +5,10 @@
# Can also be used standalone: `python3 vllm_inc.py` - lots of optional cmd line params # Can also be used standalone: `python3 vllm_inc.py` - lots of optional cmd line params
# Setup checklist: # Setup checklist:
# - We are in a virtualenv with vllm installed. Must be newer than v0.9.0 (currently pre-release) # - We are in a virtualenv with vllm installed. V1 is compatible with v0.9.0
# 1f079540db5f1080a2f61a730da50d3009934c5a - this commit is working for me
# Steps: # Steps:
# git clone https://github.com/vllm-project/vllm.git # git clone https://github.com/vllm-project/vllm.git
# cd vllm && git checkout 1f079540db5f1080a2f61a730da50d3009934c5a # cd vllm && git checkout v0.9.0
# uv pip uninstall ai-dynamo-vllm # uv pip uninstall ai-dynamo-vllm
# VLLM_USE_PRECOMPILED=1 uv pip install --editable . # VLLM_USE_PRECOMPILED=1 uv pip install --editable .
...@@ -34,10 +33,10 @@ from vllm.v1.metrics.loggers import StatLoggerBase ...@@ -34,10 +33,10 @@ from vllm.v1.metrics.loggers import StatLoggerBase
from vllm.v1.metrics.stats import IterationStats, SchedulerStats from vllm.v1.metrics.stats import IterationStats, SchedulerStats
from dynamo.llm import ( from dynamo.llm import (
KvEventPublisherFromZmq,
KvEventPublisherFromZmqConfig,
KvMetricsPublisher, KvMetricsPublisher,
ModelType, ModelType,
ZmqKvEventPublisher,
ZmqKvEventPublisherConfig,
register_llm, register_llm,
) )
from dynamo.runtime import Component, DistributedRuntime, dynamo_worker from dynamo.runtime import Component, DistributedRuntime, dynamo_worker
...@@ -248,11 +247,11 @@ async def init(runtime: DistributedRuntime, config: Config): ...@@ -248,11 +247,11 @@ async def init(runtime: DistributedRuntime, config: Config):
logger.info("VllmWorker has been initialized") logger.info("VllmWorker has been initialized")
zmq_config = KvEventPublisherFromZmqConfig( zmq_config = ZmqKvEventPublisherConfig(
worker_id=endpoint.lease_id(), kv_block_size=engine_args.block_size worker_id=endpoint.lease_id(), kv_block_size=engine_args.block_size
) )
_ = KvEventPublisherFromZmq(component=component, config=zmq_config) _ = ZmqKvEventPublisher(component=component, config=zmq_config)
handler = RequestHandler(component, engine_client, default_sampling_params) handler = RequestHandler(component, engine_client, default_sampling_params)
......
...@@ -61,8 +61,8 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { ...@@ -61,8 +61,8 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<llm::kv::AggregatedMetrics>()?; m.add_class::<llm::kv::AggregatedMetrics>()?;
m.add_class::<llm::kv::KvMetricsAggregator>()?; m.add_class::<llm::kv::KvMetricsAggregator>()?;
m.add_class::<llm::kv::KvEventPublisher>()?; m.add_class::<llm::kv::KvEventPublisher>()?;
m.add_class::<llm::kv::KvEventPublisherFromZmq>()?; m.add_class::<llm::kv::ZmqKvEventPublisher>()?;
m.add_class::<llm::kv::KvEventPublisherFromZmqConfig>()?; m.add_class::<llm::kv::ZmqKvEventPublisherConfig>()?;
m.add_class::<llm::kv::KvRecorder>()?; m.add_class::<llm::kv::KvRecorder>()?;
m.add_class::<llm::nats::NatsQueue>()?; m.add_class::<llm::nats::NatsQueue>()?;
m.add_class::<http::HttpService>()?; m.add_class::<http::HttpService>()?;
......
...@@ -128,7 +128,7 @@ impl KvMetricsPublisher { ...@@ -128,7 +128,7 @@ impl KvMetricsPublisher {
#[pyclass] #[pyclass]
#[derive(Clone)] #[derive(Clone)]
pub struct KvEventPublisherFromZmqConfig { pub struct ZmqKvEventPublisherConfig {
#[pyo3(get, set)] #[pyo3(get, set)]
pub worker_id: i64, pub worker_id: i64,
#[pyo3(get, set)] #[pyo3(get, set)]
...@@ -140,7 +140,7 @@ pub struct KvEventPublisherFromZmqConfig { ...@@ -140,7 +140,7 @@ pub struct KvEventPublisherFromZmqConfig {
} }
#[pymethods] #[pymethods]
impl KvEventPublisherFromZmqConfig { impl ZmqKvEventPublisherConfig {
#[new] #[new]
#[pyo3(signature = ( #[pyo3(signature = (
worker_id, worker_id,
...@@ -164,16 +164,16 @@ impl KvEventPublisherFromZmqConfig { ...@@ -164,16 +164,16 @@ impl KvEventPublisherFromZmqConfig {
} }
#[pyclass] #[pyclass]
pub(crate) struct KvEventPublisherFromZmq { pub(crate) struct ZmqKvEventPublisher {
inner: llm_rs::kv_router::publisher::KvEventPublisherFromZmq, inner: llm_rs::kv_router::publisher::ZmqKvEventPublisher,
} }
#[pymethods] #[pymethods]
impl KvEventPublisherFromZmq { impl ZmqKvEventPublisher {
#[new] #[new]
fn new(component: Component, config: KvEventPublisherFromZmqConfig) -> PyResult<Self> { fn new(component: Component, config: ZmqKvEventPublisherConfig) -> PyResult<Self> {
let mut inner = let mut inner =
llm_rs::kv_router::publisher::KvEventPublisherFromZmq::new(config.kv_block_size); llm_rs::kv_router::publisher::ZmqKvEventPublisher::new(config.kv_block_size);
inner.start_background_task( inner.start_background_task(
component.inner, component.inner,
config.worker_id, config.worker_id,
......
...@@ -579,7 +579,7 @@ class KvEventPublisher: ...@@ -579,7 +579,7 @@ class KvEventPublisher:
""" """
... ...
class KvEventPublisherFromZmqConfig: class ZmqKvEventPublisherConfig:
def __init__( def __init__(
self, self,
worker_id: int, worker_id: int,
...@@ -588,7 +588,7 @@ class KvEventPublisherFromZmqConfig: ...@@ -588,7 +588,7 @@ class KvEventPublisherFromZmqConfig:
zmq_topic: str = "" zmq_topic: str = ""
) -> None: ) -> None:
""" """
Configuration for the KvEventPublisherFromZmq. Configuration for the ZmqKvEventPublisher.
:param worker_id: The worker ID. :param worker_id: The worker ID.
:param kv_block_size: The block size for the key-value store. :param kv_block_size: The block size for the key-value store.
...@@ -597,10 +597,10 @@ class KvEventPublisherFromZmqConfig: ...@@ -597,10 +597,10 @@ class KvEventPublisherFromZmqConfig:
""" """
... ...
class KvEventPublisherFromZmq: class ZmqKvEventPublisher:
def __init__(self, component: Component, config: KvEventPublisherFromZmqConfig) -> None: def __init__(self, component: Component, config: ZmqKvEventPublisherConfig) -> None:
""" """
Initializes a new KvEventPublisherFromZmq instance. Initializes a new ZmqKvEventPublisher instance.
:param component: The component to be used. :param component: The component to be used.
:param config: Configuration for the event publisher. :param config: Configuration for the event publisher.
......
...@@ -27,8 +27,6 @@ from dynamo._core import HttpAsyncEngine as HttpAsyncEngine ...@@ -27,8 +27,6 @@ from dynamo._core import HttpAsyncEngine as HttpAsyncEngine
from dynamo._core import HttpError as HttpError from dynamo._core import HttpError as HttpError
from dynamo._core import HttpService as HttpService from dynamo._core import HttpService as HttpService
from dynamo._core import KvEventPublisher as KvEventPublisher from dynamo._core import KvEventPublisher as KvEventPublisher
from dynamo._core import KvEventPublisherFromZmq as KvEventPublisherFromZmq
from dynamo._core import KvEventPublisherFromZmqConfig as KvEventPublisherFromZmqConfig
from dynamo._core import KvIndexer as KvIndexer from dynamo._core import KvIndexer as KvIndexer
from dynamo._core import KvMetricsAggregator as KvMetricsAggregator from dynamo._core import KvMetricsAggregator as KvMetricsAggregator
from dynamo._core import KvMetricsPublisher as KvMetricsPublisher from dynamo._core import KvMetricsPublisher as KvMetricsPublisher
...@@ -36,6 +34,8 @@ from dynamo._core import KvRecorder as KvRecorder ...@@ -36,6 +34,8 @@ from dynamo._core import KvRecorder as KvRecorder
from dynamo._core import KvRouter as KvRouter from dynamo._core import KvRouter as KvRouter
from dynamo._core import ModelType as ModelType from dynamo._core import ModelType as ModelType
from dynamo._core import OverlapScores as OverlapScores from dynamo._core import OverlapScores as OverlapScores
from dynamo._core import ZmqKvEventPublisher as ZmqKvEventPublisher
from dynamo._core import ZmqKvEventPublisherConfig as ZmqKvEventPublisherConfig
from dynamo._core import register_llm as register_llm from dynamo._core import register_llm as register_llm
try: try:
......
...@@ -41,7 +41,9 @@ pub struct WorkerSelectionResult { ...@@ -41,7 +41,9 @@ pub struct WorkerSelectionResult {
#[derive(Debug, Clone, Serialize, Deserialize, Default)] #[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct ForwardPassMetrics { pub struct ForwardPassMetrics {
pub data_parallel_rank: Option<u32>, // backwards compatible // https://lmsys.org/blog/2024-12-04-sglang-v0-4/#data-parallelism-attention-for-deepseek-models
// Data parallel ranks are semi-independent, so we need to track metrics at the DP level
pub data_parallel_rank: Option<u32>, // Optional for backwards compatibility
pub request_active_slots: u64, pub request_active_slots: u64,
pub request_total_slots: u64, pub request_total_slots: u64,
pub kv_active_blocks: u64, pub kv_active_blocks: u64,
......
...@@ -92,7 +92,7 @@ fn start_publish_task( ...@@ -92,7 +92,7 @@ fn start_publish_task(
// For more info on zmq: https://zeromq.org/ // For more info on zmq: https://zeromq.org/
// This publisher reads those events and publishes them to NATS // This publisher reads those events and publishes them to NATS
// The indexer will get the events from NATS and put them in the global prefix tree. // The indexer will get the events from NATS and put them in the global prefix tree.
pub struct KvEventPublisherFromZmq { pub struct ZmqKvEventPublisher {
kv_block_size: usize, kv_block_size: usize,
processor_handle: Option<tokio::task::JoinHandle<()>>, processor_handle: Option<tokio::task::JoinHandle<()>>,
zmq_handle: Option<tokio::task::JoinHandle<()>>, zmq_handle: Option<tokio::task::JoinHandle<()>>,
...@@ -100,7 +100,7 @@ pub struct KvEventPublisherFromZmq { ...@@ -100,7 +100,7 @@ pub struct KvEventPublisherFromZmq {
warning_count: Arc<AtomicU32>, warning_count: Arc<AtomicU32>,
} }
impl KvEventPublisherFromZmq { impl ZmqKvEventPublisher {
pub fn new(kv_block_size: usize) -> Self { pub fn new(kv_block_size: usize) -> Self {
Self { Self {
kv_block_size, kv_block_size,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment