"vscode:/vscode.git/clone" did not exist on "b0401c19bae3cb3e1ebd721bab520caa6be43a2a"
Unverified commit 713c96d2, authored by Michael Feil and committed by GitHub
Browse files

feat: dynamic batching of client side events (#6733)


Signed-off-by: michaelfeil <63565275+michaelfeil@users.noreply.github.com>
parent e5850e23
...@@ -132,8 +132,25 @@ pub(crate) struct KvEventPublisher { ...@@ -132,8 +132,25 @@ pub(crate) struct KvEventPublisher {
#[pymethods] #[pymethods]
impl KvEventPublisher { impl KvEventPublisher {
/// Create a KV event publisher that batches raw engine events before forwarding
/// them to NATS / the event plane.
///
/// Args:
/// endpoint: The Dynamo component endpoint for this worker.
/// worker_id: Identifier of this worker (default 0).
/// kv_block_size: KV cache block size in tokens; must be > 0.
/// dp_rank: Data-parallel rank of this worker (default 0).
/// enable_local_indexer: When True, a local KV indexer is kept in-process
/// so that routers can recover events directly from this worker.
/// zmq_endpoint: Optional ZMQ SUB endpoint to read raw engine events from.
/// zmq_topic: ZMQ topic filter (default "").
/// batching_timeout_us: Maximum time (in **microseconds**) to accumulate
/// events into a single batch before flushing.
/// ``None`` uses the default window of 10000 µs (10 ms).
/// ``0`` disables batching: every event is published immediately.
#[new] #[new]
#[pyo3(signature = (endpoint, worker_id=0, kv_block_size=0, dp_rank=0, enable_local_indexer=false, zmq_endpoint=None, zmq_topic=None))] #[pyo3(signature = (endpoint, worker_id=0, kv_block_size=0, dp_rank=0, enable_local_indexer=false, zmq_endpoint=None, zmq_topic=None, batching_timeout_us=None))]
#[allow(clippy::too_many_arguments)]
fn new( fn new(
endpoint: Endpoint, endpoint: Endpoint,
worker_id: WorkerId, worker_id: WorkerId,
...@@ -142,6 +159,7 @@ impl KvEventPublisher { ...@@ -142,6 +159,7 @@ impl KvEventPublisher {
enable_local_indexer: bool, enable_local_indexer: bool,
zmq_endpoint: Option<String>, zmq_endpoint: Option<String>,
zmq_topic: Option<String>, zmq_topic: Option<String>,
batching_timeout_us: Option<u64>,
) -> PyResult<Self> { ) -> PyResult<Self> {
let _ = worker_id; let _ = worker_id;
...@@ -163,6 +181,7 @@ impl KvEventPublisher { ...@@ -163,6 +181,7 @@ impl KvEventPublisher {
source_config, source_config,
enable_local_indexer, enable_local_indexer,
dp_rank, dp_rank,
batching_timeout_us,
) )
.map_err(to_pyerr)?; .map_err(to_pyerr)?;
......
This diff is collapsed.
...@@ -330,6 +330,7 @@ impl MockVllmEngine { ...@@ -330,6 +330,7 @@ impl MockVllmEngine {
source_config, source_config,
args.enable_local_indexer, args.enable_local_indexer,
dp_rank, dp_rank,
None,
) { ) {
Ok(publisher) => ( Ok(publisher) => (
Some(Arc::new(sink) as Arc<dyn KvCacheEventSink>), Some(Arc::new(sink) as Arc<dyn KvCacheEventSink>),
...@@ -358,6 +359,7 @@ impl MockVllmEngine { ...@@ -358,6 +359,7 @@ impl MockVllmEngine {
None, None,
args.enable_local_indexer, args.enable_local_indexer,
dp_rank, dp_rank,
None,
) { ) {
Ok(publisher) => ( Ok(publisher) => (
Some(Arc::new(KvEventSinkAdapter(publisher)) Some(Arc::new(KvEventSinkAdapter(publisher))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment