Unverified commit fca0a801, authored by Michael Feil, committed by GitHub
Browse files

feat: follow up of optional, batching-of-client-side-events (#6741)


Signed-off-by: michaelfeil <63565275+michaelfeil@users.noreply.github.com>
Signed-off-by: Michael Feil <michaelfeil@users.noreply.github.com>
Signed-off-by: Michael Feil <63565275+michaelfeil@users.noreply.github.com>
parent e9f740ab
......@@ -124,12 +124,14 @@ impl KvEventPublisher {
/// so that routers can recover events directly from this worker.
/// zmq_endpoint: Optional ZMQ SUB endpoint to read raw engine events from.
/// zmq_topic: ZMQ topic filter (default "").
/// batching_timeout_us: Maximum time (in **microseconds**) to accumulate
/// batching_timeout_ms: Maximum time (in **milliseconds**) to accumulate
/// events into a single batch before flushing.
/// ``None`` uses the default window of 10000 µs (10 ms).
/// ``0`` disables batching: every event is published immediately.
/// ``None`` disables batching: every event is published immediately.
/// ``50`` to enable batching with a 50 ms window.
/// ``0`` is treated as ``None`` (also disables batching).
/// Maximum allowed is 15_000 (15 seconds); larger values are capped.
#[new]
#[pyo3(signature = (endpoint, worker_id=0, kv_block_size=0, dp_rank=0, enable_local_indexer=false, zmq_endpoint=None, zmq_topic=None, batching_timeout_us=None))]
#[pyo3(signature = (endpoint, worker_id=0, kv_block_size=0, dp_rank=0, enable_local_indexer=false, zmq_endpoint=None, zmq_topic=None, batching_timeout_ms=llm_rs::kv_router::publisher::DEFAULT_BATCHING_TIMEOUT_MS))]
#[allow(clippy::too_many_arguments)]
fn new(
endpoint: Endpoint,
......@@ -139,7 +141,7 @@ impl KvEventPublisher {
enable_local_indexer: bool,
zmq_endpoint: Option<String>,
zmq_topic: Option<String>,
batching_timeout_us: Option<u64>,
batching_timeout_ms: Option<u64>,
) -> PyResult<Self> {
let _ = worker_id;
......@@ -161,7 +163,7 @@ impl KvEventPublisher {
source_config,
enable_local_indexer,
dp_rank,
batching_timeout_us,
batching_timeout_ms,
)
.map_err(to_pyerr)?;
......
This diff is collapsed.
......@@ -519,12 +519,18 @@ pub mod tokio_perf {
pub const ALIVE_TASKS: &str = "alive_tasks";
}
// KvRouter (including KvInexer) Prometheus metric names
// KvRouter (including KvIndexer) Prometheus metric names
/// Metric name constants for the KvRouter (including the KvIndexer).
pub mod kvrouter {
/// Number of KV cache events applied to the index (including status).
// NOTE(review): unlike `kv_publisher::ENGINES_DROPPED_EVENTS_TOTAL`, this name
// carries no component prefix; presumably one is added where the metric is
// registered — confirm before relying on the exported name.
pub const KV_CACHE_EVENTS_APPLIED: &str = "kv_cache_events_applied";
}
/// Metric name constants for the KV publisher.
pub mod kv_publisher {
/// Total number of raw events dropped by engines before reaching the publisher
/// (detected via gaps in `event_id`).
// The `kv_publisher_` component prefix is embedded directly in the name string.
pub const ENGINES_DROPPED_EVENTS_TOTAL: &str = "kv_publisher_engines_dropped_events_total";
}
/// Additional TRT-LLM worker metrics beyond what the engine natively provides.
///
/// These metrics are Python-only (registered via `prometheus_client`) and share the
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment