Unverified commit 7ff5e0be, authored by jthomson04, committed by GitHub
Browse files

feat: Concurrent KV event consumer (#7293)


Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
parent 0f8e1a9e
...@@ -18,6 +18,74 @@ use crate::indexer::pruning::{BlockEntry, PruneConfig, PruneManager}; ...@@ -18,6 +18,74 @@ use crate::indexer::pruning::{BlockEntry, PruneConfig, PruneManager};
use crate::protocols::*; use crate::protocols::*;
use dynamo_tokens::SequenceHash; use dynamo_tokens::SequenceHash;
/// Builds one [`BlockEntry`] per block carried by a `Stored` event.
///
/// Returns `None` when the event's payload is any variant other than
/// `KvCacheEventData::Stored`. Otherwise every block in the event is mapped to an
/// entry keyed by its block hash, tagged with the emitting worker / dp rank, and
/// positioned by its index within the event's block list.
fn stored_block_entries(event: &RouterEvent) -> Option<Vec<BlockEntry>> {
    match &event.event.data {
        KvCacheEventData::Stored(stored) => {
            let worker = WorkerWithDpRank::new(event.worker_id, event.event.dp_rank);
            let entries: Vec<BlockEntry> = stored
                .blocks
                .iter()
                .enumerate()
                .map(|(seq_position, block)| BlockEntry {
                    key: block.block_hash,
                    worker,
                    seq_position,
                })
                .collect();
            Some(entries)
        }
        _ => None,
    }
}
/// Applies `event` to `trie`, records the outcome in `metrics`, and performs prune
/// bookkeeping when a prune manager is present.
///
/// # Arguments
///
/// * `trie` - radix tree the event is applied to.
/// * `event` - the event to apply; it is consumed by `trie.apply_event`.
/// * `metrics` - receives the event type together with the apply result.
/// * `prune_manager` - when `Some`, blocks of successfully applied `Stored` events are
///   inserted into it.
/// * `prune_tx` - signalled (non-blocking) when the tree size exceeds the configured
///   `max_tree_size`.
///
/// Nothing is tracked or signalled when there is no prune manager, when applying the
/// event failed, or when the event is not a `Stored` event.
fn apply_event_with_prune_tracking(
    trie: &mut RadixTree,
    event: RouterEvent,
    metrics: &KvIndexerMetrics,
    prune_manager: &mut Option<PruneManager<BlockEntry>>,
    prune_tx: &mpsc::Sender<()>,
) {
    let event_type = KvIndexerMetrics::get_event_type(&event.event.data);
    let event_id = event.event.event_id;
    let worker_id = event.worker_id;

    // Derive the prune entries from a borrow *before* the event is moved into the trie.
    // This replaces a deep clone of the whole event (previously taken for every event
    // type) with just the small entry list, and only for `Stored` events.
    let pending_entries = if prune_manager.is_some() {
        stored_block_entries(&event)
    } else {
        None
    };

    let result = trie.apply_event(event);
    let result_is_ok = result.is_ok();
    let tree_size = trie.current_size();
    tracing::trace!(
        "Applied KV event to global radix tree: event_type={event_type}, event_id={event_id}, worker_id={worker_id}, success={result_is_ok}, global_radix_tree_size={tree_size}"
    );
    metrics.increment_event_applied(event_type, result);

    // Only blocks from events that were actually applied are tracked.
    if !result_is_ok {
        return;
    }
    // `pending_entries` is `Some` only when a prune manager exists and the event was `Stored`.
    let (Some(pm), Some(block_entries)) = (prune_manager.as_mut(), pending_entries) else {
        return;
    };
    pm.insert(block_entries);

    let Some(ref pc) = pm.prune_config else {
        return;
    };
    // The trie is not touched between the size read above and this check, so the
    // already-read `tree_size` is reused instead of querying the trie again.
    if tree_size > pc.max_tree_size {
        tracing::info!(
            "Pruning: tree size ({}) exceeded max tree size ({}), scheduling pruning",
            tree_size,
            pc.max_tree_size
        );
        // Non-blocking signal; a failed send is deliberately ignored.
        let _ = prune_tx.try_send(());
    }
}
/// The KV Indexer, managing the KV store and handling events and match requests. /// The KV Indexer, managing the KV store and handling events and match requests.
#[derive(Clone)] #[derive(Clone)]
pub struct KvIndexer { pub struct KvIndexer {
...@@ -64,7 +132,7 @@ impl KvIndexer { ...@@ -64,7 +132,7 @@ impl KvIndexer {
metrics: Arc<KvIndexerMetrics>, metrics: Arc<KvIndexerMetrics>,
prune_config: Option<PruneConfig>, prune_config: Option<PruneConfig>,
) -> Self { ) -> Self {
let (event_tx, event_rx) = mpsc::channel::<RouterEvent>(2048); let (event_tx, event_rx) = mpsc::channel::<RouterEvent>(16384);
let (match_tx, match_rx) = mpsc::channel::<MatchRequest>(128); let (match_tx, match_rx) = mpsc::channel::<MatchRequest>(128);
let (remove_worker_tx, remove_worker_rx) = mpsc::channel::<WorkerId>(16); let (remove_worker_tx, remove_worker_rx) = mpsc::channel::<WorkerId>(16);
let (remove_worker_dp_rank_tx, remove_worker_dp_rank_rx) = let (remove_worker_dp_rank_tx, remove_worker_dp_rank_rx) =
...@@ -151,49 +219,26 @@ impl KvIndexer { ...@@ -151,49 +219,26 @@ impl KvIndexer {
} }
Some(event) = event_rx.recv() => { Some(event) = event_rx.recv() => {
let event_type = KvIndexerMetrics::get_event_type(&event.event.data); apply_event_with_prune_tracking(
let event_id = event.event.event_id; &mut trie,
let worker_id = event.worker_id; event,
// Only clone if we need the event for prune_manager afterward &metrics,
let event_for_prune = prune_manager.is_some().then(|| event.clone()); &mut prune_manager,
let result = trie.apply_event(event); &prune_tx,
let result_is_ok = result.is_ok();
let tree_size = trie.current_size();
tracing::trace!(
"Applied KV event to global radix tree: event_type={event_type}, event_id={event_id}, worker_id={worker_id}, success={result_is_ok}, global_radix_tree_size={tree_size}"
); );
metrics.increment_event_applied(event_type, result);
// Track blocks in PruneManager if TTL is enabled and event was stored successfully
let Some(ref mut pm) = prune_manager else { continue };
if !result_is_ok { continue };
let Some(ref event) = event_for_prune else { continue };
let KvCacheEventData::Stored(ref store_data) = event.event.data else { continue };
let worker = WorkerWithDpRank::new(event.worker_id, event.event.dp_rank);
let block_entries: Vec<BlockEntry> = store_data.blocks.iter().enumerate().map(|(idx, block)| {
BlockEntry {
key: block.block_hash,
worker,
seq_position: idx,
}
}).collect();
pm.insert(block_entries);
// Check if we need to prune due to tree size
let Some(ref pc) = pm.prune_config else { continue };
let current_size = trie.current_size();
if current_size > pc.max_tree_size {
tracing::info!(
"Pruning: tree size ({}) exceeded max tree size ({}), scheduling pruning",
current_size,
pc.max_tree_size
);
let _ = prune_tx.try_send(());
}
} }
Some(dump_req) = dump_rx.recv() => { Some(dump_req) = dump_rx.recv() => {
// Flush pending events so tree is consistent with buffer
while let Ok(event) = event_rx.try_recv() {
apply_event_with_prune_tracking(
&mut trie,
event,
&metrics,
&mut prune_manager,
&prune_tx,
);
}
let events = trie.dump_tree_as_events(); let events = trie.dump_tree_as_events();
let _ = dump_req.resp.send(events); let _ = dump_req.resp.send(events);
} }
......
...@@ -53,6 +53,15 @@ impl LocalKvIndexer { ...@@ -53,6 +53,15 @@ impl LocalKvIndexer {
buffer.iter().cloned().collect() buffer.iter().cloned().collect()
} }
/// Assemble a `TreeDump` response that carries the supplied `last_event_id`.
///
/// The events come from `dump_events`; if that call does not yield a value, the
/// default (empty) event list is used instead.
async fn tree_dump_response(&self, last_event_id: u64) -> WorkerKvQueryResponse {
    WorkerKvQueryResponse::TreeDump {
        events: self.dump_events().await.unwrap_or_default(),
        last_event_id,
    }
}
/// Query events by ID range, returning events in `[start_id, end_id]` (both inclusive). /// Query events by ID range, returning events in `[start_id, end_id]` (both inclusive).
/// ///
/// ### Arguments /// ### Arguments
...@@ -63,7 +72,7 @@ impl LocalKvIndexer { ...@@ -63,7 +72,7 @@ impl LocalKvIndexer {
/// ### Returns /// ### Returns
/// ///
/// - `Events`: Buffered events with original IDs (when range is within buffer) /// - `Events`: Buffered events with original IDs (when range is within buffer)
/// - `TreeDump`: Full tree dump with synthetic IDs (when range is too old or unspecified) /// - `TreeDump`: Full tree dump with synthetic IDs and the worker's latest real event ID (when range is too old or unspecified)
/// - `TooNew`: Error when requested range is newer than available data /// - `TooNew`: Error when requested range is newer than available data
/// - `InvalidRange`: Error when end_id < start_id /// - `InvalidRange`: Error when end_id < start_id
pub async fn get_events_in_id_range( pub async fn get_events_in_id_range(
...@@ -98,8 +107,7 @@ impl LocalKvIndexer { ...@@ -98,8 +107,7 @@ impl LocalKvIndexer {
// If no start_id specified, dump entire tree // If no start_id specified, dump entire tree
if start_id.is_none() { if start_id.is_none() {
tracing::debug!("No start_id specified, dumping entire tree"); tracing::debug!("No start_id specified, dumping entire tree");
let events = self.dump_events().await.unwrap_or_default(); return self.tree_dump_response(last_id.unwrap_or(0)).await;
return WorkerKvQueryResponse::TreeDump(events);
} }
let start_id = start_id.unwrap(); let start_id = start_id.unwrap();
...@@ -108,8 +116,7 @@ impl LocalKvIndexer { ...@@ -108,8 +116,7 @@ impl LocalKvIndexer {
// Check for empty buffer // Check for empty buffer
let Some(first_buffered) = first_id else { let Some(first_buffered) = first_id else {
tracing::debug!("Buffer empty, dumping entire tree"); tracing::debug!("Buffer empty, dumping entire tree");
let events = self.dump_events().await.unwrap_or_default(); return self.tree_dump_response(0).await;
return WorkerKvQueryResponse::TreeDump(events);
}; };
let last_buffered = last_id.unwrap(); let last_buffered = last_id.unwrap();
...@@ -134,8 +141,7 @@ impl LocalKvIndexer { ...@@ -134,8 +141,7 @@ impl LocalKvIndexer {
first_buffered, first_buffered,
"Requested start_id is older than buffer, dumping entire tree" "Requested start_id is older than buffer, dumping entire tree"
); );
let events = self.dump_events().await.unwrap_or_default(); return self.tree_dump_response(last_buffered).await;
return WorkerKvQueryResponse::TreeDump(events);
} }
// Serve from buffer // Serve from buffer
...@@ -196,17 +202,20 @@ impl LocalKvIndexer { ...@@ -196,17 +202,20 @@ impl LocalKvIndexer {
/// Apply event with buffering. /// Apply event with buffering.
/// ///
/// This records the event in the buffer and forwards it to the underlying indexer. /// This forwards the event to the underlying indexer and records it on success.
pub async fn apply_event_with_buffer(&self, event: RouterEvent) -> Result<(), KvRouterError> { pub async fn apply_event_with_buffer(&self, event: RouterEvent) -> Result<(), KvRouterError> {
// Record in buffer
self.record_event(event.clone());
// Forward to underlying indexer // Forward to underlying indexer
self.indexer let result = self
.indexer
.event_sender() .event_sender()
.send(event) .send(event.clone())
.await .await
.map_err(|_| KvRouterError::IndexerOffline) .map_err(|_| KvRouterError::IndexerOffline);
if result.is_ok() {
self.record_event(event);
}
result
} }
/// Clear the event buffer. /// Clear the event buffer.
......
...@@ -1941,7 +1941,7 @@ async fn test_local_indexer_slice_within_range() { ...@@ -1941,7 +1941,7 @@ async fn test_local_indexer_slice_within_range() {
let extract_events = |resp: WorkerKvQueryResponse| -> Vec<RouterEvent> { let extract_events = |resp: WorkerKvQueryResponse| -> Vec<RouterEvent> {
match resp { match resp {
WorkerKvQueryResponse::Events(e) => e, WorkerKvQueryResponse::Events(e) => e,
WorkerKvQueryResponse::TreeDump(e) => e, WorkerKvQueryResponse::TreeDump { events: e, .. } => e,
_ => panic!("Unexpected response type"), _ => panic!("Unexpected response type"),
} }
}; };
...@@ -1962,7 +1962,7 @@ async fn test_local_indexer_slice_within_range() { ...@@ -1962,7 +1962,7 @@ async fn test_local_indexer_slice_within_range() {
// start_id=0 is before buffer (first is 1), so should trigger tree dump // start_id=0 is before buffer (first is 1), so should trigger tree dump
let result = indexer.get_events_in_id_range(Some(0), Some(4)).await; let result = indexer.get_events_in_id_range(Some(0), Some(4)).await;
assert!(matches!(result, WorkerKvQueryResponse::TreeDump(_))); assert!(matches!(result, WorkerKvQueryResponse::TreeDump { .. }));
let result = indexer.get_events_in_id_range(Some(3), Some(3)).await; let result = indexer.get_events_in_id_range(Some(3), Some(3)).await;
let ids = get_ids(extract_events(result)); let ids = get_ids(extract_events(result));
...@@ -2016,7 +2016,7 @@ async fn test_local_indexer_get_events_in_id_range_all_cases() { ...@@ -2016,7 +2016,7 @@ async fn test_local_indexer_get_events_in_id_range_all_cases() {
let extract_events = |resp: WorkerKvQueryResponse| -> Vec<RouterEvent> { let extract_events = |resp: WorkerKvQueryResponse| -> Vec<RouterEvent> {
match resp { match resp {
WorkerKvQueryResponse::Events(e) => e, WorkerKvQueryResponse::Events(e) => e,
WorkerKvQueryResponse::TreeDump(e) => e, WorkerKvQueryResponse::TreeDump { events: e, .. } => e,
_ => panic!("Unexpected response type: {:?}", resp), _ => panic!("Unexpected response type: {:?}", resp),
} }
}; };
...@@ -2038,11 +2038,11 @@ async fn test_local_indexer_get_events_in_id_range_all_cases() { ...@@ -2038,11 +2038,11 @@ async fn test_local_indexer_get_events_in_id_range_all_cases() {
// Tree dump path tests // Tree dump path tests
let result = indexer.get_events_in_id_range(None, None).await; let result = indexer.get_events_in_id_range(None, None).await;
assert!(matches!(result, WorkerKvQueryResponse::TreeDump(_))); assert!(matches!(&result, WorkerKvQueryResponse::TreeDump { .. }));
assert_eq!(extract_events(result).len(), 10); assert_eq!(extract_events(result).len(), 10);
let result = indexer.get_events_in_id_range(Some(7), None).await; let result = indexer.get_events_in_id_range(Some(7), None).await;
assert!(matches!(result, WorkerKvQueryResponse::TreeDump(_))); assert!(matches!(result, WorkerKvQueryResponse::TreeDump { .. }));
// Edge cases // Edge cases
let result = indexer.get_events_in_id_range(Some(15), Some(10)).await; let result = indexer.get_events_in_id_range(Some(15), Some(10)).await;
...@@ -2052,6 +2052,98 @@ async fn test_local_indexer_get_events_in_id_range_all_cases() { ...@@ -2052,6 +2052,98 @@ async fn test_local_indexer_get_events_in_id_range_all_cases() {
assert!(matches!(result, WorkerKvQueryResponse::TooNew { .. })); assert!(matches!(result, WorkerKvQueryResponse::TooNew { .. }));
} }
#[tokio::test]
async fn test_tree_dump_includes_last_event_id() {
// Create indexer with small buffer (5 events max)
let indexer = LocalKvIndexer::new(
CancellationToken::new(),
4,
Arc::new(KvIndexerMetrics::new_unregistered()),
5,
);
let make_event = |id: u64| {
RouterEvent::new(
0,
KvCacheEvent {
event_id: id,
data: KvCacheEventData::Stored(KvCacheStoreData {
parent_hash: None,
blocks: vec![KvCacheStoredBlockData {
block_hash: ExternalSequenceBlockHash(id * 100),
tokens_hash: LocalBlockHash(id * 200),
mm_extra_info: None,
}],
}),
dp_rank: 0,
},
)
};
// Add 10 events (IDs 5-14), buffer keeps last 5: events 10-14
for id in 5..15 {
indexer
.apply_event_with_buffer(make_event(id))
.await
.unwrap();
}
indexer.flush().await;
// Request with start_id=None -> tree dump should include last_event_id=14
let result = indexer.get_events_in_id_range(None, None).await;
match result {
WorkerKvQueryResponse::TreeDump {
last_event_id,
events,
} => {
assert_eq!(
last_event_id, 14,
"last_event_id should be the buffer's newest event ID"
);
assert!(!events.is_empty(), "tree dump should contain events");
}
other => panic!("Expected TreeDump, got: {other:?}"),
}
// Request with start_id older than buffer -> tree dump should include last_event_id=14
let result = indexer.get_events_in_id_range(Some(7), None).await;
match result {
WorkerKvQueryResponse::TreeDump {
last_event_id,
events,
} => {
assert_eq!(
last_event_id, 14,
"last_event_id should be the buffer's newest event ID"
);
assert!(!events.is_empty(), "tree dump should contain events");
}
other => panic!("Expected TreeDump, got: {other:?}"),
}
// Empty buffer case: create a fresh indexer with no events
let empty_indexer = LocalKvIndexer::new(
CancellationToken::new(),
4,
Arc::new(KvIndexerMetrics::new_unregistered()),
5,
);
let result = empty_indexer.get_events_in_id_range(None, None).await;
match result {
WorkerKvQueryResponse::TreeDump {
last_event_id,
events,
} => {
assert_eq!(
last_event_id, 0,
"empty buffer should return last_event_id=0"
);
assert!(events.is_empty(), "empty indexer should have no events");
}
other => panic!("Expected TreeDump, got: {other:?}"),
}
}
#[tokio::test] #[tokio::test]
async fn test_local_indexer_buffer_and_serialization() { async fn test_local_indexer_buffer_and_serialization() {
let worker_id = 42u64; let worker_id = 42u64;
...@@ -2099,6 +2191,51 @@ async fn test_local_indexer_buffer_and_serialization() { ...@@ -2099,6 +2191,51 @@ async fn test_local_indexer_buffer_and_serialization() {
assert_eq!(events[0].worker_id, worker_id); assert_eq!(events[0].worker_id, worker_id);
} }
#[tokio::test]
async fn test_local_indexer_does_not_buffer_failed_send() {
    let metrics = Arc::new(KvIndexerMetrics::new_unregistered());
    let local_indexer = LocalKvIndexer::new(CancellationToken::new(), 4, metrics, 5);

    // One single-block `Stored` event from worker 7.
    let stored_block = KvCacheStoredBlockData {
        block_hash: ExternalSequenceBlockHash(100),
        tokens_hash: LocalBlockHash(200),
        mm_extra_info: None,
    };
    let cache_event = KvCacheEvent {
        event_id: 1,
        data: KvCacheEventData::Stored(KvCacheStoreData {
            parent_hash: None,
            blocks: vec![stored_block],
        }),
        dp_rank: 0,
    };
    let test_event = RouterEvent::new(7, cache_event);

    // Shut the indexer down and wait until the event channel reports closed, so the
    // send below is guaranteed to fail.
    let sender = local_indexer.event_sender();
    local_indexer.shutdown();
    sender.closed().await;

    let outcome = local_indexer.apply_event_with_buffer(test_event).await;
    assert!(matches!(outcome, Err(KvRouterError::IndexerOffline)));
    // The rejected event must not have been recorded in the buffer.
    assert_eq!(local_indexer.buffer_len(), 0);

    let response = local_indexer.get_events_in_id_range(None, None).await;
    match response {
        WorkerKvQueryResponse::TreeDump {
            events,
            last_event_id,
        } => {
            assert!(events.is_empty());
            assert_eq!(last_event_id, 0);
        }
        other => panic!("Expected TreeDump, got: {other:?}"),
    }
}
#[tokio::test] #[tokio::test]
#[apply(indexer_template)] #[apply(indexer_template)]
async fn test_apply_events_idempotent(variant: &str) { async fn test_apply_events_idempotent(variant: &str) {
......
...@@ -56,8 +56,13 @@ pub struct WorkerKvQueryRequest { ...@@ -56,8 +56,13 @@ pub struct WorkerKvQueryRequest {
pub enum WorkerKvQueryResponse { pub enum WorkerKvQueryResponse {
/// Events served from the circular buffer (with original event IDs) /// Events served from the circular buffer (with original event IDs)
Events(Vec<RouterEvent>), Events(Vec<RouterEvent>),
/// Full tree dump (with synthetic 0-indexed event IDs) /// Full tree dump (with synthetic 0-indexed event IDs).
TreeDump(Vec<RouterEvent>), /// Includes `last_event_id`: the newest real event ID in the worker's buffer
/// at the time of the dump, so the caller can set its tracking cursor correctly.
TreeDump {
events: Vec<RouterEvent>,
last_event_id: u64,
},
/// Requested range is newer than available data /// Requested range is newer than available data
TooNew { TooNew {
requested_start: Option<u64>, requested_start: Option<u64>,
......
...@@ -2024,7 +2024,7 @@ mod tests_startup_helpers { ...@@ -2024,7 +2024,7 @@ mod tests_startup_helpers {
.await; .await;
let missed_events = match response { let missed_events = match response {
crate::kv_router::indexer::WorkerKvQueryResponse::Events(e) => e, crate::kv_router::indexer::WorkerKvQueryResponse::Events(e) => e,
crate::kv_router::indexer::WorkerKvQueryResponse::TreeDump(e) => e, crate::kv_router::indexer::WorkerKvQueryResponse::TreeDump { events: e, .. } => e,
crate::kv_router::indexer::WorkerKvQueryResponse::Error(message) => { crate::kv_router::indexer::WorkerKvQueryResponse::Error(message) => {
panic!("Unexpected error response: {message}") panic!("Unexpected error response: {message}")
} }
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use std::collections::HashMap;
use crate::kv_router::{ use crate::kv_router::{
Indexer, KV_EVENT_SUBJECT, KvRouterConfig, Indexer, KV_EVENT_SUBJECT, KvRouterConfig, protocols::RouterEvent,
protocols::{DpRank, RouterEvent, WorkerId},
worker_query::WorkerQueryClient, worker_query::WorkerQueryClient,
}; };
use anyhow::Result; use anyhow::Result;
...@@ -23,9 +20,6 @@ use dynamo_runtime::{ ...@@ -23,9 +20,6 @@ use dynamo_runtime::{
/// - On worker Added: dumps worker's local indexer into router /// - On worker Added: dumps worker's local indexer into router
/// - On worker Removed: removes worker from router indexer /// - On worker Removed: removes worker from router indexer
/// ///
/// This function first recovers state from all currently registered workers before
/// spawning the background task, ensuring the router is ready before returning.
///
/// This is appropriate when workers have local indexers enabled. /// This is appropriate when workers have local indexers enabled.
async fn start_kv_router_background_event_plane( async fn start_kv_router_background_event_plane(
component: Component, component: Component,
...@@ -47,7 +41,9 @@ async fn start_kv_router_background_event_plane( ...@@ -47,7 +41,9 @@ async fn start_kv_router_background_event_plane(
// before recovery fetches the initial dump from workers. // before recovery fetches the initial dump from workers.
tokio::time::sleep(std::time::Duration::from_millis(100)).await; tokio::time::sleep(std::time::Duration::from_millis(100)).await;
let worker_query_client = WorkerQueryClient::spawn(component.clone(), indexer.clone()).await?; // WorkerQueryClient handles its own discovery loop for lifecycle + initial recovery.
// No blocking wait — recovery happens asynchronously as endpoints are discovered.
let worker_query_client = WorkerQueryClient::spawn(component.clone(), indexer).await?;
let kv_event_subject = format!( let kv_event_subject = format!(
"namespace.{}.component.{}.{}", "namespace.{}.component.{}.{}",
component.namespace().name(), component.namespace().name(),
...@@ -71,10 +67,6 @@ async fn start_kv_router_background_event_plane( ...@@ -71,10 +67,6 @@ async fn start_kv_router_background_event_plane(
} }
tokio::spawn(async move { tokio::spawn(async move {
// Track last received event ID per (worker, dp_rank) for gap detection
// Each dp_rank has its own monotonic event ID sequence
let mut last_event_ids: HashMap<(WorkerId, DpRank), u64> = HashMap::new();
loop { loop {
tokio::select! { tokio::select! {
biased; biased;
...@@ -94,47 +86,19 @@ async fn start_kv_router_background_event_plane( ...@@ -94,47 +86,19 @@ async fn start_kv_router_background_event_plane(
} }
}; };
let worker_id = event.worker_id;
let dp_rank = event.event.dp_rank;
let event_id = event.event.event_id;
let event_key = (worker_id, dp_rank);
tracing::trace!( tracing::trace!(
"Received event from publisher {} (seq {})", "Received event from publisher {} (seq {})",
envelope.publisher_id, envelope.publisher_id,
envelope.sequence envelope.sequence
); );
// Gap detection: check if event ID is monotonically increasing per (worker, dp_rank) tracing::trace!(
// Note: event_id <= last_id is duplicate/out-of-order, apply anyway (idempotent) "Forwarding live event to recovery coordinator for worker {} dp_rank {} event_id {}",
if let Some(&last_id) = last_event_ids.get(&event_key) event.worker_id,
&& event_id > last_id + 1 event.event.dp_rank,
{ event.event.event_id
let gap_start = last_id + 1; );
let gap_end = event_id - 1; worker_query_client.handle_live_event(event).await;
let gap_size = gap_end - gap_start + 1;
tracing::warn!(
"Event ID gap detected for worker {worker_id} dp_rank {dp_rank}, recovering events [{gap_start}, {gap_end}], gap_size: {gap_size}"
);
if let Err(e) = worker_query_client
.recover_from_worker(worker_id, dp_rank, Some(gap_start), Some(gap_end))
.await
{
tracing::error!(
"Failed to recover gap events for worker {worker_id} dp_rank {dp_rank} (gap_start: {gap_start}, gap_end: {gap_end}); proceeding with current event anyway: {e}"
);
}
}
// Update last seen event ID (use max to handle out-of-order)
last_event_ids
.entry(event_key)
.and_modify(|id| *id = (*id).max(event_id))
.or_insert(event_id);
// Forward the RouterEvent to the indexer
indexer.apply_event(event).await;
} }
} }
} }
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment