Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
d49fc3eb
Unverified
Commit
d49fc3eb
authored
Sep 16, 2025
by
Yan Ru Pei
Committed by
GitHub
Sep 16, 2025
Browse files
fix: purge before snapshotting (#3077)
Signed-off-by:
PeaBrane
<
yanrpei@gmail.com
>
parent
0373b897
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
49 additions
and
45 deletions
+49
-45
lib/llm/src/kv_router/approx.rs
lib/llm/src/kv_router/approx.rs
+14
-11
lib/llm/src/kv_router/indexer.rs
lib/llm/src/kv_router/indexer.rs
+26
-26
lib/llm/src/kv_router/subscriber.rs
lib/llm/src/kv_router/subscriber.rs
+9
-8
No files found.
lib/llm/src/kv_router/approx.rs
View file @
d49fc3eb
...
@@ -208,10 +208,15 @@ impl ApproxKvIndexer {
...
@@ -208,10 +208,15 @@ impl ApproxKvIndexer {
};
};
tokio
::
select!
{
tokio
::
select!
{
Some
(
request
)
=
match_rx
.recv
()
=>
{
_
=
cancel_clone
.cancelled
()
=>
{
let
scores
=
trie
.find_matches
(
request
.sequence
,
false
);
tracing
::
debug!
(
"Approximate Indexer progress loop shutting down"
);
re
quest
.resp
.send
(
scores
)
.unwrap
()
;
re
turn
;
}
}
Some
(
worker
)
=
remove_worker_rx
.recv
()
=>
{
trie
.remove_worker
(
worker
);
}
Some
(
result
)
=
route_rx
.recv
()
=>
{
Some
(
result
)
=
route_rx
.recv
()
=>
{
let
hashes
=
result
.local_hashes
.iter
()
.zip
(
result
.sequence_hashes
.iter
());
let
hashes
=
result
.local_hashes
.iter
()
.zip
(
result
.sequence_hashes
.iter
());
...
@@ -239,14 +244,17 @@ impl ApproxKvIndexer {
...
@@ -239,14 +244,17 @@ impl ApproxKvIndexer {
worker
:
result
.worker_id
,
worker
:
result
.worker_id
,
})
.collect
());
})
.collect
());
}
}
Some
(
worker
)
=
remove_worker_rx
.recv
()
=>
{
trie
.remove_worker
(
worker
);
}
Some
(
dump_req
)
=
dump_rx
.recv
()
=>
{
Some
(
dump_req
)
=
dump_rx
.recv
()
=>
{
let
events
=
trie
.dump_tree_as_events
();
let
events
=
trie
.dump_tree_as_events
();
let
_
=
dump_req
.resp
.send
(
events
);
let
_
=
dump_req
.resp
.send
(
events
);
}
}
Some
(
request
)
=
match_rx
.recv
()
=>
{
let
scores
=
trie
.find_matches
(
request
.sequence
,
false
);
request
.resp
.send
(
scores
)
.unwrap
();
}
_
=
expiry_fut
=>
{
_
=
expiry_fut
=>
{
let
expired
=
timer_manager
.pop_expired
();
let
expired
=
timer_manager
.pop_expired
();
...
@@ -266,11 +274,6 @@ impl ApproxKvIndexer {
...
@@ -266,11 +274,6 @@ impl ApproxKvIndexer {
let
_
=
trie
.apply_event
(
event
);
let
_
=
trie
.apply_event
(
event
);
});
});
}
}
_
=
cancel_clone
.cancelled
()
=>
{
tracing
::
debug!
(
"Approximate Indexer progress loop shutting down"
);
return
;
}
}
}
}
}
});
});
...
...
lib/llm/src/kv_router/indexer.rs
View file @
d49fc3eb
...
@@ -797,13 +797,19 @@ impl KvIndexer {
...
@@ -797,13 +797,19 @@ impl KvIndexer {
tokio
::
select!
{
tokio
::
select!
{
biased
;
biased
;
_
=
cancel
.cancelled
()
=>
{
tracing
::
debug!
(
"KvCacheIndexer progress loop shutting down"
);
return
;
}
Some
(
worker
)
=
remove_worker_rx
.recv
()
=>
{
Some
(
worker
)
=
remove_worker_rx
.recv
()
=>
{
trie
.remove_worker
(
worker
);
trie
.remove_worker
(
worker
);
}
}
Some
(
req
)
=
match_rx
.recv
()
=>
{
Some
(
event
)
=
event_rx
.recv
()
=>
{
let
matches
=
trie
.find_matches
(
req
.sequence
,
req
.early_exit
);
let
event_type
=
KvIndexerMetrics
::
get_event_type
(
&
event
.event.data
);
let
_
=
req
.resp
.send
(
matches
);
let
result
=
trie
.apply_event
(
event
);
metrics
.increment_event_applied
(
event_type
,
result
);
}
}
Some
(
dump_req
)
=
dump_rx
.recv
()
=>
{
Some
(
dump_req
)
=
dump_rx
.recv
()
=>
{
...
@@ -811,15 +817,9 @@ impl KvIndexer {
...
@@ -811,15 +817,9 @@ impl KvIndexer {
let
_
=
dump_req
.resp
.send
(
events
);
let
_
=
dump_req
.resp
.send
(
events
);
}
}
_
=
cancel
.cancelled
()
=>
{
Some
(
req
)
=
match_rx
.recv
()
=>
{
tracing
::
debug!
(
"KvCacheIndexer progress loop shutting down"
);
let
matches
=
trie
.find_matches
(
req
.sequence
,
req
.early_exit
);
return
;
let
_
=
req
.resp
.send
(
matches
);
}
Some
(
event
)
=
event_rx
.recv
()
=>
{
let
event_type
=
KvIndexerMetrics
::
get_event_type
(
&
event
.event.data
);
let
result
=
trie
.apply_event
(
event
);
metrics
.increment_event_applied
(
event_type
,
result
);
}
}
}
}
}
}
...
@@ -1040,15 +1040,19 @@ impl KvIndexerSharded {
...
@@ -1040,15 +1040,19 @@ impl KvIndexerSharded {
tokio
::
select!
{
tokio
::
select!
{
biased
;
biased
;
_
=
cancel
.cancelled
()
=>
{
tracing
::
trace!
(
"KvCacheIndexer progress loop shutting down"
);
return
;
}
Some
(
worker
)
=
shard_remove_worker_rx
.recv
()
=>
{
Some
(
worker
)
=
shard_remove_worker_rx
.recv
()
=>
{
trie
.remove_worker
(
worker
);
trie
.remove_worker
(
worker
);
}
}
Ok
(
req
)
=
shard_broadcast_rx
.recv
()
=>
{
Some
(
event
)
=
shard_event_rx
.recv
()
=>
{
let
matches
=
trie
.find_matches
(
req
.sequence
,
req
.early_exit
);
let
event_type
=
KvIndexerMetrics
::
get_event_type
(
&
event
.event.data
);
if
let
Err
(
e
)
=
req
.resp
.send
(
matches
)
.await
{
let
result
=
trie
.apply_event
(
event
);
tracing
::
trace!
(
"Failed to send match response: {:?}"
,
e
);
metrics
.increment_event_applied
(
event_type
,
result
);
}
}
}
Some
(
dump_req
)
=
shard_dump_rx
.recv
()
=>
{
Some
(
dump_req
)
=
shard_dump_rx
.recv
()
=>
{
...
@@ -1056,15 +1060,11 @@ impl KvIndexerSharded {
...
@@ -1056,15 +1060,11 @@ impl KvIndexerSharded {
let
_
=
dump_req
.resp
.send
(
events
);
let
_
=
dump_req
.resp
.send
(
events
);
}
}
_
=
cancel
.cancelled
()
=>
{
Ok
(
req
)
=
shard_broadcast_rx
.recv
()
=>
{
tracing
::
trace!
(
"KvCacheIndexer progress loop shutting down"
);
let
matches
=
trie
.find_matches
(
req
.sequence
,
req
.early_exit
);
return
;
if
let
Err
(
e
)
=
req
.resp
.send
(
matches
)
.await
{
}
tracing
::
trace!
(
"Failed to send match response: {:?}"
,
e
);
}
Some
(
event
)
=
shard_event_rx
.recv
()
=>
{
let
event_type
=
KvIndexerMetrics
::
get_event_type
(
&
event
.event.data
);
let
result
=
trie
.apply_event
(
event
);
metrics
.increment_event_applied
(
event_type
,
result
);
}
}
}
}
}
}
...
...
lib/llm/src/kv_router/subscriber.rs
View file @
d49fc3eb
...
@@ -244,7 +244,7 @@ pub async fn start_kv_router_background(
...
@@ -244,7 +244,7 @@ pub async fn start_kv_router_background(
};
};
// Perform snapshot upload and purge
// Perform snapshot upload and purge
match
p
erform_snapshot_and_purge
(
match
p
urge_then_snapshot
(
&
mut
nats_queue
,
&
mut
nats_queue
,
snapshot_tx
,
snapshot_tx
,
resources
resources
...
@@ -322,15 +322,19 @@ pub async fn start_kv_router_background(
...
@@ -322,15 +322,19 @@ pub async fn start_kv_router_background(
}
}
/// Perform snapshot upload and purge operations
/// Perform snapshot upload and purge operations
async
fn
p
erform_snapshot_and_purge
(
async
fn
p
urge_then_snapshot
(
nats_queue
:
&
mut
NatsQueue
,
nats_queue
:
&
mut
NatsQueue
,
snapshot_tx
:
&
mpsc
::
Sender
<
DumpRequest
>
,
snapshot_tx
:
&
mpsc
::
Sender
<
DumpRequest
>
,
resources
:
&
SnapshotResources
,
resources
:
&
SnapshotResources
,
)
->
anyhow
::
Result
<
()
>
{
)
->
anyhow
::
Result
<
()
>
{
// Snapshot before purge ensures we capture the current state before removing any messages.
// Purge before snapshot ensures new/warm-restarted routers won't replay already-acknowledged messages.
// This guarantees the snapshot matches what has been acknowledged up to this point.
// Since KV events are idempotent, this ordering reduces unnecessary reprocessing while maintaining
// at-least-once delivery guarantees. The snapshot will capture the clean state after purge.
// First, request a snapshot from the indexer
// First, purge acknowledged messages from the stream
nats_queue
.purge_acknowledged
()
.await
?
;
// Now request a snapshot from the indexer (which reflects the post-purge state)
let
(
resp_tx
,
resp_rx
)
=
oneshot
::
channel
();
let
(
resp_tx
,
resp_rx
)
=
oneshot
::
channel
();
let
dump_req
=
DumpRequest
{
resp
:
resp_tx
};
let
dump_req
=
DumpRequest
{
resp
:
resp_tx
};
...
@@ -363,8 +367,5 @@ async fn perform_snapshot_and_purge(
...
@@ -363,8 +367,5 @@ async fn perform_snapshot_and_purge(
resources
.bucket_name
resources
.bucket_name
);
);
// Now purge acknowledged messages from the stream
nats_queue
.purge_acknowledged
()
.await
?
;
Ok
(())
Ok
(())
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment