Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
df91fce2
"lib/bindings/vscode:/vscode.git/clone" did not exist on "31f5ed3ce7db3eef7b2991963644f5fdb17ab063"
Unverified
Commit
df91fce2
authored
Jul 14, 2025
by
Yan Ru Pei
Committed by
GitHub
Jul 14, 2025
Browse files
feat: prefill aware routing (#1895)
parent
ad8ad66b
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
169 additions
and
58 deletions
+169
-58
components/metrics/src/bin/mock_worker.rs
components/metrics/src/bin/mock_worker.rs
+1
-1
components/metrics/src/main.rs
components/metrics/src/main.rs
+1
-1
docs/guides/dynamo_run.md
docs/guides/dynamo_run.md
+1
-1
launch/dynamo-run/src/flags.rs
launch/dynamo-run/src/flags.rs
+2
-2
lib/llm/src/kv_router.rs
lib/llm/src/kv_router.rs
+11
-3
lib/llm/src/kv_router/protocols.rs
lib/llm/src/kv_router/protocols.rs
+1
-1
lib/llm/src/kv_router/scheduler.rs
lib/llm/src/kv_router/scheduler.rs
+39
-42
lib/llm/src/kv_router/sequence.rs
lib/llm/src/kv_router/sequence.rs
+113
-7
No files found.
components/metrics/src/bin/mock_worker.rs
View file @
df91fce2
...
...
@@ -93,7 +93,7 @@ async fn mock_event_publisher(namespace: Namespace) {
let
event
=
KVHitRateEvent
{
worker_id
,
isl_blocks
,
overlap_blocks
,
overlap_blocks
:
overlap_blocks
as
u32
,
};
if
let
Err
(
e
)
=
namespace
.publish
(
KV_HIT_RATE_SUBJECT
,
&
event
)
.await
{
...
...
components/metrics/src/main.rs
View file @
df91fce2
...
...
@@ -199,7 +199,7 @@ async fn app(runtime: Runtime) -> Result<()> {
&
config_clone
,
event
.worker_id
,
event
.isl_blocks
,
event
.overlap_blocks
,
event
.overlap_blocks
as
usize
,
);
}
Err
(
e
)
=>
{
...
...
docs/guides/dynamo_run.md
View file @
df91fce2
...
...
@@ -8,7 +8,7 @@ It supports these engines: mistralrs, llamacpp, sglang, vllm, and tensorrt-llm.
Usage:
```
dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=echo_core|echo_full|mistralrs|llamacpp|sglang|vllm|dyn [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--tensor-parallel-size=1] [--context-length=N] [--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0] [--extra-engine-args=args.json] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=1.0] [--router-temperature=0.
5
] [--use-kv-events=true] [--verbosity (-v|-vv)]
dynamo-run in=[http|text|dyn://<path>|batch:<folder>] out=echo_core|echo_full|mistralrs|llamacpp|sglang|vllm|dyn [--http-port 8080] [--model-path <path>] [--model-name <served-model-name>] [--model-config <hf-repo>] [--tensor-parallel-size=1] [--context-length=N] [--num-nodes=1] [--node-rank=0] [--leader-addr=127.0.0.1:9876] [--base-gpu-id=0] [--extra-engine-args=args.json] [--router-mode random|round-robin|kv] [--kv-overlap-score-weight=1.0] [--router-temperature=0.
0
] [--use-kv-events=true] [--verbosity (-v|-vv)]
```
Example:
`dynamo run Qwen/Qwen3-0.6B`
...
...
launch/dynamo-run/src/flags.rs
View file @
df91fce2
...
...
@@ -118,13 +118,13 @@ pub struct Flags {
pub
max_num_batched_tokens
:
Option
<
u32
>
,
/// KV Router: Weight for overlap score in worker selection.
/// Higher values prioritize KV cache reuse. Default:
2
.0
/// Higher values prioritize KV cache reuse. Default:
1
.0
#[arg(long)]
pub
kv_overlap_score_weight
:
Option
<
f64
>
,
/// KV Router: Temperature for worker sampling via softmax.
/// Higher values promote more randomness, and 0 fallbacks to deterministic.
/// Default: 0.
5
/// Default: 0.
0
#[arg(long)]
pub
router_temperature
:
Option
<
f64
>
,
...
...
lib/llm/src/kv_router.rs
View file @
df91fce2
...
...
@@ -78,7 +78,7 @@ impl Default for KvRouterConfig {
fn
default
()
->
Self
{
Self
{
overlap_score_weight
:
1.0
,
router_temperature
:
0.
5
,
router_temperature
:
0.
0
,
use_kv_events
:
true
,
max_num_batched_tokens
:
8192
,
}
...
...
@@ -337,6 +337,7 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
let
mut
accumulated_tokens
=
Vec
::
new
();
let
mut
total_output_length
=
0u
size
;
let
mut
last_block_index
=
(
isl
.saturating_sub
(
1
))
/
block_size
;
let
mut
first_push_done
=
false
;
while
let
Some
(
item
)
=
response_stream
.next
()
.await
{
// Track tokens if they exist in the response
...
...
@@ -353,12 +354,19 @@ impl AsyncEngine<SingleIn<PreprocessedRequest>, ManyOut<Annotated<LLMEngineOutpu
accumulated_tokens
.extend_from_slice
(
&
output
.token_ids
);
total_output_length
+=
output
.token_ids
.len
();
// Check if we've moved to a new block
// Always push for the first generated token (to mark prefill done)
// or when we've moved to a new block
let
current_block_index
=
(
isl
+
total_output_length
)
.saturating_sub
(
1
)
/
block_size
;
if
current_block_index
>
last_block_index
{
let
should_push
=
(
!
first_push_done
&&
total_output_length
>=
1
)
||
(
first_push_done
&&
current_block_index
>
last_block_index
);
if
should_push
{
chooser
.push
(
&
request_id
,
&
accumulated_tokens
)
.await
;
accumulated_tokens
.clear
();
last_block_index
=
current_block_index
;
if
!
first_push_done
{
first_push_done
=
true
;
}
}
yield
item
;
...
...
lib/llm/src/kv_router/protocols.rs
View file @
df91fce2
...
...
@@ -36,7 +36,7 @@ pub struct WorkerSelectionResult {
/// The number of blocks that the selected worker may already have cached.
/// This is not a guarantee, but an estimate.
pub
overlap_blocks
:
u
size
,
pub
overlap_blocks
:
u
32
,
}
#[derive(Debug,
Clone,
Serialize,
Deserialize,
Default,
PartialEq)]
...
...
lib/llm/src/kv_router/scheduler.rs
View file @
df91fce2
...
...
@@ -25,7 +25,6 @@ use tokio::sync::Mutex;
use
super
::
protocols
::
WorkerSelectionResult
;
use
super
::
WorkerSelector
;
use
crate
::
kv_router
::
indexer
::
OverlapScores
;
use
crate
::
kv_router
::
indexer
::
WorkerId
;
use
crate
::
kv_router
::
protocols
::
LoadMetrics
;
use
crate
::
kv_router
::
scoring
::
ProcessedEndpoints
;
use
crate
::
kv_router
::
sequence
::
ActiveSequencesMultiWorker
;
...
...
@@ -37,7 +36,7 @@ use crate::tokens::TokenBlockSequence;
pub
struct
KVHitRateEvent
{
pub
worker_id
:
i64
,
pub
isl_blocks
:
usize
,
pub
overlap_blocks
:
u
size
,
pub
overlap_blocks
:
u
32
,
}
#[derive(Debug,
thiserror::Error)]
...
...
@@ -79,13 +78,15 @@ impl Endpoint {
#[derive(Debug)]
pub
struct
SchedulingResponse
{
pub
best_worker_id
:
i64
,
pub
overlap_blocks
:
u32
,
// Add this field
pub
endpoints_changed
:
Option
<
Vec
<
i64
>>
,
}
pub
struct
SchedulingRequest
{
pub
isl_tokens
:
usize
,
pub
overlap
:
OverlapScores
,
pub
overlap
s
:
OverlapScores
,
pub
potential_blocks
:
HashMap
<
i64
,
usize
>
,
pub
potential_tokens
:
HashMap
<
i64
,
usize
>
,
resp_tx
:
tokio
::
sync
::
oneshot
::
Sender
<
SchedulingResponse
>
,
}
...
...
@@ -174,6 +175,7 @@ impl KvScheduler {
let
response
=
SchedulingResponse
{
best_worker_id
:
selection
.worker_id
,
overlap_blocks
:
selection
.overlap_blocks
,
endpoints_changed
:
pending_endpoint_update
.take
(),
};
request
.respond
(
response
);
...
...
@@ -207,18 +209,20 @@ impl KvScheduler {
isl_tokens
:
usize
,
block_size
:
u32
,
tokens
:
&
[
u32
],
overlap
:
OverlapScores
,
overlap
s
:
OverlapScores
,
)
->
Result
<
i64
,
KvSchedulerError
>
{
let
mut
sequences
=
self
.sequences
.lock
()
.await
;
let
token_sequence
=
TokenBlockSequence
::
from_slice
(
tokens
,
block_size
,
None
);
let
potential_blocks
=
sequences
.potential_blocks
(
token_sequence
);
let
(
potential_blocks
,
potential_tokens
)
=
sequences
.potential_blocks_and_tokens
(
token_sequence
,
overlaps
.clone
());
let
(
resp_tx
,
resp_rx
)
=
tokio
::
sync
::
oneshot
::
channel
();
let
request
=
SchedulingRequest
{
isl_tokens
,
overlap
,
overlap
s
,
potential_blocks
,
potential_tokens
,
resp_tx
,
};
self
.request_tx
...
...
@@ -234,31 +238,16 @@ impl KvScheduler {
}
let
token_sequence
=
TokenBlockSequence
::
from_slice
(
tokens
,
block_size
,
None
);
sequences
.add_request
(
request_id
,
token_sequence
,
response
.best_worker_id
);
sequences
.add_request
(
request_id
,
token_sequence
,
response
.overlap_blocks
,
response
.best_worker_id
,
);
Ok
(
response
.best_worker_id
)
}
/// Find the potential blocks for each worker if the sequence were routed there
pub
async
fn
potential_blocks
(
&
self
,
token_sequence
:
TokenBlockSequence
,
)
->
HashMap
<
i64
,
usize
>
{
let
sequences
=
self
.sequences
.lock
()
.await
;
sequences
.potential_blocks
(
token_sequence
)
}
/// Add a new request with its initial tokens to a specific worker
pub
async
fn
add_request
(
&
self
,
request_id
:
String
,
token_sequence
:
TokenBlockSequence
,
worker_id
:
WorkerId
,
)
{
let
mut
sequences
=
self
.sequences
.lock
()
.await
;
sequences
.add_request
(
request_id
,
token_sequence
,
worker_id
)
}
/// Push tokens to a specific request's sequence
pub
async
fn
push
(
&
self
,
request_id
:
&
String
,
tokens
:
&
[
u32
])
{
let
mut
sequences
=
self
.sequences
.lock
()
.await
;
...
...
@@ -370,34 +359,47 @@ impl WorkerSelector for DefaultWorkerSelector {
return
Err
(
KvSchedulerError
::
NoEndpoints
);
}
let
request_blocks
=
request
.isl_tokens
.div_ceil
(
block_size
as
usize
);
let
isl
=
request
.isl_tokens
;
let
request_blocks
=
isl
.div_ceil
(
block_size
as
usize
);
let
overlaps
=
&
request
.overlaps.scores
;
// active blocks for decoding
let
potential_active_blocks
=
&
request
.potential_blocks
;
// active tokens in the batch (processed by the linear layers), mostly prefill tokens
let
potential_active_tokens
=
&
request
.potential_tokens
;
let
mut
worker_logits
=
HashMap
::
new
();
let
mut
max_logit
=
f64
::
NEG_INFINITY
;
// Calculate logits for each worker
for
(
worker_id
,
_
)
in
workers
.endpoints
.iter
()
{
let
cached_blocks
=
request
.overlap.scores
.get
(
worker_id
)
.copied
()
.unwrap_or
(
0
)
as
f64
;
let
prefill_blocks
=
request_blocks
as
f64
-
cached_blocks
;
// this is the number of tokens each worker would have if the request were scheduled there
let
potential_tokens
=
*
potential_active_tokens
.get
(
worker_id
)
.unwrap_or_else
(||
{
tracing
::
warn!
(
"assuming {isl} tokens for {worker_id}, as the endpoint does not exist yet"
);
&
isl
})
as
f64
;
// this is the number of blocks each worker would have if the request were scheduled there
let
potential_blocks
=
*
potential_active_blocks
.get
(
worker_id
)
.unwrap_or_else
(||
{
tracing
::
warn!
(
"assuming
0
decoding blocks for {worker_id}, as the
load metrics
endpoint does not exist yet"
);
&
0
{
tracing
::
warn!
(
"assuming
{request_blocks}
decoding blocks for {worker_id}, as the endpoint does not exist yet"
);
&
request_blocks
})
as
f64
;
let
potential_prefill_blocks
=
potential_tokens
/
(
block_size
as
f64
);
// Calculate logit (lower is better)
let
logit
=
self
.kv_router_config.overlap_score_weight
*
prefill_blocks
+
potential_blocks
;
let
logit
=
self
.kv_router_config.overlap_score_weight
*
potential_prefill_blocks
+
potential_blocks
;
max_logit
=
max_logit
.max
(
logit
);
worker_logits
.insert
(
*
worker_id
,
logit
);
tracing
::
info!
(
"Formula for {worker_id}: {logit:.3} = {:.1} * {prefill_blocks:.3} + {potential_blocks:.3} (cached_blocks: {
cached_blocks
})"
,
"Formula for {worker_id}: {logit:.3} = {:.1} * {
potential_
prefill_blocks:.3} + {potential_blocks:.3} (cached_blocks: {})"
,
self
.kv_router_config.overlap_score_weight
,
cached_blocks
=
cached_blocks
overlaps
.get
(
worker_id
)
.unwrap_or
(
&
0
),
);
}
...
...
@@ -412,12 +414,7 @@ impl WorkerSelector for DefaultWorkerSelector {
let
temperature
=
self
.kv_router_config.router_temperature
;
let
best_worker_id
=
softmax_sample
(
&
worker_logits
,
temperature
);
let
overlap_blocks
=
request
.overlap
.scores
.get
(
&
best_worker_id
)
.copied
()
.unwrap_or
(
0
)
as
usize
;
let
overlap_blocks
=
overlaps
.get
(
&
best_worker_id
)
.copied
()
.unwrap_or
(
0
);
let
best_logit
=
worker_logits
[
&
best_worker_id
];
tracing
::
info!
(
...
...
lib/llm/src/kv_router/sequence.rs
View file @
df91fce2
...
...
@@ -34,6 +34,7 @@
//! Each block is identified by a hash of its contents, allowing for deduplication when multiple
//! requests share common prefixes (e.g., system prompts, few-shot examples).
use
crate
::
kv_router
::
indexer
::
OverlapScores
;
use
crate
::
kv_router
::
indexer
::
WorkerId
;
use
crate
::
tokens
::
blocks
::
UniqueBlock
;
use
crate
::
tokens
::
TokenBlockSequence
;
...
...
@@ -76,6 +77,8 @@ pub struct ActiveSequences {
partial_blocks
:
HashMap
<
RequestId
,
UniqueBlock
>
,
prefill_tokens
:
HashMap
<
RequestId
,
usize
>
,
unique_blocks
:
HashMap
<
UniqueBlock
,
HashSet
<
RequestId
>>
,
#[getter(copy)]
...
...
@@ -83,6 +86,9 @@ pub struct ActiveSequences {
#[getter(copy)]
active_blocks
:
usize
,
#[getter(copy)]
active_tokens
:
usize
,
}
impl
ActiveSequences
{
...
...
@@ -94,9 +100,11 @@ impl ActiveSequences {
Self
{
active_seqs
:
HashMap
::
new
(),
partial_blocks
:
HashMap
::
new
(),
prefill_tokens
:
HashMap
::
new
(),
unique_blocks
:
HashMap
::
new
(),
block_size
,
active_blocks
:
0
,
active_tokens
:
0
,
}
}
...
...
@@ -135,7 +143,13 @@ impl ActiveSequences {
&
mut
self
,
request_id
:
RequestId
,
token_sequence
:
TokenBlockSequence
,
overlap
:
u32
,
)
->
usize
{
let
prefill_tokens
=
self
.new_tokens
(
&
token_sequence
,
overlap
);
self
.prefill_tokens
.insert
(
request_id
.clone
(),
prefill_tokens
);
self
.active_tokens
+=
prefill_tokens
;
let
blocks
=
create_unique_blocks_from_sequence
(
&
token_sequence
,
None
,
self
.block_size
);
for
block
in
&
blocks
{
...
...
@@ -147,6 +161,25 @@ impl ActiveSequences {
self
.active_blocks
}
pub
fn
new_tokens
(
&
self
,
token_sequence
:
&
TokenBlockSequence
,
overlap
:
u32
)
->
usize
{
let
input_tokens
=
token_sequence
.total_tokens
();
input_tokens
.checked_sub
((
overlap
as
usize
)
*
self
.block_size
)
.unwrap_or_else
(||
{
panic!
(
"prefill_tokens < 0 with overlap {overlap} and ISL {input_tokens}"
)
})
}
pub
fn
potential_blocks_and_tokens
(
&
self
,
token_sequence
:
&
TokenBlockSequence
,
overlap
:
u32
,
)
->
(
usize
,
usize
)
{
let
potential_blocks
=
self
.new_blocks
(
token_sequence
)
+
self
.active_blocks
;
let
potential_tokens
=
self
.new_tokens
(
token_sequence
,
overlap
)
+
self
.active_tokens
;
(
potential_blocks
,
potential_tokens
)
}
/// Match a request against existing blocks and return the number of new blocks that would be added
pub
fn
new_blocks
(
&
self
,
token_sequence
:
&
TokenBlockSequence
)
->
usize
{
let
blocks
=
create_unique_blocks_from_sequence
(
token_sequence
,
None
,
self
.block_size
);
...
...
@@ -165,6 +198,12 @@ impl ActiveSequences {
/// Free all blocks associated with a request
pub
fn
free
(
&
mut
self
,
request_id
:
&
RequestId
)
->
usize
{
// decoding has one active token
self
.active_tokens
=
self
.active_tokens
.checked_sub
(
self
.prefill_tokens
.remove
(
request_id
)
.unwrap_or
(
1
))
.expect
(
"active_tokens < 0"
);
let
Some
(
token_seq
)
=
self
.active_seqs
.get
(
request_id
)
else
{
tracing
::
warn!
(
"Trying to free free non-existent request {request_id}"
);
return
0
;
...
...
@@ -187,6 +226,16 @@ impl ActiveSequences {
/// Push tokens to a specific request's sequence
pub
fn
push
(
&
mut
self
,
request_id
:
&
RequestId
,
tokens
:
&
[
u32
])
->
usize
{
if
let
Some
(
prefill_tokens
)
=
self
.prefill_tokens
.get
(
request_id
)
.cloned
()
{
self
.prefill_tokens
.remove
(
request_id
);
// decoding has one active token
self
.active_tokens
=
self
.active_tokens
.checked_sub
(
prefill_tokens
)
.expect
(
"active_tokens < 0"
)
+
1
;
};
// Collect operations to perform after releasing the borrow
let
mut
blocks_to_remove
=
Vec
::
new
();
let
mut
blocks_to_add
=
Vec
::
new
();
...
...
@@ -239,6 +288,7 @@ enum UpdateSequences {
AddRequest
{
request_id
:
RequestId
,
token_sequence
:
TokenBlockSequence
,
overlap
:
u32
,
},
Free
{
request_id
:
RequestId
,
...
...
@@ -255,6 +305,11 @@ enum UpdateSequences {
token_sequence
:
Arc
<
TokenBlockSequence
>
,
resp_tx
:
mpsc
::
SyncSender
<
usize
>
,
},
PotentialBlocksAndTokens
{
token_sequence
:
Arc
<
TokenBlockSequence
>
,
overlap
:
u32
,
resp_tx
:
mpsc
::
SyncSender
<
(
usize
,
usize
)
>
,
},
ActiveBlocks
{
resp_tx
:
mpsc
::
SyncSender
<
usize
>
,
},
...
...
@@ -302,8 +357,9 @@ impl ActiveSequencesMultiWorker {
UpdateSequences
::
AddRequest
{
request_id
,
token_sequence
,
overlap
,
}
=>
{
active_sequences
.add_request
(
request_id
,
token_sequence
);
active_sequences
.add_request
(
request_id
,
token_sequence
,
overlap
);
}
UpdateSequences
::
Free
{
request_id
}
=>
{
active_sequences
.free
(
&
request_id
);
...
...
@@ -325,6 +381,15 @@ impl ActiveSequencesMultiWorker {
let
potential_blocks
=
active_sequences
.potential_blocks
(
&
token_sequence
);
let
_
=
resp_tx
.send
(
potential_blocks
);
}
UpdateSequences
::
PotentialBlocksAndTokens
{
token_sequence
,
overlap
,
resp_tx
,
}
=>
{
let
potential_tokens
=
active_sequences
.potential_blocks_and_tokens
(
&
token_sequence
,
overlap
);
let
_
=
resp_tx
.send
(
potential_tokens
);
}
UpdateSequences
::
ActiveBlocks
{
resp_tx
}
=>
{
let
active_blocks
=
active_sequences
.active_blocks
();
let
_
=
resp_tx
.send
(
active_blocks
);
...
...
@@ -379,6 +444,7 @@ impl ActiveSequencesMultiWorker {
&
mut
self
,
request_id
:
RequestId
,
token_sequence
:
TokenBlockSequence
,
overlap
:
u32
,
worker_id
:
WorkerId
,
)
{
if
!
self
.senders
.contains_key
(
&
worker_id
)
{
...
...
@@ -391,6 +457,7 @@ impl ActiveSequencesMultiWorker {
.send
(
UpdateSequences
::
AddRequest
{
request_id
,
token_sequence
,
overlap
,
})
.expect
(
"Failed to send add_request command to worker"
);
}
...
...
@@ -482,6 +549,43 @@ impl ActiveSequencesMultiWorker {
})
}
/// Query all workers for the potential tokens (new + active) that would be used by a token sequence with overlap
pub
fn
potential_blocks_and_tokens
(
&
self
,
token_sequence
:
TokenBlockSequence
,
overlaps
:
OverlapScores
,
)
->
(
HashMap
<
WorkerId
,
usize
>
,
HashMap
<
WorkerId
,
usize
>
)
{
let
mut
potential_blocks
=
HashMap
::
new
();
let
mut
potential_tokens
=
HashMap
::
new
();
let
token_sequence_shared
=
Arc
::
new
(
token_sequence
);
let
mut
receivers
=
Vec
::
new
();
// Send queries to all workers in parallel
for
(
worker_id
,
sender
)
in
&
self
.senders
{
let
(
resp_tx
,
resp_rx
)
=
mpsc
::
sync_channel
(
0
);
receivers
.push
((
worker_id
,
resp_rx
));
sender
.send
(
UpdateSequences
::
PotentialBlocksAndTokens
{
token_sequence
:
token_sequence_shared
.clone
(),
overlap
:
overlaps
.scores
.get
(
worker_id
)
.copied
()
.unwrap_or
(
0
),
resp_tx
,
})
.expect
(
"Failed to send potential_tokens command to worker"
);
}
// Collect results from all workers
for
(
worker_id
,
receiver
)
in
receivers
{
let
(
blocks
,
tokens
)
=
receiver
.recv_timeout
(
Duration
::
from_secs
(
1
))
.expect
(
"Failed to receive response from worker"
);
potential_blocks
.insert
(
*
worker_id
,
blocks
);
potential_tokens
.insert
(
*
worker_id
,
tokens
);
}
(
potential_blocks
,
potential_tokens
)
}
/// Query all workers for their current number of active blocks
pub
fn
active_blocks
(
&
self
)
->
HashMap
<
WorkerId
,
usize
>
{
self
.query_workers
(
None
,
|
_
,
resp_tx
|
UpdateSequences
::
ActiveBlocks
{
resp_tx
})
...
...
@@ -515,14 +619,15 @@ mod tests {
|
tokens
:
Vec
<
u32
>
|
Tokens
::
from
(
tokens
)
.into_sequence
(
block_size
as
u32
,
None
);
// Step 1: Add request 0 with tokens [0, 1, 2], then push 3 and 4
manager
.add_request
(
"0"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
]));
manager
.add_request
(
"0"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
])
,
0
);
manager
.push
(
&
"0"
.to_string
(),
&
[
3
,
4
]);
// Push both tokens at once
assert_eq!
(
manager
.active_tokens
(),
1
);
assert_eq!
(
manager
.active_blocks
(),
2
);
assert_eq!
(
manager
.partial_blocks
.len
(),
1
);
// Step 2: Add request 1 with tokens [0, 1, 2, 3, 4, 5, 6]
manager
.add_request
(
"1"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
,
3
,
4
,
5
,
6
]));
manager
.add_request
(
"1"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
,
3
,
4
,
5
,
6
]),
1
);
assert_eq!
(
manager
.active_tokens
(),
1
+
3
);
assert_eq!
(
manager
.active_blocks
(),
3
);
// Check that only one key is FullBlock with both requests sharing it
...
...
@@ -551,6 +656,7 @@ mod tests {
// Step 4: Free request 0
manager
.free
(
&
"0"
.to_string
());
assert_eq!
(
manager
.active_tokens
(),
0
);
assert_eq!
(
manager
.active_blocks
(),
0
);
assert_eq!
(
manager
.unique_blocks
.len
(),
0
);
assert_eq!
(
manager
.partial_blocks
.len
(),
0
);
...
...
@@ -566,14 +672,14 @@ mod tests {
|
tokens
:
Vec
<
u32
>
|
Tokens
::
from
(
tokens
)
.into_sequence
(
block_size
as
u32
,
None
);
// Send request [0, 1, 2, 3] to worker 0
manager
.add_request
(
"req0"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
,
3
]),
0
);
manager
.add_request
(
"req0"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
,
3
]),
0
,
0
);
// Send request [0, 1, 2] to worker 1, then push 3 and 4
manager
.add_request
(
"req1"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
]),
1
);
manager
.add_request
(
"req1"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
]),
0
,
1
);
manager
.push
(
&
"req1"
.to_string
(),
&
[
3
,
4
]);
// Push both tokens at once
// Send request [0, 1, 2] to worker 2
manager
.add_request
(
"req2"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
]),
2
);
manager
.add_request
(
"req2"
.to_string
(),
to_sequence
(
vec!
[
0
,
1
,
2
]),
0
,
2
);
// Check new_blocks on tokens [0, 1, 2, 3, 4]
let
new_blocks_map
=
manager
.new_blocks
(
to_sequence
(
vec!
[
0
,
1
,
2
,
3
,
4
]));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment