Unverified commit e3d00b89, authored by jthomson04 and committed by GitHub
Browse files

feat: Tier-based KV Routing (#8380)


Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
parent 7e48f3bd
...@@ -12,6 +12,7 @@ use dynamo_kv_router::indexer::{ ...@@ -12,6 +12,7 @@ use dynamo_kv_router::indexer::{
KvIndexer, KvIndexerInterface, KvIndexerMetrics, ThreadPoolIndexer, KvIndexer, KvIndexerInterface, KvIndexerMetrics, ThreadPoolIndexer,
}; };
use dynamo_kv_router::protocols::{BlockHashOptions, OverlapScores, RouterEvent, WorkerId}; use dynamo_kv_router::protocols::{BlockHashOptions, OverlapScores, RouterEvent, WorkerId};
use dynamo_kv_router::scheduling::TierOverlapBlocks;
use tokio::sync::mpsc; use tokio::sync::mpsc;
use tokio_util::sync::CancellationToken; use tokio_util::sync::CancellationToken;
use uuid::Uuid; use uuid::Uuid;
...@@ -132,7 +133,7 @@ impl KvReplayRouter { ...@@ -132,7 +133,7 @@ impl KvReplayRouter {
let (_worker_config_tx, worker_config_rx) = let (_worker_config_tx, worker_config_rx) =
tokio::sync::watch::channel(workers_with_configs); tokio::sync::watch::channel(workers_with_configs);
let selector = replay_selector(&config); let selector = replay_selector(&config);
let policy = replay_policy(&config, args); let policy = replay_policy(&config);
let scheduler_cancel = CancellationToken::new(); let scheduler_cancel = CancellationToken::new();
let scheduler = Arc::new(dynamo_kv_router::LocalScheduler::new( let scheduler = Arc::new(dynamo_kv_router::LocalScheduler::new(
slots, slots,
...@@ -190,6 +191,21 @@ impl KvReplayRouter { ...@@ -190,6 +191,21 @@ impl KvReplayRouter {
.indexer .indexer
.find_matches_for_request(&request.tokens, None) .find_matches_for_request(&request.tokens, None)
.await?; .await?;
let effective_overlap_blocks = overlaps
.scores
.iter()
.map(|(worker, overlap)| (*worker, *overlap as f64))
.collect();
let effective_cached_tokens = overlaps
.scores
.iter()
.map(|(worker, overlap)| {
(
*worker,
(*overlap as usize) * usize::try_from(self.block_size).unwrap_or(0),
)
})
.collect();
let token_seq = self.config.compute_seq_hashes_for_tracking( let token_seq = self.config.compute_seq_hashes_for_tracking(
&request.tokens, &request.tokens,
self.block_size, self.block_size,
...@@ -197,13 +213,18 @@ impl KvReplayRouter { ...@@ -197,13 +213,18 @@ impl KvReplayRouter {
BlockHashOptions::default(), BlockHashOptions::default(),
None, None,
); );
let tree_sizes: std::collections::HashMap<_, _> =
overlaps.tree_sizes.iter().map(|(k, v)| (*k, *v)).collect();
let response = self let response = self
.scheduler .scheduler
.schedule( .schedule(
Some(uuid.to_string()), Some(uuid.to_string()),
request.tokens.len(), request.tokens.len(),
token_seq, token_seq,
overlaps, TierOverlapBlocks::default(),
effective_overlap_blocks,
effective_cached_tokens,
tree_sizes,
None, None,
true, true,
None, None,
...@@ -256,7 +277,7 @@ impl KvReplayRouter { ...@@ -256,7 +277,7 @@ impl KvReplayRouter {
self.scheduler.get_potential_loads( self.scheduler.get_potential_loads(
None, None,
isl_tokens, isl_tokens,
OverlapScores::default(), std::collections::HashMap::new(),
track_prefill_tokens, track_prefill_tokens,
) )
} }
......
...@@ -117,9 +117,6 @@ pub(crate) fn replay_router_config( ...@@ -117,9 +117,6 @@ pub(crate) fn replay_router_config(
config config
} }
pub(super) fn replay_policy( pub(super) fn replay_policy(config: &KvRouterConfig) -> RouterSchedulingPolicy {
config: &KvRouterConfig, RouterSchedulingPolicy::new(config.router_queue_policy)
args: &MockEngineArgs,
) -> RouterSchedulingPolicy {
RouterSchedulingPolicy::new(config.router_queue_policy, args.block_size)
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment