Unverified commit fc92fc18, authored by Yan Ru Pei and committed by GitHub
Browse files

chore: clean ups in kv-router (#5771)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 842f0f15
...@@ -700,6 +700,15 @@ dependencies = [ ...@@ -700,6 +700,15 @@ dependencies = [
"objc2", "objc2",
] ]
[[package]]
name = "bs58"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf88ba1141d185c399bee5288d850d63b8369520c1eafc32a0430b5b6c287bf4"
dependencies = [
"tinyvec",
]
[[package]] [[package]]
name = "bs62" name = "bs62"
version = "0.1.4" version = "0.1.4"
...@@ -1598,6 +1607,24 @@ dependencies = [ ...@@ -1598,6 +1607,24 @@ dependencies = [
"anyhow", "anyhow",
] ]
[[package]]
name = "dynamo-kv-router"
version = "0.9.0"
dependencies = [
"anyhow",
"async-trait",
"dynamo-runtime",
"dynamo-tokens",
"prometheus",
"rand 0.9.2",
"serde",
"thiserror 2.0.17",
"tokio",
"tokio-util",
"tracing",
"xxhash-rust",
]
[[package]] [[package]]
name = "dynamo-llm" name = "dynamo-llm"
version = "0.9.0" version = "0.9.0"
...@@ -1626,9 +1653,11 @@ dependencies = [ ...@@ -1626,9 +1653,11 @@ dependencies = [
"derive_builder", "derive_builder",
"dialoguer", "dialoguer",
"dynamo-async-openai", "dynamo-async-openai",
"dynamo-kv-router",
"dynamo-memory", "dynamo-memory",
"dynamo-parsers", "dynamo-parsers",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens",
"either", "either",
"erased-serde", "erased-serde",
"etcd-client", "etcd-client",
...@@ -1833,6 +1862,20 @@ dependencies = [ ...@@ -1833,6 +1862,20 @@ dependencies = [
"zmq", "zmq",
] ]
[[package]]
name = "dynamo-tokens"
version = "0.9.0"
dependencies = [
"bs58",
"bytemuck",
"dashmap 6.1.0",
"derive-getters",
"serde",
"thiserror 2.0.17",
"uuid",
"xxhash-rust",
]
[[package]] [[package]]
name = "ed25519" name = "ed25519"
version = "2.2.3" version = "2.2.3"
......
...@@ -360,7 +360,7 @@ impl KvEventPublisher { ...@@ -360,7 +360,7 @@ impl KvEventPublisher {
#[pyclass] #[pyclass]
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct OverlapScores { pub(crate) struct OverlapScores {
inner: llm_rs::kv_router::indexer::OverlapScores, inner: llm_rs::kv_router::protocols::OverlapScores,
} }
#[pymethods] #[pymethods]
...@@ -386,7 +386,7 @@ enum RadixTreeRequest { ...@@ -386,7 +386,7 @@ enum RadixTreeRequest {
FindMatches { FindMatches {
local_block_hashes: Vec<llm_rs::kv_router::protocols::LocalBlockHash>, local_block_hashes: Vec<llm_rs::kv_router::protocols::LocalBlockHash>,
early_exit: bool, early_exit: bool,
response_tx: mpsc::SyncSender<llm_rs::kv_router::indexer::OverlapScores>, response_tx: mpsc::SyncSender<llm_rs::kv_router::protocols::OverlapScores>,
}, },
ApplyEvent { ApplyEvent {
worker_id: WorkerId, worker_id: WorkerId,
...@@ -402,7 +402,7 @@ enum RadixTreeRequest { ...@@ -402,7 +402,7 @@ enum RadixTreeRequest {
response_tx: mpsc::SyncSender<()>, response_tx: mpsc::SyncSender<()>,
}, },
DumpTreeAsEvents { DumpTreeAsEvents {
response_tx: mpsc::SyncSender<Vec<llm_rs::kv_router::indexer::RouterEvent>>, response_tx: mpsc::SyncSender<Vec<llm_rs::kv_router::protocols::RouterEvent>>,
}, },
Shutdown, Shutdown,
} }
...@@ -616,8 +616,10 @@ impl RadixTree { ...@@ -616,8 +616,10 @@ impl RadixTree {
>(&kv_cache_event_bytes) >(&kv_cache_event_bytes)
{ {
Ok(kv_cache_event) => { Ok(kv_cache_event) => {
let router_event = let router_event = llm_rs::kv_router::protocols::RouterEvent::new(
llm_rs::kv_router::indexer::RouterEvent::new(worker_id, kv_cache_event); worker_id,
kv_cache_event,
);
match radix_tree.apply_event(router_event) { match radix_tree.apply_event(router_event) {
Ok(_) => Ok(()), Ok(_) => Ok(()),
Err(e) => Err(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>( Err(e) => Err(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(
...@@ -898,7 +900,7 @@ impl KvRecorder { ...@@ -898,7 +900,7 @@ impl KvRecorder {
// Spawn a task to forward events to the recorder // Spawn a task to forward events to the recorder
tokio::spawn(async move { tokio::spawn(async move {
while let Some(event) = kv_events_rx.next().await { while let Some(event) = kv_events_rx.next().await {
let event: llm_rs::kv_router::indexer::RouterEvent = let event: llm_rs::kv_router::protocols::RouterEvent =
serde_json::from_slice(&event.payload).unwrap(); serde_json::from_slice(&event.payload).unwrap();
tracing::debug!("KvRecorder received kv event: {:?}", event); tracing::debug!("KvRecorder received kv event: {:?}", event);
if let Err(e) = event_tx.send(event).await { if let Err(e) = event_tx.send(event).await {
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Flat HashMap baseline for benchmarking comparison with RadixTree.
//!
//! This module provides a `FlatHashMap` structure that has full feature parity with `RadixTree`
//! but uses flat HashMaps instead of a tree structure. This isolates the overhead of
//! tree traversal (pointer chasing) from pure HashMap operations.
//!
//! The `find_matches` API matches RadixTree exactly: it takes `LocalBlockHash` values
//! and internally computes the cumulative sequence hashes for lookup.
use std::collections::{HashMap, HashSet};
use crate::protocols::{
ExternalSequenceBlockHash, KvCacheEvent, KvCacheEventData, KvCacheStoreData,
KvCacheStoredBlockData, LocalBlockHash, OverlapScores, RouterEvent, WorkerId, WorkerWithDpRank,
compute_seq_hash_for_block,
};
/// A flat HashMap-based structure for KV cache indexing.
///
/// Unlike RadixTree which uses a tree of nodes connected by pointers,
/// FlatHashMap uses bidirectional HashMaps. This provides the same
/// find_matches semantics but with better cache locality.
///
/// # Structure
///
/// - `block_to_workers`: Maps ExternalSequenceBlockHash -> Set of workers that have this block.
///   Used for efficient find_matches lookups.
/// - `worker_to_blocks`: Maps Worker -> Set of ExternalSequenceBlockHash they have.
///   Used for remove operations and current_size.
///
/// The two maps mirror each other: every `(worker, block)` pair recorded in one
/// is also recorded in the other, and the mutating methods update both together.
///
/// `Debug` and `Clone` are derived so the index can be logged and snapshotted
/// (for example to compare states in benchmarks).
#[derive(Debug, Clone)]
pub struct FlatHashMap {
    /// Primary index: block -> workers (for find_matches)
    block_to_workers: HashMap<ExternalSequenceBlockHash, HashSet<WorkerWithDpRank>>,
    /// Secondary index: worker -> blocks (for remove and current_size)
    worker_to_blocks: HashMap<WorkerWithDpRank, HashSet<ExternalSequenceBlockHash>>,
}
impl FlatHashMap {
/// Build an index that tracks no workers and no blocks.
pub fn new() -> Self {
    let block_to_workers = HashMap::new();
    let worker_to_blocks = HashMap::new();
    Self {
        block_to_workers,
        worker_to_blocks,
    }
}
/// Record that `worker` holds every block in `block_hashes`.
///
/// Both indexes are updated. The worker gets an entry in the
/// worker -> blocks index even when `block_hashes` is empty.
pub fn store(&mut self, worker: WorkerWithDpRank, block_hashes: &[ExternalSequenceBlockHash]) {
    // worker -> blocks: add all hashes to this worker's set in one go.
    self.worker_to_blocks
        .entry(worker)
        .or_default()
        .extend(block_hashes.iter().copied());

    // block -> workers: register the worker as a holder of each block.
    for &hash in block_hashes {
        self.block_to_workers.entry(hash).or_default().insert(worker);
    }
}
/// Remove blocks for a worker.
///
/// Updates both indexes for each block.
pub fn remove(&mut self, worker: WorkerWithDpRank, block_hashes: &[ExternalSequenceBlockHash]) {
let Some(worker_blocks) = self.worker_to_blocks.get_mut(&worker) else {
return;
};
for &block_hash in block_hashes {
// Remove from worker -> blocks index
worker_blocks.remove(&block_hash);
// Remove from block -> workers index
if let Some(workers) = self.block_to_workers.get_mut(&block_hash) {
workers.remove(&worker);
if workers.is_empty() {
self.block_to_workers.remove(&block_hash);
}
}
}
// Clean up empty worker entry
if worker_blocks.is_empty() {
self.worker_to_blocks.remove(&worker);
}
}
/// Score workers by how long a prefix of `sequence` they hold.
///
/// Takes `LocalBlockHash` values (the same signature as `RadixTree::find_matches`)
/// and derives the cumulative sequence hashes used as lookup keys internally.
///
/// # Algorithm
///
/// Walk the cumulative hashes in order. The first block seeds the set of
/// candidate workers; every later block narrows that set to the workers that
/// also hold it. A worker that drops out is scored with the number of blocks
/// it matched before dropping. The walk stops at the first block nobody holds,
/// when no candidates remain, or (with `early_exit`) as soon as exactly one
/// candidate remains. Workers still in the set at the end are scored with the
/// number of blocks walked.
///
/// `tree_sizes` is filled in for every scored worker that is tracked in the
/// worker -> blocks index.
///
/// This is O(depth) HashMap lookups + O(num_workers) set operations per level.
pub fn find_matches(&self, sequence: Vec<LocalBlockHash>, early_exit: bool) -> OverlapScores {
    let mut scores = OverlapScores::new();
    if sequence.is_empty() {
        return scores;
    }

    // Workers that have matched every block so far; `None` until the first hit.
    let mut survivors: Option<HashSet<WorkerWithDpRank>> = None;
    // Number of leading blocks processed so far.
    let mut matched = 0u32;

    for seq_hash in compute_seq_hash_for_block(&sequence) {
        let key = ExternalSequenceBlockHash(seq_hash);
        let Some(holders) = self.block_to_workers.get(&key) else {
            // Nobody holds this block: the common prefix ends here.
            break;
        };

        let remaining = match survivors.take() {
            // First block: every holder is a candidate.
            None => holders.clone(),
            // Later blocks: narrow in place, scoring each worker that drops out
            // with the depth it reached.
            Some(mut previous) => {
                previous.retain(|candidate| {
                    let keeps_matching = holders.contains(candidate);
                    if !keeps_matching {
                        scores.scores.insert(*candidate, matched);
                    }
                    keeps_matching
                });
                previous
            }
        };
        matched += 1;

        let stop = remaining.is_empty() || (early_exit && remaining.len() == 1);
        survivors = Some(remaining);
        if stop {
            break;
        }
    }

    // Whoever is still standing matched every block that was walked.
    if let Some(finalists) = survivors {
        scores
            .scores
            .extend(finalists.into_iter().map(|worker| (worker, matched)));
    }

    // Attach the per-worker block counts for every scored worker.
    let sizes = scores.scores.keys().filter_map(|worker| {
        self.worker_to_blocks
            .get(worker)
            .map(|blocks| (*worker, blocks.len()))
    });
    scores.tree_sizes.extend(sizes);

    scores
}
/// Apply a RouterEvent (for API compatibility with RadixTree).
pub fn apply_event(&mut self, event: RouterEvent) {
let worker = WorkerWithDpRank::new(event.worker_id, event.event.dp_rank);
match event.event.data {
KvCacheEventData::Stored(store_data) => {
let hashes: Vec<_> = store_data.blocks.iter().map(|b| b.block_hash).collect();
self.store(worker, &hashes);
}
KvCacheEventData::Removed(remove_data) => {
self.remove(worker, &remove_data.block_hashes);
}
KvCacheEventData::Cleared => {
self.clear_all_blocks(worker.worker_id);
}
}
}
/// Shared implementation behind `remove_worker` and `clear_all_blocks`.
///
/// Drops every block held by any tracked dp-rank entry of `worker_id` from
/// both indexes. With `keep_worker` set, each affected entry is re-inserted
/// with an empty block set so it stays tracked; otherwise it is removed.
fn remove_or_clear_worker_blocks(&mut self, worker_id: WorkerId, keep_worker: bool) {
    // Snapshot the matching keys first so the map can be mutated below.
    let targets: Vec<WorkerWithDpRank> = self
        .worker_to_blocks
        .keys()
        .copied()
        .filter(|candidate| candidate.worker_id == worker_id)
        .collect();

    for target in targets {
        let Some(owned) = self.worker_to_blocks.remove(&target) else {
            continue;
        };

        for hash in owned {
            let Some(holders) = self.block_to_workers.get_mut(&hash) else {
                continue;
            };
            holders.remove(&target);
            if holders.is_empty() {
                self.block_to_workers.remove(&hash);
            }
        }

        if keep_worker {
            // Keep the worker tracked, now holding no blocks.
            self.worker_to_blocks.insert(target, HashSet::new());
        }
    }
}
/// Drop a worker (all of its tracked dp-rank entries) and every block it holds.
pub fn remove_worker(&mut self, worker_id: WorkerId) {
    let keep_worker = false;
    self.remove_or_clear_worker_blocks(worker_id, keep_worker);
}
/// Drop every block held by a worker while keeping its already-tracked
/// dp-rank entries in the index (each with an empty block set).
pub fn clear_all_blocks(&mut self, worker_id: WorkerId) {
    let keep_worker = true;
    self.remove_or_clear_worker_blocks(worker_id, keep_worker);
}
/// List the worker IDs currently tracked in the index.
///
/// Entries that differ only in dp_rank collapse to a single id; the result
/// is sorted in ascending order.
pub fn get_workers(&self) -> Vec<WorkerId> {
    let mut ids: Vec<WorkerId> = self
        .worker_to_blocks
        .keys()
        .map(|tracked| tracked.worker_id)
        .collect();
    // After sorting, equal ids are adjacent, so `dedup` leaves one of each.
    ids.sort_unstable();
    ids.dedup();
    ids
}
/// Export the index as `RouterEvent`s that can rebuild the same state.
/// For API compatibility with RadixTree.
///
/// Emits one single-block `Stored` event per `(worker, block)` pair, with
/// event ids counting up from 0. Parent links and original token hashes are
/// not tracked by this structure, so `parent_hash` is `None` and
/// `tokens_hash` is the placeholder `LocalBlockHash(0)`.
pub fn dump_tree_as_events(&self) -> Vec<RouterEvent> {
    self.worker_to_blocks
        .iter()
        .flat_map(|(worker, blocks)| blocks.iter().map(move |hash| (*worker, *hash)))
        .zip(0u64..)
        .map(|((worker, block_hash), event_id)| {
            let stored = KvCacheStoreData {
                parent_hash: None, // FlatHashMap doesn't track parent relationships
                blocks: vec![KvCacheStoredBlockData {
                    block_hash,
                    mm_extra_info: None,
                    // We don't have the original tokens_hash, use a placeholder
                    tokens_hash: LocalBlockHash(0),
                }],
            };
            RouterEvent {
                worker_id: worker.worker_id,
                event: KvCacheEvent {
                    event_id,
                    data: KvCacheEventData::Stored(stored),
                    dp_rank: worker.dp_rank,
                },
            }
        })
        .collect()
}
/// Count the `(worker, block)` pairs stored, summed over all tracked workers.
pub fn current_size(&self) -> usize {
    self.worker_to_blocks.values().map(HashSet::len).sum()
}
}
impl Default for FlatHashMap {
fn default() -> Self {
Self::new()
}
}
This diff is collapsed.
...@@ -7,9 +7,16 @@ ...@@ -7,9 +7,16 @@
//! efficient KV cache lookup and routing in distributed LLM inference systems. //! efficient KV cache lookup and routing in distributed LLM inference systems.
pub mod approx; pub mod approx;
pub mod flat_hashmap;
pub mod indexer; pub mod indexer;
pub mod protocols; pub mod protocols;
pub mod radix_tree;
// Re-export key types for convenience // Re-export key types for convenience
pub use indexer::{MaybeError, RadixTree, RouterEvent}; pub use flat_hashmap::FlatHashMap;
pub use protocols::{LocalBlockHash, WorkerId, compute_block_hash_for_seq}; pub use indexer::MaybeError;
pub use protocols::{
KvCacheEventError, LocalBlockHash, OverlapScores, RouterEvent, WorkerId,
compute_block_hash_for_seq,
};
pub use radix_tree::RadixTree;
...@@ -453,6 +453,105 @@ impl<'de> Deserialize<'de> for ExternalSequenceBlockHash { ...@@ -453,6 +453,105 @@ impl<'de> Deserialize<'de> for ExternalSequenceBlockHash {
} }
} }
// ------
// Router Event Types
// ------
/// Errors that can occur during KV Cache Event processing.
///
/// The `Display` text of each variant comes from its `#[error(...)]` attribute.
#[derive(Debug, thiserror::Error)]
pub enum KvCacheEventError {
/// A parent block needed to process the event could not be found.
#[error("Failed to find parent block")]
ParentBlockNotFound,
/// A block referenced by the event could not be found.
#[error("Failed to find block")]
BlockNotFound,
/// The blocks in the event do not form a valid sequence.
// NOTE(review): the exact validity rule is enforced by the indexers, not here; confirm against callers.
#[error("Invalid block sequence")]
InvalidBlockSequence,
}
/// A [`KvCacheEvent`] on a specific LLM worker denoted by [`WorkerId`].
///
/// Derives `Serialize`/`Deserialize`, so it can be encoded and decoded with serde,
/// and `PartialEq`, so two events can be compared for equality.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct RouterEvent {
/// The ID of the worker emitting the event.
pub worker_id: WorkerId,
/// The cache event associated with the worker.
pub event: KvCacheEvent,
}
impl RouterEvent {
/// Create a new `RouterEvent`.
///
/// ### Arguments
///
/// * `worker_id` - The ID of the worker emitting the event.
/// * `event` - The cache event.
///
/// ### Returns
///
/// A new `RouterEvent`.
pub fn new(worker_id: WorkerId, event: KvCacheEvent) -> Self {
Self { worker_id, event }
}
}
/// Scores representing the overlap of workers (with their dp_rank).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OverlapScores {
/// Map of worker (with dp_rank) to score.
pub scores: std::collections::HashMap<WorkerWithDpRank, u32>,
/// List of frequencies that the blocks have been accessed. Entries with value 0 are omitted.
/// `add_frequency` debug-asserts that each appended value is no larger than the previous one.
pub frequencies: Vec<usize>,
/// Map of worker to their tree size (number of blocks in the tree for that worker).
pub tree_sizes: std::collections::HashMap<WorkerWithDpRank, usize>,
}
impl Default for OverlapScores {
fn default() -> Self {
Self::new()
}
}
impl OverlapScores {
    /// Create an empty `OverlapScores`.
    ///
    /// ### Returns
    ///
    /// A value with no scores, no tree sizes, and an empty frequency list
    /// pre-allocated for 32 entries.
    pub fn new() -> Self {
        let scores = std::collections::HashMap::new();
        let tree_sizes = std::collections::HashMap::new();
        let frequencies = Vec::with_capacity(32);
        Self {
            scores,
            frequencies,
            tree_sizes,
        }
    }

    /// Add one to the score of every worker yielded by `workers`.
    ///
    /// Workers seen for the first time start from a score of 0 before the increment.
    ///
    /// ### Arguments
    ///
    /// * `workers` - An iterator over `WorkerWithDpRank` references.
    pub fn update_scores<'a, I>(&mut self, workers: I)
    where
        I: IntoIterator<Item = &'a WorkerWithDpRank>,
    {
        workers
            .into_iter()
            .for_each(|worker| *self.scores.entry(*worker).or_insert(0) += 1);
    }

    /// Append an entry to the frequency list; zero frequencies are skipped.
    ///
    /// In debug builds, asserts that the new entry is not larger than the
    /// previous one.
    pub fn add_frequency(&mut self, frequency: usize) {
        if frequency == 0 {
            return;
        }
        if let Some(&previous) = self.frequencies.last() {
            debug_assert!(previous >= frequency);
        }
        self.frequencies.push(frequency);
    }
}
// ------ // ------
// TokensWithHashes // TokensWithHashes
// ------ // ------
...@@ -556,8 +655,67 @@ impl TokensWithHashes { ...@@ -556,8 +655,67 @@ impl TokensWithHashes {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::*; use super::*;
use rstest::rstest;
use serde_json; use serde_json;
/// `RouterEvent::new` must carry the worker id and the event payload through unchanged.
#[test]
fn test_router_event_new() {
    let worker_id = 0;

    // Build the event bottom-up: one stored block, wrapped in a store op.
    let stored_block = KvCacheStoredBlockData {
        block_hash: ExternalSequenceBlockHash(0),
        mm_extra_info: None,
        tokens_hash: LocalBlockHash(13226331709069118873),
    };
    let store_data = KvCacheStoreData {
        parent_hash: None,
        blocks: vec![stored_block],
    };
    let kv_cache_event = KvCacheEvent {
        event_id: 1,
        data: KvCacheEventData::Stored(store_data),
        dp_rank: 0,
    };

    let router_event = RouterEvent::new(worker_id, kv_cache_event);
    assert_eq!(router_event.worker_id, worker_id);
    assert_eq!(router_event.event.event_id, 1);

    let KvCacheEventData::Stored(store_op) = &router_event.event.data else {
        panic!("Expected KvCacheEventData::Stored");
    };
    assert_eq!(store_op.blocks.len(), 1);
    assert_eq!(
        store_op.blocks[0].tokens_hash,
        compute_block_hash(b"test data")
    );
    assert_eq!(store_op.blocks[0].block_hash, ExternalSequenceBlockHash(0));
}
/// A default-constructed `OverlapScores` starts with no scores.
#[test]
fn test_overlap_scores_default() {
    let overlap_scores = OverlapScores::default();
    assert!(overlap_scores.scores.is_empty());
}
/// The number of hashes produced for sequences at and just past the
/// block-size boundaries: `size` and `size + 1` tokens give 1 hash,
/// `2 * size + 1` tokens give 2.
#[rstest]
#[case(11)]
#[case(32)]
#[case(64)]
fn test_compute_block_hash_for_seq(#[case] kv_block_size: u32) {
    // Number of block hashes produced for the token ids `0..len`.
    let hashed_blocks = |len: u32| {
        let sequence: Vec<u32> = (0..len).collect();
        compute_block_hash_for_seq(&sequence, kv_block_size, None).len()
    };

    // kv_block_size elements
    assert_eq!(hashed_blocks(kv_block_size), 1);
    // kv_block_size + 1 elements
    assert_eq!(hashed_blocks(kv_block_size + 1), 1);
    // 2 * kv_block_size + 1 elements
    assert_eq!(hashed_blocks(2 * kv_block_size + 1), 2);
}
#[test] #[test]
fn test_local_block_hash_serialization() { fn test_local_block_hash_serialization() {
let hash = LocalBlockHash(12345); let hash = LocalBlockHash(12345);
......
This diff is collapsed.
...@@ -44,11 +44,11 @@ use crate::{ ...@@ -44,11 +44,11 @@ use crate::{
discovery::RuntimeConfigsWithNotify, discovery::RuntimeConfigsWithNotify,
kv_router::{ kv_router::{
approx::PruneConfig, approx::PruneConfig,
indexer::{KvIndexer, KvIndexerInterface, KvRouterError, OverlapScores, RouterEvent}, indexer::{KvIndexer, KvIndexerInterface, KvRouterError},
protocols::{ protocols::{
LocalBlockHash, RouterRequest, RouterResponse, TokensWithHashes, WorkerId, LocalBlockHash, OverlapScores, RouterEvent, RouterRequest, RouterResponse,
WorkerSelectionResult, WorkerWithDpRank, compute_block_hash_for_seq, TokensWithHashes, WorkerId, WorkerSelectionResult, WorkerWithDpRank,
compute_seq_hash_for_block, compute_block_hash_for_seq, compute_seq_hash_for_block,
}, },
scheduler::{KvScheduler, KvSchedulerError, PotentialLoad, SchedulingRequest}, scheduler::{KvScheduler, KvSchedulerError, PotentialLoad, SchedulingRequest},
sequence::SequenceError, sequence::SequenceError,
......
...@@ -42,7 +42,7 @@ fn create_kv_stream_name(component: &Component, subject: &str) -> String { ...@@ -42,7 +42,7 @@ fn create_kv_stream_name(component: &Component, subject: &str) -> String {
use crate::kv_router::{ use crate::kv_router::{
KV_EVENT_SUBJECT, KV_METRICS_SUBJECT, WORKER_KV_INDEXER_BUFFER_SIZE, KV_EVENT_SUBJECT, KV_METRICS_SUBJECT, WORKER_KV_INDEXER_BUFFER_SIZE,
indexer::{KvIndexerMetrics, LocalKvIndexer, RouterEvent}, indexer::{KvIndexerMetrics, LocalKvIndexer},
protocols::*, protocols::*,
worker_query::start_worker_kv_query_endpoint, worker_query::start_worker_kv_query_endpoint,
}; };
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use crate::kv_router::indexer::RouterEvent; use crate::kv_router::protocols::RouterEvent;
use crate::recorder::Recorder; use crate::recorder::Recorder;
// Type alias for backward compatibility // Type alias for backward compatibility
......
...@@ -17,8 +17,7 @@ use super::KV_HIT_RATE_SUBJECT; ...@@ -17,8 +17,7 @@ use super::KV_HIT_RATE_SUBJECT;
use super::KvRouterConfig; use super::KvRouterConfig;
use super::RouterConfigOverride; use super::RouterConfigOverride;
use super::WorkerSelector; use super::WorkerSelector;
use super::indexer::OverlapScores; use super::protocols::{DpRank, OverlapScores, WorkerId, WorkerSelectionResult, WorkerWithDpRank};
use super::protocols::{DpRank, WorkerId, WorkerSelectionResult, WorkerWithDpRank};
use super::sequence::{ActiveSequencesMultiWorker, SequenceError}; use super::sequence::{ActiveSequencesMultiWorker, SequenceError};
use dynamo_tokens::SequenceHash; use dynamo_tokens::SequenceHash;
......
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
//! Each block is identified by a hash of its contents, allowing for deduplication when multiple //! Each block is identified by a hash of its contents, allowing for deduplication when multiple
//! requests share common prefixes (e.g., system prompts, few-shot examples). //! requests share common prefixes (e.g., system prompts, few-shot examples).
use crate::kv_router::indexer::OverlapScores; use crate::kv_router::protocols::OverlapScores;
use anyhow::Result; use anyhow::Result;
use dashmap::DashMap; use dashmap::DashMap;
use derive_getters::Getters; use derive_getters::Getters;
......
...@@ -19,8 +19,8 @@ use tokio_util::sync::CancellationToken; ...@@ -19,8 +19,8 @@ use tokio_util::sync::CancellationToken;
use crate::kv_router::{ use crate::kv_router::{
KV_EVENT_SUBJECT, RADIX_STATE_BUCKET, RADIX_STATE_FILE, KV_EVENT_SUBJECT, RADIX_STATE_BUCKET, RADIX_STATE_FILE,
indexer::{DumpRequest, GetWorkersRequest, RouterEvent, WorkerKvQueryResponse}, indexer::{DumpRequest, GetWorkersRequest, WorkerKvQueryResponse},
protocols::WorkerId, protocols::{RouterEvent, WorkerId},
router_discovery_query, router_discovery_query,
worker_query::WorkerQueryClient, worker_query::WorkerQueryClient,
}; };
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment