"lib/bindings/python/vscode:/vscode.git/clone" did not exist on "050906b5930768d03386fcc13259055fe6d41f5b"
Unverified commit 02666f04, authored by Yan Ru Pei, committed by GitHub
Browse files

chore(kv-router): remove sharded indexer path (#8041)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent ba274a03
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//! Combined benchmark for KvIndexer, KvIndexerSharded, and PositionalIndexer (nested).
//! Combined benchmark for KvIndexer, PositionalIndexer (nested), and ConcurrentRadixTree.
//!
//! Provides two modes:
//! - `microbench`: Per-operation latency benchmarks comparing indexer implementations
//! - `stress`: Queue saturation stress test under load
//!
//! Supported indexer types: single, sharded, nested, all
//! Supported indexer types: single, nested, concurrent, all
//!
//! Run with:
//! cargo bench --package dynamo-bench --bench kv_indexer_bench -- microbench --help
......@@ -21,9 +21,7 @@ use clap::{Args, Parser, Subcommand, ValueEnum};
use dynamo_bench::common::LatencyStats;
use dynamo_kv_router::{
ConcurrentRadixTree,
indexer::{
KvIndexer, KvIndexerInterface, KvIndexerMetrics, KvIndexerSharded, ThreadPoolIndexer,
},
indexer::{KvIndexer, KvIndexerInterface, KvIndexerMetrics, ThreadPoolIndexer},
nested_map::PositionalIndexer,
protocols::{LocalBlockHash, RouterEvent},
};
......@@ -40,7 +38,7 @@ use tokio_util::sync::CancellationToken;
#[derive(Parser)]
#[command(name = "kv_indexer_bench")]
#[command(about = "Combined benchmark for KvIndexer, KvIndexerSharded, and PositionalIndexer")]
#[command(about = "Combined benchmark for KvIndexer, PositionalIndexer, and ConcurrentRadixTree")]
struct Cli {
#[command(subcommand)]
command: Command,
......@@ -63,8 +61,6 @@ enum Command {
enum IndexerType {
/// Non-sharded KvIndexer (single background thread)
Single,
/// Sharded KvIndexer (multiple shards with separate trees)
Sharded,
/// Nested PositionalIndexer (position-based HashMap with jump search)
Nested,
/// Concurrent radix tree (lock-per-node with DashMap lookup)
......@@ -122,9 +118,9 @@ struct MicrobenchArgs {
#[arg(long, value_enum, default_value = "all")]
indexer_type: IndexerType,
/// Number of shards for sharded indexer
/// Number of event worker threads for nested/concurrent indexers
#[arg(long, default_value = "4")]
num_shards: usize,
num_event_workers: usize,
/// Jump size for nested/positional indexer
#[arg(long, default_value = "32")]
......@@ -164,9 +160,9 @@ struct StressArgs {
#[arg(long, value_enum, default_value = "single")]
indexer_type: IndexerType,
/// Number of shards for sharded indexer
/// Number of event worker threads for nested/concurrent indexers
#[arg(long, default_value = "4")]
num_shards: usize,
num_event_workers: usize,
/// Jump size for nested/positional indexer
#[arg(long, default_value = "32")]
......@@ -177,7 +173,7 @@ struct StressArgs {
// Benchable Indexer Trait
// ============================================================================
/// Trait for abstracting over KvIndexer and KvIndexerSharded
/// Trait for abstracting over benchmarked indexers
#[async_trait::async_trait]
trait BenchableIndexer: Send + Sync {
async fn apply_event(&mut self, event: RouterEvent);
......@@ -207,25 +203,6 @@ impl BenchableIndexer for KvIndexer {
}
}
/// Adapter that lets the benchmark harness drive a `KvIndexerSharded` through
/// the common `BenchableIndexer` trait. Every method forwards to the
/// corresponding `KvIndexerInterface` method on the same value.
#[async_trait::async_trait]
impl BenchableIndexer for KvIndexerSharded {
/// Applies one router event by delegating to `KvIndexerInterface::apply_event`
/// and awaiting its completion.
async fn apply_event(&mut self, event: RouterEvent) {
KvIndexerInterface::apply_event(self, event).await;
}
/// Runs a match lookup for `sequence` via `KvIndexerInterface::find_matches`.
///
/// The successful result is discarded; only success or failure is reported
/// to the caller.
///
/// # Errors
/// Propagates the error returned by `KvIndexerInterface::find_matches`.
async fn find_matches(
&self,
sequence: Vec<LocalBlockHash>,
) -> Result<(), dynamo_kv_router::indexer::KvRouterError> {
// `?` forwards any lookup error; the match result itself is dropped.
KvIndexerInterface::find_matches(self, sequence).await?;
Ok(())
}
/// Returns the fixed label identifying this indexer implementation.
fn name(&self) -> &str {
"KvIndexerSharded"
}
}
#[async_trait::async_trait]
impl BenchableIndexer for ThreadPoolIndexer<PositionalIndexer> {
async fn apply_event(&mut self, event: RouterEvent) {
......@@ -697,6 +674,16 @@ async fn run_microbench_mode(args: MicrobenchArgs) {
eprintln!("Error: size must be >= depth");
std::process::exit(1);
}
if matches!(
args.indexer_type,
IndexerType::Nested | IndexerType::Concurrent | IndexerType::All
) && args.num_event_workers == 0
{
eprintln!(
"Error: num_event_workers must be > 0 when using Nested, Concurrent, or All indexer type"
);
std::process::exit(1);
}
println!("KvIndexer Microbenchmark");
println!("========================\n");
......@@ -716,7 +703,7 @@ async fn run_microbench_mode(args: MicrobenchArgs) {
args.prefix_prompt_ratio * 100.0
);
println!(" Prefix prompt groups: {}", args.num_prefix_prompts);
println!(" Num shards (for sharded): {}", args.num_shards);
println!(" Event worker threads: {}", args.num_event_workers);
println!(" Indexer type: {:?}", args.indexer_type);
println!(" Benchmark type: {}", args.benchmark_type);
println!(
......@@ -751,26 +738,11 @@ async fn run_microbench_mode(args: MicrobenchArgs) {
tokio::time::sleep(Duration::from_millis(50)).await;
}
// Benchmark sharded indexer
if matches!(args.indexer_type, IndexerType::Sharded | IndexerType::All) {
let token = CancellationToken::new();
let mut indexer = KvIndexerSharded::new(
token.clone(),
args.num_shards,
args.common.block_size,
metrics.clone(),
);
let result = run_microbenchmarks(&mut indexer, sequences, extra_sequences, &args).await;
results.push(result);
token.cancel();
tokio::time::sleep(Duration::from_millis(50)).await;
}
// Benchmark nested indexer
if matches!(args.indexer_type, IndexerType::Nested | IndexerType::All) {
let mut indexer = ThreadPoolIndexer::new(
PositionalIndexer::new(args.jump_size),
args.num_shards,
args.num_event_workers,
args.common.block_size,
);
let result = run_microbenchmarks(&mut indexer, sequences, extra_sequences, &args).await;
......@@ -786,7 +758,7 @@ async fn run_microbench_mode(args: MicrobenchArgs) {
) {
let mut indexer = ThreadPoolIndexer::new(
ConcurrentRadixTree::new(),
args.num_shards,
args.num_event_workers,
args.common.block_size,
);
let result = run_microbenchmarks(&mut indexer, sequences, extra_sequences, &args).await;
......@@ -1226,10 +1198,12 @@ async fn run_stress_mode(args: StressArgs) {
}
if matches!(
args.indexer_type,
IndexerType::Sharded | IndexerType::Nested | IndexerType::All
) && args.num_shards == 0
IndexerType::Nested | IndexerType::Concurrent | IndexerType::All
) && args.num_event_workers == 0
{
eprintln!("Error: num_shards must be > 0 when using Sharded, Nested, or All indexer type");
eprintln!(
"Error: num_event_workers must be > 0 when using Nested, Concurrent, or All indexer type"
);
std::process::exit(1);
}
......@@ -1254,11 +1228,13 @@ async fn run_stress_mode(args: StressArgs) {
println!(" Duration: {}s", args.duration);
println!(" In-flight timeout: {}s", args.in_flight_timeout);
println!(" Indexer type: {:?}", args.indexer_type);
if matches!(args.indexer_type, IndexerType::Sharded | IndexerType::All) {
println!(" Num shards (sharded): {}", args.num_shards);
if matches!(
args.indexer_type,
IndexerType::Nested | IndexerType::Concurrent | IndexerType::All
) {
println!(" Event worker threads: {}", args.num_event_workers);
}
if matches!(args.indexer_type, IndexerType::Nested | IndexerType::All) {
println!(" Num workers (nested): {}", args.num_shards);
println!(" Jump size (nested): {}", args.jump_size);
}
......@@ -1322,58 +1298,11 @@ async fn run_stress_mode(args: StressArgs) {
tokio::time::sleep(Duration::from_millis(50)).await;
}
// Test sharded indexer
if matches!(args.indexer_type, IndexerType::Sharded | IndexerType::All) {
let token = CancellationToken::new();
let indexer = KvIndexerSharded::new(
token.clone(),
args.num_shards,
args.common.block_size,
metrics.clone(),
);
println!(
"\n Applying {} store events to KvIndexerSharded...",
sequences.len()
);
let construction_start = Instant::now();
for (event_id, seq) in sequences.iter().enumerate() {
let event = seq.to_store_event(event_id as u64);
KvIndexerInterface::apply_event(&indexer, event).await;
if args.common.verbose && (event_id + 1) % 100 == 0 {
println!(" Applied {}/{} events...", event_id + 1, sequences.len());
}
}
let construction_time = construction_start.elapsed();
let construction_events = sequences.len() as u64;
println!(" Tree construction completed in {:?}", construction_time);
println!(
" Throughput: {:.0} events/sec",
construction_events as f64 / construction_time.as_secs_f64()
);
tokio::time::sleep(Duration::from_millis(100)).await;
let mut results = run_stress_test(Arc::new(indexer), &sequences, &args).await;
results.construction_time = construction_time;
results.construction_events = construction_events;
print_stress_results(&args, &results);
all_results.push(results);
token.cancel();
tokio::time::sleep(Duration::from_millis(50)).await;
}
// Test nested indexer
if matches!(args.indexer_type, IndexerType::Nested | IndexerType::All) {
let indexer = ThreadPoolIndexer::new(
PositionalIndexer::new(args.jump_size),
args.num_shards,
args.num_event_workers,
args.common.block_size,
);
......@@ -1425,7 +1354,7 @@ async fn run_stress_mode(args: StressArgs) {
) {
let indexer = ThreadPoolIndexer::new(
ConcurrentRadixTree::new(),
args.num_shards,
args.num_event_workers,
args.common.block_size,
);
......
......@@ -7,9 +7,7 @@ use common::*;
use clap::{Parser, Subcommand};
use dynamo_kv_router::LocalBlockHash;
use dynamo_kv_router::indexer::{
KvIndexer, KvIndexerInterface, KvIndexerMetrics, KvIndexerSharded,
};
use dynamo_kv_router::indexer::{KvIndexer, KvIndexerInterface, KvIndexerMetrics};
use dynamo_kv_router::protocols::{KvCacheEvent, KvCacheEventData, RouterEvent};
use dynamo_kv_router::{
ConcurrentRadixTree, ConcurrentRadixTreeCompressed, PositionalIndexer, ThreadPoolIndexer,
......@@ -26,13 +24,6 @@ enum IndexerArgs {
/// Single-threaded radix tree indexer.
RadixTree {},
/// Sharded radix tree indexer that partitions workers across independent shards.
RadixTreeSharded {
/// Number of independent shards to split workers across.
#[clap(long, default_value = "4")]
num_shards: usize,
},
/// Position-based nested map indexer with jump search.
NestedMap {
/// Number of positions to skip during jump search before scanning back.
......@@ -68,12 +59,6 @@ impl IndexerArgs {
IndexerArgs::RadixTree {} => {
Arc::new(KvIndexer::new(cancel_token, block_size, metrics))
}
IndexerArgs::RadixTreeSharded { num_shards } => Arc::new(KvIndexerSharded::new(
cancel_token,
num_shards,
block_size,
metrics,
)),
IndexerArgs::NestedMap {
jump_size,
num_event_workers,
......@@ -115,7 +100,6 @@ impl IndexerArgs {
let nw = num_event_workers;
let indexer_args = match name {
"radix-tree" => IndexerArgs::RadixTree {},
"radix-tree-sharded" => IndexerArgs::RadixTreeSharded { num_shards: 4 },
"nested-map" => IndexerArgs::NestedMap {
jump_size: 8,
num_event_workers: nw,
......@@ -127,7 +111,7 @@ impl IndexerArgs {
num_event_workers: nw,
},
_ => anyhow::bail!(
"Unknown indexer '{}'. Valid names: radix-tree, radix-tree-sharded, \
"Unknown indexer '{}'. Valid names: radix-tree, \
nested-map, concurrent-radix-tree, concurrent-radix-tree-compressed",
name
),
......@@ -148,14 +132,15 @@ struct Args {
/// Comma-separated list of indexer names to benchmark and compare on the
/// same plot. Overrides the subcommand indexer when present. Valid names:
/// radix-tree, radix-tree-sharded, nested-map, concurrent-radix-tree,
/// radix-tree, nested-map, concurrent-radix-tree,
/// concurrent-radix-tree-compressed.
#[clap(long, value_delimiter = ',')]
compare: Vec<String>,
/// Number of OS threads for event processing in compare mode. Applies to
/// indexers that use a thread pool (nested-map, concurrent-radix-tree).
/// Ignored by radix-tree and radix-tree-sharded.
/// indexers that use a thread pool (nested-map, concurrent-radix-tree,
/// concurrent-radix-tree-compressed).
/// Ignored by radix-tree.
#[clap(long, default_value = "16")]
num_event_workers: usize,
......@@ -555,7 +540,6 @@ async fn main() -> anyhow::Result<()> {
let indexer_names: Vec<String> = if args.compare.is_empty() {
let name = match args.get_indexer() {
IndexerArgs::RadixTree {} => "radix-tree",
IndexerArgs::RadixTreeSharded { .. } => "radix-tree-sharded",
IndexerArgs::NestedMap { .. } => "nested-map",
IndexerArgs::ConcurrentRadixTree { .. } => "concurrent-radix-tree",
IndexerArgs::ConcurrentRadixTreeCompressed { .. } => "concurrent-radix-tree-compressed",
......
......@@ -17,7 +17,6 @@ The concurrent indexers achieve a combined throughput of over **10 million event
| `concurrent_radix_tree.rs` | `ConcurrentRadixTree` — thread-safe variant with `Arc<RwLock<Block>>` nodes and `DashMap` lookup |
| `positional.rs` | `PositionalIndexer` — flat `DashMap<(pos, hash), SeqEntry>` with jump optimization |
| `thread_pool.rs` | `ThreadPoolIndexer<T: SyncIndexer>` — N OS threads for sticky-routed writes, inline reads; wraps `ConcurrentRadixTree` or `PositionalIndexer` |
| `sharded.rs` | `KvIndexerSharded` — N independent `RadixTree` shards each in its own OS thread, scatter-gather for matches |
| `local.rs` | `LocalKvIndexer` — thin wrapper around `KvIndexer` with a circular event buffer for worker-side decentralized routing |
| `pruning.rs` | `PruneManager` — TTL-based expiration and size-based pruning via `BinaryHeap<BlockEntry>` |
| `naive.rs` | Brute-force baseline indexers (bench-only, behind `bench` feature flag) |
......
......@@ -34,7 +34,6 @@
mod kv_indexer;
mod local;
mod metrics;
mod sharded;
mod thread_pool;
mod traits;
mod types;
......@@ -52,7 +51,6 @@ mod tests;
pub use kv_indexer::*;
pub use local::*;
pub use metrics::*;
pub use sharded::*;
pub use thread_pool::*;
pub use traits::*;
pub use types::*;
......
This diff is collapsed.
......@@ -207,14 +207,14 @@ fn make_clear_event_with_dp_rank(worker_id: u64, dp_rank: u32) -> RouterEvent {
#[template]
#[rstest]
fn indexer_template(
#[values("single", "sharded", "flat", "concurrent", "concurrent_compressed")] variant: &str,
#[values("single", "flat", "concurrent", "concurrent_compressed")] variant: &str,
) {
}
#[template]
#[rstest]
fn tree_size_indexer_template(
#[values("single", "sharded", "concurrent", "concurrent_compressed")] variant: &str,
#[values("single", "concurrent", "concurrent_compressed")] variant: &str,
) {
}
......@@ -225,7 +225,6 @@ fn make_indexer(variant: &str) -> Box<dyn KvIndexerInterface> {
match variant {
"single" => Box::new(KvIndexer::new(token, kv_block_size, metrics)),
"sharded" => Box::new(KvIndexerSharded::new(token, 4, kv_block_size, metrics)),
"flat" => Box::new(ThreadPoolIndexer::new(
PositionalIndexer::new(32),
4,
......@@ -330,7 +329,7 @@ mod interface_tests {
// tree-size accounting gap after mid-chain removes because descendant
// lookup entries are cleaned up lazily. That means "store -> partial
// remove -> restore continuation" can still miscount restored coverage
// in single, sharded, and concurrent. This test is intentionally scoped
// in single and concurrent. This test is intentionally scoped
// to duplicate store/remove replay so all tree-size variants share the
// same stable baseline.
......@@ -1854,13 +1853,13 @@ mod long_sequence_tests {
}
// ============================================================================
// Tests specific to tree-based implementations (KvIndexer, KvIndexerSharded)
// Tests specific to tree-based implementations with frequency/pruning support.
// These use features not available in PositionalIndexer
// ============================================================================
#[template]
#[rstest]
fn tree_indexer_template(#[values("single", "sharded")] variant: &str) {}
fn tree_indexer_template(#[values("single")] variant: &str) {}
fn make_tree_indexer_with_frequency(
variant: &str,
......@@ -1878,25 +1877,16 @@ fn make_tree_indexer_with_frequency(
metrics,
None,
)),
"sharded" => Box::new(KvIndexerSharded::new_with_frequency(
token,
4,
Some(expiration),
kv_block_size,
metrics,
None,
)),
_ => panic!("Unknown variant: {}", variant),
}
}
#[tokio::test]
async fn test_sharded_routing_decision_assigns_first_seen_worker() {
async fn test_routing_decision_assigns_first_seen_worker() {
let token = CancellationToken::new();
let metrics = Arc::new(KvIndexerMetrics::new_unregistered());
let index = KvIndexerSharded::new_with_frequency(
let index = KvIndexer::new_with_frequency(
token,
4,
Some(Duration::from_secs(60)),
32,
metrics,
......
......@@ -5,7 +5,7 @@
use std::time::Instant;
use serde::{Deserialize, Serialize};
use tokio::sync::{mpsc, oneshot};
use tokio::sync::oneshot;
use crate::protocols::*;
use dynamo_tokens::SequenceHash;
......@@ -307,28 +307,3 @@ pub(super) struct RoutingDecisionRequest {
pub(super) local_hashes: Vec<LocalBlockHash>,
pub(super) sequence_hashes: Vec<SequenceHash>,
}
/// Request payload carrying a block-hash sequence, an early-exit flag, and a
/// channel sender for `OverlapScores`.
///
/// NOTE(review): presumably sent to each shard of the sharded indexer, which
/// replies on `resp` — confirm against the consumer of this type.
#[derive(Debug, Clone)]
pub struct ShardedMatchRequest {
/// Local block hashes of the sequence to look up.
pub(super) sequence: Vec<LocalBlockHash>,
/// Early-exit flag passed along with the request.
/// NOTE(review): exact early-exit semantics are defined by the receiver — confirm.
pub(super) early_exit: bool,
/// Sender half of a channel carrying `OverlapScores` values.
pub(super) resp: mpsc::Sender<OverlapScores>,
/// Timestamp field that only exists when the `bench` feature is enabled.
#[cfg(feature = "bench")]
pub(super) created_at: Instant,
}
impl ShardedMatchRequest {
/// Builds a request from its three caller-supplied parts.
///
/// `sequence`, `early_exit`, and `resp` are stored unchanged. When the
/// `bench` feature is enabled, `created_at` is set to the current instant
/// at the time of construction.
pub(super) fn new(
sequence: Vec<LocalBlockHash>,
early_exit: bool,
resp: mpsc::Sender<OverlapScores>,
) -> Self {
Self {
sequence,
early_exit,
resp,
// Only compiled in with the `bench` feature: record the creation time.
#[cfg(feature = "bench")]
created_at: Instant::now(),
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment