Unverified Commit 2e29620d authored by Yan Ru Pei, committed by GitHub
Browse files

chore(kv-router): move benches to lib/bench to break circular dep [OPS-3752] (#7013)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent f0bcabe0
......@@ -1846,14 +1846,22 @@ name = "dynamo-bench"
version = "1.0.0"
dependencies = [
"anyhow",
"async-trait",
"clap 4.5.60",
"dynamo-kv-router",
"dynamo-mocker",
"dynamo-tokens",
"futures-util",
"indicatif 0.18.4",
"minstant",
"plotters",
"rand 0.9.2",
"reqwest 0.12.28",
"serde",
"serde_json",
"tokio",
"tokio-util",
"uuid",
]
[[package]]
......@@ -1883,13 +1891,10 @@ dependencies = [
"dashmap 6.1.0",
"derive-getters",
"derive_builder",
"dynamo-bench",
"dynamo-mocker",
"dynamo-runtime",
"dynamo-tokens",
"flume",
"indicatif 0.18.4",
"minstant",
"parking_lot",
"plotters",
"prometheus",
......@@ -1944,6 +1949,7 @@ dependencies = [
"dynamo-bench",
"dynamo-kv-router",
"dynamo-memory",
"dynamo-mocker",
"dynamo-parsers",
"dynamo-runtime",
"dynamo-tokens",
......@@ -2033,31 +2039,25 @@ name = "dynamo-mocker"
version = "1.0.0"
dependencies = [
"anyhow",
"bytes",
"dashmap 6.1.0",
"derive-getters",
"derive_builder",
"dynamo-kv-router",
"dynamo-llm",
"dynamo-runtime",
"dynamo-tokens",
"futures",
"ndarray 0.16.1",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.2",
"rmp-serde",
"rstest 0.18.2",
"serde",
"serde_json",
"tokio",
"tokio-stream",
"tokio-timerfd",
"tokio-util",
"tracing",
"uuid",
"validator",
"zeromq",
]
[[package]]
......
......@@ -15,6 +15,21 @@ description = "Lightweight HTTP benchmarks for Dynamo endpoints"
name = "multiturn_bench"
path = "src/bin/multiturn_bench.rs"
[[bench]]
name = "kv_indexer_bench"
path = "kv_router/kv_indexer_bench.rs"
harness = false
[[bench]]
name = "mooncake_bench"
path = "kv_router/mooncake_bench.rs"
harness = false
[[bench]]
name = "active_sequences_bench"
path = "kv_router/active_sequences_bench.rs"
harness = false
[dependencies]
anyhow = { workspace = true }
clap = { version = "4.5", features = ["derive"] }
......@@ -25,3 +40,14 @@ reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
[dev-dependencies]
async-trait = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["bench"] }
dynamo-mocker = { workspace = true }
dynamo-tokens = { workspace = true }
minstant = "0.1.7"
plotters = { version = "0.3", default-features = false, features = ["svg_backend", "line_series", "point_series", "full_palette"] }
tokio = { workspace = true, features = ["rt", "macros", "time"] }
tokio-util = { workspace = true }
uuid = { workspace = true }
......@@ -553,11 +553,13 @@ async fn main() -> anyhow::Result<()> {
return run_tests().await;
}
let path = args
.common
.mooncake_trace_path
.as_deref()
.ok_or_else(|| anyhow::anyhow!("mooncake_trace_path is required for benchmarking"))?;
let path = match args.common.mooncake_trace_path.as_deref() {
Some(p) => p,
None => {
eprintln!("No mooncake_trace_path provided, skipping benchmark");
return Ok(());
}
};
let traces = process_mooncake_trace(
path,
args.common.trace_length_factor,
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
#![allow(dead_code)]
#![allow(dead_code, unused_imports)]
use std::time::Duration;
......
......@@ -10,8 +10,8 @@
//! Supported indexer types: single, sharded, nested, all
//!
//! Run with:
//! cargo bench --package dynamo-kv-router --bench kv_indexer_bench --features bench -- microbench --help
//! cargo bench --package dynamo-kv-router --bench kv_indexer_bench --features bench -- stress --help
//! cargo bench --package dynamo-bench --bench kv_indexer_bench -- microbench --help
//! cargo bench --package dynamo-bench --bench kv_indexer_bench -- stress --help
#[path = "common/mod.rs"]
mod common;
......@@ -1484,7 +1484,13 @@ async fn run_stress_mode(args: StressArgs) {
#[tokio::main]
async fn main() {
let cli = Cli::parse();
let cli = match Cli::try_parse() {
Ok(cli) => cli,
Err(_) => {
eprintln!("No valid arguments provided, skipping benchmark");
return;
}
};
match cli.command {
Command::Microbench(args) => run_microbench_mode(args).await,
......
......@@ -532,11 +532,13 @@ async fn main() -> anyhow::Result<()> {
return run_tests();
}
let path = args
.common
.mooncake_trace_path
.as_deref()
.ok_or_else(|| anyhow::anyhow!("mooncake_trace_path is required for benchmarking"))?;
let path = match args.common.mooncake_trace_path.as_deref() {
Some(p) => p,
None => {
eprintln!("No mooncake_trace_path provided, skipping benchmark");
return Ok(());
}
};
let traces = process_mooncake_trace(
path,
args.common.trace_length_factor,
......
......@@ -1568,6 +1568,7 @@ dependencies = [
"dynamo-async-openai",
"dynamo-kv-router",
"dynamo-memory",
"dynamo-mocker",
"dynamo-parsers",
"dynamo-runtime",
"dynamo-tokens",
......@@ -1640,6 +1641,31 @@ dependencies = [
"tracing",
]
[[package]]
name = "dynamo-mocker"
version = "1.0.0"
dependencies = [
"anyhow",
"dashmap 6.1.0",
"derive-getters",
"derive_builder",
"dynamo-kv-router",
"dynamo-runtime",
"dynamo-tokens",
"ndarray",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.2",
"serde",
"serde_json",
"tokio",
"tokio-timerfd",
"tokio-util",
"tracing",
"uuid",
"validator",
]
[[package]]
name = "dynamo-parsers"
version = "1.0.0"
......@@ -3314,6 +3340,12 @@ dependencies = [
"redox_syscall 0.7.3",
]
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]]
name = "linux-raw-sys"
version = "0.12.1"
......@@ -3740,6 +3772,31 @@ dependencies = [
"rawpointer",
]
[[package]]
name = "ndarray-interp"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e43087829efb5ec2736598e88587df286425b59df5a9ce991994cdd2c5855d3f"
dependencies = [
"ndarray",
"num-traits",
"thiserror 2.0.18",
]
[[package]]
name = "ndarray-npy"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b313788c468c49141a9d9b6131fc15f403e6ef4e8446a0b2e18f664ddb278a9"
dependencies = [
"byteorder",
"ndarray",
"num-complex",
"num-traits",
"py_literal",
"zip 2.4.2",
]
[[package]]
name = "neli"
version = "0.7.4"
......@@ -4943,6 +5000,19 @@ version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d"
[[package]]
name = "py_literal"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "102df7a3d46db9d3891f178dcc826dc270a6746277a9ae6436f8d29fd490a8e1"
dependencies = [
"num-bigint",
"num-complex",
"num-traits",
"pest",
"pest_derive",
]
[[package]]
name = "pyo3"
version = "0.23.5"
......@@ -5540,6 +5610,19 @@ dependencies = [
"semver",
]
[[package]]
name = "rustix"
version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.11.0",
"errno",
"libc",
"linux-raw-sys 0.4.15",
"windows-sys 0.59.0",
]
[[package]]
name = "rustix"
version = "1.1.4"
......@@ -5549,7 +5632,7 @@ dependencies = [
"bitflags 2.11.0",
"errno",
"libc",
"linux-raw-sys",
"linux-raw-sys 0.12.1",
"windows-sys 0.61.2",
]
......@@ -6300,7 +6383,7 @@ dependencies = [
"fastrand",
"getrandom 0.4.2",
"once_cell",
"rustix",
"rustix 1.1.4",
"windows-sys 0.61.2",
]
......@@ -6415,6 +6498,15 @@ dependencies = [
"time-core",
]
[[package]]
name = "timerfd"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84e482e368cf7efa2c8b570f476e5b9fd9fd5e9b9219fc567832b05f13511091"
dependencies = [
"rustix 0.38.44",
]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
......@@ -6555,6 +6647,19 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-timerfd"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87eecdae9a9b793843b1df7a64bc136f203443c1ca9889b3c4a39590afa51094"
dependencies = [
"futures-core",
"libc",
"slab",
"timerfd",
"tokio",
]
[[package]]
name = "tokio-util"
version = "0.7.18"
......@@ -7203,7 +7308,7 @@ dependencies = [
"serde_json",
"url",
"utoipa",
"zip",
"zip 3.0.0",
]
[[package]]
......@@ -8102,6 +8207,23 @@ dependencies = [
"syn",
]
[[package]]
name = "zip"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
dependencies = [
"arbitrary",
"crc32fast",
"crossbeam-utils",
"displaydoc",
"flate2",
"indexmap 2.13.0",
"memchr",
"thiserror 2.0.18",
"zopfli",
]
[[package]]
name = "zip"
version = "3.0.0"
......
......@@ -1576,6 +1576,7 @@ dependencies = [
"dynamo-async-openai",
"dynamo-kv-router",
"dynamo-memory",
"dynamo-mocker",
"dynamo-parsers",
"dynamo-runtime",
"dynamo-tokens",
......@@ -1656,30 +1657,24 @@ name = "dynamo-mocker"
version = "1.0.0"
dependencies = [
"anyhow",
"bytes",
"dashmap 6.1.0",
"derive-getters",
"derive_builder",
"dynamo-kv-router",
"dynamo-llm",
"dynamo-runtime",
"dynamo-tokens",
"futures",
"ndarray",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.2",
"rmp-serde",
"serde",
"serde_json",
"tokio",
"tokio-stream",
"tokio-timerfd",
"tokio-util",
"tracing",
"uuid",
"validator",
"zeromq",
]
[[package]]
......
......@@ -18,6 +18,7 @@ use dynamo_llm::entrypoint::input::Input;
use dynamo_llm::kv_router::KvRouterConfig as RsKvRouterConfig;
use dynamo_llm::local_model::DEFAULT_HTTP_PORT;
use dynamo_llm::local_model::{LocalModel, LocalModelBuilder};
use dynamo_llm::mocker::make_mocker_engine;
use dynamo_llm::model_card::ModelDeploymentCard as RsModelDeploymentCard;
use dynamo_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine;
use dynamo_mocker::common::protocols::MockEngineArgs;
......@@ -425,8 +426,7 @@ async fn select_engine(
let endpoint = local_model.endpoint_id().clone();
let engine =
dynamo_mocker::make_mocker_engine(distributed_runtime.inner, endpoint, mocker_args)
.await?;
make_mocker_engine(distributed_runtime.inner, endpoint, mocker_args).await?;
RsEngineConfig::InProcessTokens {
engine,
......
......@@ -13,7 +13,7 @@ repository.workspace = true
[features]
default = []
metrics = []
bench = ["dep:clap", "dep:indicatif", "dep:serde_json", "dynamo-runtime/integration", "dep:plotters"]
bench = ["dep:clap", "dep:indicatif", "dep:serde_json", "dep:plotters"]
indexer-bin = ["metrics", "dep:axum", "dep:clap", "dep:zeromq", "dep:tracing-subscriber", "dep:serde_json"]
[dependencies]
......@@ -55,30 +55,15 @@ axum = { workspace = true, optional = true }
zeromq = { version = "0.4.1", optional = true }
tracing-subscriber = { workspace = true, optional = true }
[package.metadata.cargo-machete]
ignored = ["indicatif", "plotters"]
[dev-dependencies]
dynamo-bench = { path = "../bench" }
rstest = "0.18.2"
rstest_reuse = "0.7.0"
serde_json = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "time"] }
dynamo-mocker = { workspace = true }
dynamo-tokens = { workspace = true }
minstant = "0.1.7"
[[bench]]
name = "kv_indexer_bench"
harness = false
required-features = ["bench"]
[[bench]]
name = "mooncake_bench"
harness = false
required-features = ["bench"]
[[bench]]
name = "active_sequences_bench"
harness = false
required-features = ["bench"]
[[bin]]
name = "dynamo-kv-indexer"
......
......@@ -49,6 +49,7 @@ dynamo-runtime = { workspace = true }
dynamo-tokens = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["metrics"] }
dynamo-memory = { workspace = true }
dynamo-mocker = { workspace = true }
# workspace
aho-corasick = "1.1"
......
......@@ -25,6 +25,7 @@ pub mod kv_router;
pub mod local_model;
pub mod lora;
pub mod migration;
pub mod mocker;
pub mod model_card;
pub mod model_type;
pub mod namespace;
......
......@@ -9,19 +9,20 @@
use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use crate::backend::ExecutionContext;
use crate::kv_router::publisher::{KvEventPublisher, KvEventSourceConfig, WorkerMetricsPublisher};
use crate::protocols::TokenIdType;
use crate::protocols::common::llm_backend::{LLMEngineOutput, PreprocessedRequest};
use anyhow::Result;
use bytes::Bytes;
use dashmap::DashMap;
use dynamo_llm::backend::ExecutionContext;
use futures::StreamExt;
use rand::Rng;
use serde::Serialize;
use tokio::sync::{Notify, OnceCell, mpsc};
use tokio_stream::wrappers::UnboundedReceiverStream;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
use zeromq::{Socket, SocketSend};
use dynamo_kv_router::protocols::{KvCacheEvent, KvCacheEventData};
use dynamo_mocker::common::bootstrap::{BootstrapServer, connect_to_prefill};
use dynamo_mocker::common::protocols::{
DirectRequest, KvCacheEventSink, MockEngineArgs, OutputSignal,
};
use dynamo_mocker::common::utils::{compute_kv_transfer_delay, sleep_precise};
use dynamo_mocker::scheduler::Scheduler;
use dynamo_runtime::DistributedRuntime;
use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::{
......@@ -30,19 +31,14 @@ use dynamo_runtime::{
pipeline::{AsyncEngine, Error, ManyOut, ResponseStream, SingleIn, async_trait},
traits::DistributedRuntimeProvider,
};
use dynamo_kv_router::protocols::{KvCacheEvent, KvCacheEventData};
use dynamo_llm::kv_router::publisher::{
KvEventPublisher, KvEventSourceConfig, WorkerMetricsPublisher,
};
use dynamo_llm::protocols::TokenIdType;
use dynamo_llm::protocols::common::llm_backend::{LLMEngineOutput, PreprocessedRequest};
use crate::common::bootstrap::{BootstrapServer, connect_to_prefill};
use crate::common::protocols::OutputSignal;
use crate::common::protocols::{DirectRequest, KvCacheEventSink, MockEngineArgs};
use crate::common::utils::{compute_kv_transfer_delay, sleep_precise};
use crate::scheduler::Scheduler;
use futures::StreamExt;
use rand::Rng;
use serde::Serialize;
use tokio::sync::{Notify, OnceCell, mpsc};
use tokio_stream::wrappers::UnboundedReceiverStream;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
use zeromq::{Socket, SocketSend};
pub const MOCKER_COMPONENT: &str = "mocker";
......
......@@ -18,8 +18,6 @@ dynamo-tokens = { workspace = true }
# workspace
anyhow = { workspace = true }
bytes = { workspace = true }
futures = { workspace = true }
dashmap = { workspace = true }
derive_builder = { workspace = true }
derive-getters = { workspace = true }
......@@ -27,7 +25,6 @@ rand = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
......@@ -37,15 +34,9 @@ validator = { workspace = true }
ndarray = "0.16"
ndarray-npy = "0.9"
ndarray-interp = "0.5"
zeromq = "0.4.1"
rmp-serde = "1.3"
[target.'cfg(target_os = "linux")'.dependencies]
dynamo-llm = { workspace = true }
tokio-timerfd = "0.2"
[target.'cfg(not(target_os = "linux"))'.dependencies]
dynamo-llm = { path = "../llm", default-features = false }
[dev-dependencies]
rstest = "0.18.2"
......@@ -10,6 +10,4 @@
pub mod cache;
pub mod common;
pub mod kv_manager;
pub mod mocker;
pub use mocker::make_mocker_engine; // Re-export nicely for bindings
pub mod scheduler;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment