"vscode:/vscode.git/clone" did not exist on "5101f08c4e70a359b935edde76adfa09687a436e"
Unverified Commit 2e29620d authored by Yan Ru Pei, committed by GitHub
Browse files

chore(kv-router): move benches to lib/bench to break circular dep [OPS-3752] (#7013)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent f0bcabe0
...@@ -1846,14 +1846,22 @@ name = "dynamo-bench" ...@@ -1846,14 +1846,22 @@ name = "dynamo-bench"
version = "1.0.0" version = "1.0.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"async-trait",
"clap 4.5.60", "clap 4.5.60",
"dynamo-kv-router",
"dynamo-mocker",
"dynamo-tokens",
"futures-util", "futures-util",
"indicatif 0.18.4", "indicatif 0.18.4",
"minstant",
"plotters",
"rand 0.9.2", "rand 0.9.2",
"reqwest 0.12.28", "reqwest 0.12.28",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
"tokio-util",
"uuid",
] ]
[[package]] [[package]]
...@@ -1883,13 +1891,10 @@ dependencies = [ ...@@ -1883,13 +1891,10 @@ dependencies = [
"dashmap 6.1.0", "dashmap 6.1.0",
"derive-getters", "derive-getters",
"derive_builder", "derive_builder",
"dynamo-bench",
"dynamo-mocker",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
"flume", "flume",
"indicatif 0.18.4", "indicatif 0.18.4",
"minstant",
"parking_lot", "parking_lot",
"plotters", "plotters",
"prometheus", "prometheus",
...@@ -1944,6 +1949,7 @@ dependencies = [ ...@@ -1944,6 +1949,7 @@ dependencies = [
"dynamo-bench", "dynamo-bench",
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-memory", "dynamo-memory",
"dynamo-mocker",
"dynamo-parsers", "dynamo-parsers",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
...@@ -2033,31 +2039,25 @@ name = "dynamo-mocker" ...@@ -2033,31 +2039,25 @@ name = "dynamo-mocker"
version = "1.0.0" version = "1.0.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes",
"dashmap 6.1.0", "dashmap 6.1.0",
"derive-getters", "derive-getters",
"derive_builder", "derive_builder",
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-llm",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
"futures",
"ndarray 0.16.1", "ndarray 0.16.1",
"ndarray-interp", "ndarray-interp",
"ndarray-npy", "ndarray-npy",
"rand 0.9.2", "rand 0.9.2",
"rmp-serde",
"rstest 0.18.2", "rstest 0.18.2",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
"tokio-stream",
"tokio-timerfd", "tokio-timerfd",
"tokio-util", "tokio-util",
"tracing", "tracing",
"uuid", "uuid",
"validator", "validator",
"zeromq",
] ]
[[package]] [[package]]
......
...@@ -15,6 +15,21 @@ description = "Lightweight HTTP benchmarks for Dynamo endpoints" ...@@ -15,6 +15,21 @@ description = "Lightweight HTTP benchmarks for Dynamo endpoints"
name = "multiturn_bench" name = "multiturn_bench"
path = "src/bin/multiturn_bench.rs" path = "src/bin/multiturn_bench.rs"
[[bench]]
name = "kv_indexer_bench"
path = "kv_router/kv_indexer_bench.rs"
harness = false
[[bench]]
name = "mooncake_bench"
path = "kv_router/mooncake_bench.rs"
harness = false
[[bench]]
name = "active_sequences_bench"
path = "kv_router/active_sequences_bench.rs"
harness = false
[dependencies] [dependencies]
anyhow = { workspace = true } anyhow = { workspace = true }
clap = { version = "4.5", features = ["derive"] } clap = { version = "4.5", features = ["derive"] }
...@@ -25,3 +40,14 @@ reqwest = { workspace = true } ...@@ -25,3 +40,14 @@ reqwest = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
[dev-dependencies]
async-trait = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["bench"] }
dynamo-mocker = { workspace = true }
dynamo-tokens = { workspace = true }
minstant = "0.1.7"
plotters = { version = "0.3", default-features = false, features = ["svg_backend", "line_series", "point_series", "full_palette"] }
tokio = { workspace = true, features = ["rt", "macros", "time"] }
tokio-util = { workspace = true }
uuid = { workspace = true }
...@@ -553,11 +553,13 @@ async fn main() -> anyhow::Result<()> { ...@@ -553,11 +553,13 @@ async fn main() -> anyhow::Result<()> {
return run_tests().await; return run_tests().await;
} }
let path = args let path = match args.common.mooncake_trace_path.as_deref() {
.common Some(p) => p,
.mooncake_trace_path None => {
.as_deref() eprintln!("No mooncake_trace_path provided, skipping benchmark");
.ok_or_else(|| anyhow::anyhow!("mooncake_trace_path is required for benchmarking"))?; return Ok(());
}
};
let traces = process_mooncake_trace( let traces = process_mooncake_trace(
path, path,
args.common.trace_length_factor, args.common.trace_length_factor,
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
#![allow(dead_code)] #![allow(dead_code, unused_imports)]
use std::time::Duration; use std::time::Duration;
......
...@@ -10,8 +10,8 @@ ...@@ -10,8 +10,8 @@
//! Supported indexer types: single, sharded, nested, all //! Supported indexer types: single, sharded, nested, all
//! //!
//! Run with: //! Run with:
//! cargo bench --package dynamo-kv-router --bench kv_indexer_bench --features bench -- microbench --help //! cargo bench --package dynamo-bench --bench kv_indexer_bench -- microbench --help
//! cargo bench --package dynamo-kv-router --bench kv_indexer_bench --features bench -- stress --help //! cargo bench --package dynamo-bench --bench kv_indexer_bench -- stress --help
#[path = "common/mod.rs"] #[path = "common/mod.rs"]
mod common; mod common;
...@@ -1484,7 +1484,13 @@ async fn run_stress_mode(args: StressArgs) { ...@@ -1484,7 +1484,13 @@ async fn run_stress_mode(args: StressArgs) {
#[tokio::main] #[tokio::main]
async fn main() { async fn main() {
let cli = Cli::parse(); let cli = match Cli::try_parse() {
Ok(cli) => cli,
Err(_) => {
eprintln!("No valid arguments provided, skipping benchmark");
return;
}
};
match cli.command { match cli.command {
Command::Microbench(args) => run_microbench_mode(args).await, Command::Microbench(args) => run_microbench_mode(args).await,
......
...@@ -532,11 +532,13 @@ async fn main() -> anyhow::Result<()> { ...@@ -532,11 +532,13 @@ async fn main() -> anyhow::Result<()> {
return run_tests(); return run_tests();
} }
let path = args let path = match args.common.mooncake_trace_path.as_deref() {
.common Some(p) => p,
.mooncake_trace_path None => {
.as_deref() eprintln!("No mooncake_trace_path provided, skipping benchmark");
.ok_or_else(|| anyhow::anyhow!("mooncake_trace_path is required for benchmarking"))?; return Ok(());
}
};
let traces = process_mooncake_trace( let traces = process_mooncake_trace(
path, path,
args.common.trace_length_factor, args.common.trace_length_factor,
......
...@@ -1568,6 +1568,7 @@ dependencies = [ ...@@ -1568,6 +1568,7 @@ dependencies = [
"dynamo-async-openai", "dynamo-async-openai",
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-memory", "dynamo-memory",
"dynamo-mocker",
"dynamo-parsers", "dynamo-parsers",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
...@@ -1640,6 +1641,31 @@ dependencies = [ ...@@ -1640,6 +1641,31 @@ dependencies = [
"tracing", "tracing",
] ]
[[package]]
name = "dynamo-mocker"
version = "1.0.0"
dependencies = [
"anyhow",
"dashmap 6.1.0",
"derive-getters",
"derive_builder",
"dynamo-kv-router",
"dynamo-runtime",
"dynamo-tokens",
"ndarray",
"ndarray-interp",
"ndarray-npy",
"rand 0.9.2",
"serde",
"serde_json",
"tokio",
"tokio-timerfd",
"tokio-util",
"tracing",
"uuid",
"validator",
]
[[package]] [[package]]
name = "dynamo-parsers" name = "dynamo-parsers"
version = "1.0.0" version = "1.0.0"
...@@ -3314,6 +3340,12 @@ dependencies = [ ...@@ -3314,6 +3340,12 @@ dependencies = [
"redox_syscall 0.7.3", "redox_syscall 0.7.3",
] ]
[[package]]
name = "linux-raw-sys"
version = "0.4.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.12.1" version = "0.12.1"
...@@ -3740,6 +3772,31 @@ dependencies = [ ...@@ -3740,6 +3772,31 @@ dependencies = [
"rawpointer", "rawpointer",
] ]
[[package]]
name = "ndarray-interp"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e43087829efb5ec2736598e88587df286425b59df5a9ce991994cdd2c5855d3f"
dependencies = [
"ndarray",
"num-traits",
"thiserror 2.0.18",
]
[[package]]
name = "ndarray-npy"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7b313788c468c49141a9d9b6131fc15f403e6ef4e8446a0b2e18f664ddb278a9"
dependencies = [
"byteorder",
"ndarray",
"num-complex",
"num-traits",
"py_literal",
"zip 2.4.2",
]
[[package]] [[package]]
name = "neli" name = "neli"
version = "0.7.4" version = "0.7.4"
...@@ -4943,6 +5000,19 @@ version = "0.1.28" ...@@ -4943,6 +5000,19 @@ version = "0.1.28"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d" checksum = "b5a041e753da8b807c9255f28de81879c78c876392ff2469cde94799b2896b9d"
[[package]]
name = "py_literal"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "102df7a3d46db9d3891f178dcc826dc270a6746277a9ae6436f8d29fd490a8e1"
dependencies = [
"num-bigint",
"num-complex",
"num-traits",
"pest",
"pest_derive",
]
[[package]] [[package]]
name = "pyo3" name = "pyo3"
version = "0.23.5" version = "0.23.5"
...@@ -5540,6 +5610,19 @@ dependencies = [ ...@@ -5540,6 +5610,19 @@ dependencies = [
"semver", "semver",
] ]
[[package]]
name = "rustix"
version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
"bitflags 2.11.0",
"errno",
"libc",
"linux-raw-sys 0.4.15",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "rustix" name = "rustix"
version = "1.1.4" version = "1.1.4"
...@@ -5549,7 +5632,7 @@ dependencies = [ ...@@ -5549,7 +5632,7 @@ dependencies = [
"bitflags 2.11.0", "bitflags 2.11.0",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys 0.12.1",
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
...@@ -6300,7 +6383,7 @@ dependencies = [ ...@@ -6300,7 +6383,7 @@ dependencies = [
"fastrand", "fastrand",
"getrandom 0.4.2", "getrandom 0.4.2",
"once_cell", "once_cell",
"rustix", "rustix 1.1.4",
"windows-sys 0.61.2", "windows-sys 0.61.2",
] ]
...@@ -6415,6 +6498,15 @@ dependencies = [ ...@@ -6415,6 +6498,15 @@ dependencies = [
"time-core", "time-core",
] ]
[[package]]
name = "timerfd"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84e482e368cf7efa2c8b570f476e5b9fd9fd5e9b9219fc567832b05f13511091"
dependencies = [
"rustix 0.38.44",
]
[[package]] [[package]]
name = "tiny-keccak" name = "tiny-keccak"
version = "2.0.2" version = "2.0.2"
...@@ -6555,6 +6647,19 @@ dependencies = [ ...@@ -6555,6 +6647,19 @@ dependencies = [
"tokio", "tokio",
] ]
[[package]]
name = "tokio-timerfd"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87eecdae9a9b793843b1df7a64bc136f203443c1ca9889b3c4a39590afa51094"
dependencies = [
"futures-core",
"libc",
"slab",
"timerfd",
"tokio",
]
[[package]] [[package]]
name = "tokio-util" name = "tokio-util"
version = "0.7.18" version = "0.7.18"
...@@ -7203,7 +7308,7 @@ dependencies = [ ...@@ -7203,7 +7308,7 @@ dependencies = [
"serde_json", "serde_json",
"url", "url",
"utoipa", "utoipa",
"zip", "zip 3.0.0",
] ]
[[package]] [[package]]
...@@ -8102,6 +8207,23 @@ dependencies = [ ...@@ -8102,6 +8207,23 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "zip"
version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabe6324e908f85a1c52063ce7aa26b68dcb7eb6dbc83a2d148403c9bc3eba50"
dependencies = [
"arbitrary",
"crc32fast",
"crossbeam-utils",
"displaydoc",
"flate2",
"indexmap 2.13.0",
"memchr",
"thiserror 2.0.18",
"zopfli",
]
[[package]] [[package]]
name = "zip" name = "zip"
version = "3.0.0" version = "3.0.0"
......
...@@ -1576,6 +1576,7 @@ dependencies = [ ...@@ -1576,6 +1576,7 @@ dependencies = [
"dynamo-async-openai", "dynamo-async-openai",
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-memory", "dynamo-memory",
"dynamo-mocker",
"dynamo-parsers", "dynamo-parsers",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
...@@ -1656,30 +1657,24 @@ name = "dynamo-mocker" ...@@ -1656,30 +1657,24 @@ name = "dynamo-mocker"
version = "1.0.0" version = "1.0.0"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bytes",
"dashmap 6.1.0", "dashmap 6.1.0",
"derive-getters", "derive-getters",
"derive_builder", "derive_builder",
"dynamo-kv-router", "dynamo-kv-router",
"dynamo-llm",
"dynamo-runtime", "dynamo-runtime",
"dynamo-tokens", "dynamo-tokens",
"futures",
"ndarray", "ndarray",
"ndarray-interp", "ndarray-interp",
"ndarray-npy", "ndarray-npy",
"rand 0.9.2", "rand 0.9.2",
"rmp-serde",
"serde", "serde",
"serde_json", "serde_json",
"tokio", "tokio",
"tokio-stream",
"tokio-timerfd", "tokio-timerfd",
"tokio-util", "tokio-util",
"tracing", "tracing",
"uuid", "uuid",
"validator", "validator",
"zeromq",
] ]
[[package]] [[package]]
......
...@@ -18,6 +18,7 @@ use dynamo_llm::entrypoint::input::Input; ...@@ -18,6 +18,7 @@ use dynamo_llm::entrypoint::input::Input;
use dynamo_llm::kv_router::KvRouterConfig as RsKvRouterConfig; use dynamo_llm::kv_router::KvRouterConfig as RsKvRouterConfig;
use dynamo_llm::local_model::DEFAULT_HTTP_PORT; use dynamo_llm::local_model::DEFAULT_HTTP_PORT;
use dynamo_llm::local_model::{LocalModel, LocalModelBuilder}; use dynamo_llm::local_model::{LocalModel, LocalModelBuilder};
use dynamo_llm::mocker::make_mocker_engine;
use dynamo_llm::model_card::ModelDeploymentCard as RsModelDeploymentCard; use dynamo_llm::model_card::ModelDeploymentCard as RsModelDeploymentCard;
use dynamo_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine; use dynamo_llm::types::openai::chat_completions::OpenAIChatCompletionsStreamingEngine;
use dynamo_mocker::common::protocols::MockEngineArgs; use dynamo_mocker::common::protocols::MockEngineArgs;
...@@ -425,8 +426,7 @@ async fn select_engine( ...@@ -425,8 +426,7 @@ async fn select_engine(
let endpoint = local_model.endpoint_id().clone(); let endpoint = local_model.endpoint_id().clone();
let engine = let engine =
dynamo_mocker::make_mocker_engine(distributed_runtime.inner, endpoint, mocker_args) make_mocker_engine(distributed_runtime.inner, endpoint, mocker_args).await?;
.await?;
RsEngineConfig::InProcessTokens { RsEngineConfig::InProcessTokens {
engine, engine,
......
...@@ -13,7 +13,7 @@ repository.workspace = true ...@@ -13,7 +13,7 @@ repository.workspace = true
[features] [features]
default = [] default = []
metrics = [] metrics = []
bench = ["dep:clap", "dep:indicatif", "dep:serde_json", "dynamo-runtime/integration", "dep:plotters"] bench = ["dep:clap", "dep:indicatif", "dep:serde_json", "dep:plotters"]
indexer-bin = ["metrics", "dep:axum", "dep:clap", "dep:zeromq", "dep:tracing-subscriber", "dep:serde_json"] indexer-bin = ["metrics", "dep:axum", "dep:clap", "dep:zeromq", "dep:tracing-subscriber", "dep:serde_json"]
[dependencies] [dependencies]
...@@ -55,30 +55,15 @@ axum = { workspace = true, optional = true } ...@@ -55,30 +55,15 @@ axum = { workspace = true, optional = true }
zeromq = { version = "0.4.1", optional = true } zeromq = { version = "0.4.1", optional = true }
tracing-subscriber = { workspace = true, optional = true } tracing-subscriber = { workspace = true, optional = true }
[package.metadata.cargo-machete]
ignored = ["indicatif", "plotters"]
[dev-dependencies] [dev-dependencies]
dynamo-bench = { path = "../bench" }
rstest = "0.18.2" rstest = "0.18.2"
rstest_reuse = "0.7.0" rstest_reuse = "0.7.0"
serde_json = { workspace = true } serde_json = { workspace = true }
tokio = { workspace = true, features = ["rt", "macros", "time"] } tokio = { workspace = true, features = ["rt", "macros", "time"] }
dynamo-mocker = { workspace = true }
dynamo-tokens = { workspace = true } dynamo-tokens = { workspace = true }
minstant = "0.1.7"
[[bench]]
name = "kv_indexer_bench"
harness = false
required-features = ["bench"]
[[bench]]
name = "mooncake_bench"
harness = false
required-features = ["bench"]
[[bench]]
name = "active_sequences_bench"
harness = false
required-features = ["bench"]
[[bin]] [[bin]]
name = "dynamo-kv-indexer" name = "dynamo-kv-indexer"
......
...@@ -49,6 +49,7 @@ dynamo-runtime = { workspace = true } ...@@ -49,6 +49,7 @@ dynamo-runtime = { workspace = true }
dynamo-tokens = { workspace = true } dynamo-tokens = { workspace = true }
dynamo-kv-router = { workspace = true, features = ["metrics"] } dynamo-kv-router = { workspace = true, features = ["metrics"] }
dynamo-memory = { workspace = true } dynamo-memory = { workspace = true }
dynamo-mocker = { workspace = true }
# workspace # workspace
aho-corasick = "1.1" aho-corasick = "1.1"
......
...@@ -25,6 +25,7 @@ pub mod kv_router; ...@@ -25,6 +25,7 @@ pub mod kv_router;
pub mod local_model; pub mod local_model;
pub mod lora; pub mod lora;
pub mod migration; pub mod migration;
pub mod mocker;
pub mod model_card; pub mod model_card;
pub mod model_type; pub mod model_type;
pub mod namespace; pub mod namespace;
......
...@@ -9,19 +9,20 @@ ...@@ -9,19 +9,20 @@
use std::sync::Arc; use std::sync::Arc;
use std::time::{Duration, SystemTime, UNIX_EPOCH}; use std::time::{Duration, SystemTime, UNIX_EPOCH};
use crate::backend::ExecutionContext;
use crate::kv_router::publisher::{KvEventPublisher, KvEventSourceConfig, WorkerMetricsPublisher};
use crate::protocols::TokenIdType;
use crate::protocols::common::llm_backend::{LLMEngineOutput, PreprocessedRequest};
use anyhow::Result; use anyhow::Result;
use bytes::Bytes; use bytes::Bytes;
use dashmap::DashMap; use dashmap::DashMap;
use dynamo_llm::backend::ExecutionContext; use dynamo_kv_router::protocols::{KvCacheEvent, KvCacheEventData};
use futures::StreamExt; use dynamo_mocker::common::bootstrap::{BootstrapServer, connect_to_prefill};
use rand::Rng; use dynamo_mocker::common::protocols::{
use serde::Serialize; DirectRequest, KvCacheEventSink, MockEngineArgs, OutputSignal,
use tokio::sync::{Notify, OnceCell, mpsc}; };
use tokio_stream::wrappers::UnboundedReceiverStream; use dynamo_mocker::common::utils::{compute_kv_transfer_delay, sleep_precise};
use tokio_util::sync::CancellationToken; use dynamo_mocker::scheduler::Scheduler;
use uuid::Uuid;
use zeromq::{Socket, SocketSend};
use dynamo_runtime::DistributedRuntime; use dynamo_runtime::DistributedRuntime;
use dynamo_runtime::protocols::annotated::Annotated; use dynamo_runtime::protocols::annotated::Annotated;
use dynamo_runtime::{ use dynamo_runtime::{
...@@ -30,19 +31,14 @@ use dynamo_runtime::{ ...@@ -30,19 +31,14 @@ use dynamo_runtime::{
pipeline::{AsyncEngine, Error, ManyOut, ResponseStream, SingleIn, async_trait}, pipeline::{AsyncEngine, Error, ManyOut, ResponseStream, SingleIn, async_trait},
traits::DistributedRuntimeProvider, traits::DistributedRuntimeProvider,
}; };
use futures::StreamExt;
use dynamo_kv_router::protocols::{KvCacheEvent, KvCacheEventData}; use rand::Rng;
use dynamo_llm::kv_router::publisher::{ use serde::Serialize;
KvEventPublisher, KvEventSourceConfig, WorkerMetricsPublisher, use tokio::sync::{Notify, OnceCell, mpsc};
}; use tokio_stream::wrappers::UnboundedReceiverStream;
use dynamo_llm::protocols::TokenIdType; use tokio_util::sync::CancellationToken;
use dynamo_llm::protocols::common::llm_backend::{LLMEngineOutput, PreprocessedRequest}; use uuid::Uuid;
use zeromq::{Socket, SocketSend};
use crate::common::bootstrap::{BootstrapServer, connect_to_prefill};
use crate::common::protocols::OutputSignal;
use crate::common::protocols::{DirectRequest, KvCacheEventSink, MockEngineArgs};
use crate::common::utils::{compute_kv_transfer_delay, sleep_precise};
use crate::scheduler::Scheduler;
pub const MOCKER_COMPONENT: &str = "mocker"; pub const MOCKER_COMPONENT: &str = "mocker";
......
...@@ -18,8 +18,6 @@ dynamo-tokens = { workspace = true } ...@@ -18,8 +18,6 @@ dynamo-tokens = { workspace = true }
# workspace # workspace
anyhow = { workspace = true } anyhow = { workspace = true }
bytes = { workspace = true }
futures = { workspace = true }
dashmap = { workspace = true } dashmap = { workspace = true }
derive_builder = { workspace = true } derive_builder = { workspace = true }
derive-getters = { workspace = true } derive-getters = { workspace = true }
...@@ -27,7 +25,6 @@ rand = { workspace = true } ...@@ -27,7 +25,6 @@ rand = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
tokio = { workspace = true } tokio = { workspace = true }
tokio-stream = { workspace = true }
tokio-util = { workspace = true } tokio-util = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }
uuid = { workspace = true } uuid = { workspace = true }
...@@ -37,15 +34,9 @@ validator = { workspace = true } ...@@ -37,15 +34,9 @@ validator = { workspace = true }
ndarray = "0.16" ndarray = "0.16"
ndarray-npy = "0.9" ndarray-npy = "0.9"
ndarray-interp = "0.5" ndarray-interp = "0.5"
zeromq = "0.4.1"
rmp-serde = "1.3"
[target.'cfg(target_os = "linux")'.dependencies] [target.'cfg(target_os = "linux")'.dependencies]
dynamo-llm = { workspace = true }
tokio-timerfd = "0.2" tokio-timerfd = "0.2"
[target.'cfg(not(target_os = "linux"))'.dependencies]
dynamo-llm = { path = "../llm", default-features = false }
[dev-dependencies] [dev-dependencies]
rstest = "0.18.2" rstest = "0.18.2"
...@@ -10,6 +10,4 @@ ...@@ -10,6 +10,4 @@
pub mod cache; pub mod cache;
pub mod common; pub mod common;
pub mod kv_manager; pub mod kv_manager;
pub mod mocker;
pub use mocker::make_mocker_engine; // Re-export nicely for bindings
pub mod scheduler; pub mod scheduler;
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment