"docs/vscode:/vscode.git/clone" did not exist on "328841d00294fb8226f0368cc380350b3d671d77"
Unverified commit 45be2fdc, authored by Yan Ru Pei and committed by GitHub
Browse files

chore(kv-router): drop easy llm facade reexports (#7474)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 0ac9ef9c
...@@ -11,6 +11,7 @@ pub use dynamo_kv_router::multi_worker_sequence::{ ...@@ -11,6 +11,7 @@ pub use dynamo_kv_router::multi_worker_sequence::{
ActiveSequencesMultiWorker, SequenceError, SequencePublisher, SequenceRequest, ActiveSequencesMultiWorker, SequenceError, SequencePublisher, SequenceRequest,
SequenceSubscriber, SequenceSubscriber,
}; };
use dynamo_kv_router::protocols::{ActiveLoad, ActiveSequenceEvent, WorkerWithDpRank};
pub use dynamo_kv_router::sequence::{ActiveSequences, RequestId}; pub use dynamo_kv_router::sequence::{ActiveSequences, RequestId};
use anyhow::Result; use anyhow::Result;
...@@ -21,7 +22,6 @@ use std::collections::HashMap; ...@@ -21,7 +22,6 @@ use std::collections::HashMap;
use std::sync::Arc; use std::sync::Arc;
use super::metrics::WORKER_LOAD_METRICS; use super::metrics::WORKER_LOAD_METRICS;
use super::protocols::{ActiveLoad, ActiveSequenceEvent, WorkerWithDpRank};
use crate::kv_router::{ACTIVE_SEQUENCES_SUBJECT, KV_METRICS_SUBJECT}; use crate::kv_router::{ACTIVE_SEQUENCES_SUBJECT, KV_METRICS_SUBJECT};
use crate::local_model::runtime_config::ModelRuntimeConfig; use crate::local_model::runtime_config::ModelRuntimeConfig;
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use crate::kv_router::{ use crate::kv_router::{Indexer, worker_query::WorkerQueryClient};
Indexer, KV_EVENT_SUBJECT, KvRouterConfig, protocols::RouterEvent,
worker_query::WorkerQueryClient,
};
use anyhow::Result; use anyhow::Result;
use dynamo_kv_router::{
config::KvRouterConfig,
protocols::{KV_EVENT_SUBJECT, RouterEvent},
};
use dynamo_runtime::{ use dynamo_runtime::{
component::Component, discovery::EventTransportKind, prelude::*, component::Component, discovery::EventTransportKind, prelude::*,
transports::event_plane::EventSubscriber, transports::event_plane::EventSubscriber,
......
...@@ -21,9 +21,11 @@ use futures::StreamExt; ...@@ -21,9 +21,11 @@ use futures::StreamExt;
use tokio::sync::{Mutex, Semaphore}; use tokio::sync::{Mutex, Semaphore};
use crate::kv_router::Indexer; use crate::kv_router::Indexer;
use crate::kv_router::indexer::{LocalKvIndexer, WorkerKvQueryRequest, WorkerKvQueryResponse};
use crate::kv_router::protocols::{DpRank, KvCacheEventData, RouterEvent, WorkerId};
use crate::kv_router::worker_kv_indexer_query_endpoint; use crate::kv_router::worker_kv_indexer_query_endpoint;
use dynamo_kv_router::{
indexer::{LocalKvIndexer, WorkerKvQueryRequest, WorkerKvQueryResponse},
protocols::{DpRank, KvCacheEventData, RouterEvent, WorkerId},
};
// Recovery retry configuration // Recovery retry configuration
const RECOVERY_MAX_RETRIES: u32 = 8; const RECOVERY_MAX_RETRIES: u32 = 8;
...@@ -763,11 +765,10 @@ impl AsyncEngine<SingleIn<WorkerKvQueryRequest>, ManyOut<WorkerKvQueryResponse>, ...@@ -763,11 +765,10 @@ impl AsyncEngine<SingleIn<WorkerKvQueryRequest>, ManyOut<WorkerKvQueryResponse>,
mod tests { mod tests {
use super::*; use super::*;
use crate::kv_router::Indexer; use crate::kv_router::Indexer;
use crate::kv_router::RouterEvent; use dynamo_kv_router::indexer::{KvIndexer, KvIndexerInterface, KvIndexerMetrics};
use crate::kv_router::indexer::{KvIndexer, KvIndexerInterface, KvIndexerMetrics}; use dynamo_kv_router::protocols::{
use crate::kv_router::protocols::{
ExternalSequenceBlockHash, KvCacheEvent, KvCacheEventData, KvCacheStoreData, ExternalSequenceBlockHash, KvCacheEvent, KvCacheEventData, KvCacheStoreData,
KvCacheStoredBlockData, LocalBlockHash, KvCacheStoredBlockData, LocalBlockHash, RouterEvent,
}; };
use dynamo_runtime::{DistributedRuntime, Runtime, distributed::DistributedConfig}; use dynamo_runtime::{DistributedRuntime, Runtime, distributed::DistributedConfig};
use std::collections::VecDeque; use std::collections::VecDeque;
......
...@@ -11,12 +11,12 @@ use std::sync::Arc; ...@@ -11,12 +11,12 @@ use std::sync::Arc;
use std::time::{Duration, Instant}; use std::time::{Duration, Instant};
use dashmap::DashMap; use dashmap::DashMap;
use dynamo_kv_router::protocols::{ActiveSequenceEvent, ActiveSequenceEventData};
use dynamo_runtime::component::Component; use dynamo_runtime::component::Component;
use dynamo_runtime::traits::DistributedRuntimeProvider; use dynamo_runtime::traits::DistributedRuntimeProvider;
use dynamo_runtime::transports::event_plane::EventSubscriber; use dynamo_runtime::transports::event_plane::EventSubscriber;
use crate::kv_router::ACTIVE_SEQUENCES_SUBJECT; use crate::kv_router::ACTIVE_SEQUENCES_SUBJECT;
use crate::kv_router::protocols::{ActiveSequenceEvent, ActiveSequenceEventData};
use crate::kv_router::scheduler::KvScheduler; use crate::kv_router::scheduler::KvScheduler;
/// Time-series sample of LORA load /// Time-series sample of LORA load
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0 // SPDX-License-Identifier: Apache-2.0
use super::LoraAllocator; use super::LoraAllocator;
use crate::kv_router::protocols::WorkerWithDpRank; use dynamo_kv_router::protocols::WorkerWithDpRank;
/// Rendezvous (HRW) hashing implementation for LoRA allocation /// Rendezvous (HRW) hashing implementation for LoRA allocation
pub struct RendezvousHasher; pub struct RendezvousHasher;
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
//! LoRA Allocation Algorithms - HRW and Random //! LoRA Allocation Algorithms - HRW and Random
use crate::kv_router::protocols::WorkerWithDpRank; use dynamo_kv_router::protocols::WorkerWithDpRank;
use std::str::FromStr; use std::str::FromStr;
pub mod hrw; pub mod hrw;
......
...@@ -4,11 +4,10 @@ ...@@ -4,11 +4,10 @@
//! LoRA Routing Table - Thread-safe data structure for storing LoRA allocation decisions. //! LoRA Routing Table - Thread-safe data structure for storing LoRA allocation decisions.
use dashmap::DashMap; use dashmap::DashMap;
use dynamo_kv_router::protocols::WorkerWithDpRank;
use std::sync::Arc; use std::sync::Arc;
use std::time::Instant; use std::time::Instant;
use crate::kv_router::protocols::WorkerWithDpRank;
/// Configuration for a single LoRA's allocation /// Configuration for a single LoRA's allocation
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct LoraReplicaConfig { pub struct LoraReplicaConfig {
......
...@@ -5,12 +5,14 @@ use std::collections::HashSet; ...@@ -5,12 +5,14 @@ use std::collections::HashSet;
use std::sync::Arc; use std::sync::Arc;
use derive_builder::Builder; use derive_builder::Builder;
use dynamo_kv_router::{
config::RouterConfigOverride,
protocols::{BlockExtraInfo, WorkerId},
};
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use super::timing::RequestTracker; use super::timing::RequestTracker;
use super::{OutputOptions, SamplingOptions, StopConditions}; use super::{OutputOptions, SamplingOptions, StopConditions};
use crate::kv_router::RouterConfigOverride;
use crate::kv_router::protocols::{BlockExtraInfo, WorkerId};
use crate::preprocessor::media::RdmaMediaDataDescriptor; use crate::preprocessor::media::RdmaMediaDataDescriptor;
use crate::protocols::TokenIdType; use crate::protocols::TokenIdType;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment