Unverified commit 45be2fdc, authored by Yan Ru Pei, committed by GitHub
Browse files

chore(kv-router): drop easy llm facade reexports (#7474)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 0ac9ef9c
......@@ -11,6 +11,7 @@ pub use dynamo_kv_router::multi_worker_sequence::{
ActiveSequencesMultiWorker, SequenceError, SequencePublisher, SequenceRequest,
SequenceSubscriber,
};
use dynamo_kv_router::protocols::{ActiveLoad, ActiveSequenceEvent, WorkerWithDpRank};
pub use dynamo_kv_router::sequence::{ActiveSequences, RequestId};
use anyhow::Result;
......@@ -21,7 +22,6 @@ use std::collections::HashMap;
use std::sync::Arc;
use super::metrics::WORKER_LOAD_METRICS;
use super::protocols::{ActiveLoad, ActiveSequenceEvent, WorkerWithDpRank};
use crate::kv_router::{ACTIVE_SEQUENCES_SUBJECT, KV_METRICS_SUBJECT};
use crate::local_model::runtime_config::ModelRuntimeConfig;
......
// SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
use crate::kv_router::{
Indexer, KV_EVENT_SUBJECT, KvRouterConfig, protocols::RouterEvent,
worker_query::WorkerQueryClient,
};
use crate::kv_router::{Indexer, worker_query::WorkerQueryClient};
use anyhow::Result;
use dynamo_kv_router::{
config::KvRouterConfig,
protocols::{KV_EVENT_SUBJECT, RouterEvent},
};
use dynamo_runtime::{
component::Component, discovery::EventTransportKind, prelude::*,
transports::event_plane::EventSubscriber,
......
......@@ -21,9 +21,11 @@ use futures::StreamExt;
use tokio::sync::{Mutex, Semaphore};
use crate::kv_router::Indexer;
use crate::kv_router::indexer::{LocalKvIndexer, WorkerKvQueryRequest, WorkerKvQueryResponse};
use crate::kv_router::protocols::{DpRank, KvCacheEventData, RouterEvent, WorkerId};
use crate::kv_router::worker_kv_indexer_query_endpoint;
use dynamo_kv_router::{
indexer::{LocalKvIndexer, WorkerKvQueryRequest, WorkerKvQueryResponse},
protocols::{DpRank, KvCacheEventData, RouterEvent, WorkerId},
};
// Recovery retry configuration
const RECOVERY_MAX_RETRIES: u32 = 8;
......@@ -763,11 +765,10 @@ impl AsyncEngine<SingleIn<WorkerKvQueryRequest>, ManyOut<WorkerKvQueryResponse>,
mod tests {
use super::*;
use crate::kv_router::Indexer;
use crate::kv_router::RouterEvent;
use crate::kv_router::indexer::{KvIndexer, KvIndexerInterface, KvIndexerMetrics};
use crate::kv_router::protocols::{
use dynamo_kv_router::indexer::{KvIndexer, KvIndexerInterface, KvIndexerMetrics};
use dynamo_kv_router::protocols::{
ExternalSequenceBlockHash, KvCacheEvent, KvCacheEventData, KvCacheStoreData,
KvCacheStoredBlockData, LocalBlockHash,
KvCacheStoredBlockData, LocalBlockHash, RouterEvent,
};
use dynamo_runtime::{DistributedRuntime, Runtime, distributed::DistributedConfig};
use std::collections::VecDeque;
......
......@@ -11,12 +11,12 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use dashmap::DashMap;
use dynamo_kv_router::protocols::{ActiveSequenceEvent, ActiveSequenceEventData};
use dynamo_runtime::component::Component;
use dynamo_runtime::traits::DistributedRuntimeProvider;
use dynamo_runtime::transports::event_plane::EventSubscriber;
use crate::kv_router::ACTIVE_SEQUENCES_SUBJECT;
use crate::kv_router::protocols::{ActiveSequenceEvent, ActiveSequenceEventData};
use crate::kv_router::scheduler::KvScheduler;
/// Time-series sample of LORA load
......
......@@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
use super::LoraAllocator;
use crate::kv_router::protocols::WorkerWithDpRank;
use dynamo_kv_router::protocols::WorkerWithDpRank;
/// Rendezvous (HRW) hashing implementation for LoRA allocation
pub struct RendezvousHasher;
......
......@@ -3,7 +3,7 @@
//! LoRA Allocation Algorithms - HRW and Random
use crate::kv_router::protocols::WorkerWithDpRank;
use dynamo_kv_router::protocols::WorkerWithDpRank;
use std::str::FromStr;
pub mod hrw;
......
......@@ -4,11 +4,10 @@
//! LoRA Routing Table - Thread-safe data structure for storing LoRA allocation decisions.
use dashmap::DashMap;
use dynamo_kv_router::protocols::WorkerWithDpRank;
use std::sync::Arc;
use std::time::Instant;
use crate::kv_router::protocols::WorkerWithDpRank;
/// Configuration for a single LoRA's allocation
#[derive(Debug, Clone)]
pub struct LoraReplicaConfig {
......
......@@ -5,12 +5,14 @@ use std::collections::HashSet;
use std::sync::Arc;
use derive_builder::Builder;
use dynamo_kv_router::{
config::RouterConfigOverride,
protocols::{BlockExtraInfo, WorkerId},
};
use serde::{Deserialize, Serialize};
use super::timing::RequestTracker;
use super::{OutputOptions, SamplingOptions, StopConditions};
use crate::kv_router::RouterConfigOverride;
use crate::kv_router::protocols::{BlockExtraInfo, WorkerId};
use crate::preprocessor::media::RdmaMediaDataDescriptor;
use crate::protocols::TokenIdType;
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment