"ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "98a6d3b91076f8239b9db0e131175de2de990b4a"
Unverified commit efa89448, authored by Yan Ru Pei and committed by GitHub
Browse files

chore: de-async scheduler read paths and unblock decode output tracking (#6510)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 6fab12be
...@@ -494,14 +494,12 @@ impl KvRouter { ...@@ -494,14 +494,12 @@ impl KvRouter {
self.scheduler.worker_type() self.scheduler.worker_type()
} }
pub async fn add_output_block( pub fn add_output_block(
&self, &self,
request_id: &str, request_id: &str,
decay_fraction: Option<f64>, decay_fraction: Option<f64>,
) -> Result<(), SequenceError> { ) -> Result<(), SequenceError> {
self.scheduler self.scheduler.add_output_block(request_id, decay_fraction)
.add_output_block(request_id, decay_fraction)
.await
} }
pub fn block_size(&self) -> u32 { pub fn block_size(&self) -> u32 {
...@@ -541,8 +539,7 @@ impl KvRouter { ...@@ -541,8 +539,7 @@ impl KvRouter {
Ok(self Ok(self
.scheduler .scheduler
.get_potential_loads(maybe_seq_hashes, isl_tokens, overlap_scores) .get_potential_loads(maybe_seq_hashes, isl_tokens, overlap_scores))
.await)
} }
/// Dump all events from the indexer /// Dump all events from the indexer
......
...@@ -108,7 +108,6 @@ impl RequestGuard { ...@@ -108,7 +108,6 @@ impl RequestGuard {
if let Err(e) = self if let Err(e) = self
.chooser .chooser
.add_output_block(&self.context_id, decay_fraction) .add_output_block(&self.context_id, decay_fraction)
.await
{ {
tracing::warn!( tracing::warn!(
"Failed to add output block for request {}: {e}", "Failed to add output block for request {}: {e}",
......
...@@ -103,7 +103,7 @@ impl SchedulerQueue { ...@@ -103,7 +103,7 @@ impl SchedulerQueue {
return; return;
}; };
if self.all_workers_busy(threshold).await { if self.all_workers_busy(threshold) {
tracing::debug!("all workers busy, queueing request"); tracing::debug!("all workers busy, queueing request");
let entry = self.make_entry(request); let entry = self.make_entry(request);
self.pending.lock().await.push(entry); self.pending.lock().await.push(entry);
...@@ -121,7 +121,7 @@ impl SchedulerQueue { ...@@ -121,7 +121,7 @@ impl SchedulerQueue {
}; };
loop { loop {
if self.all_workers_busy(threshold).await { if self.all_workers_busy(threshold) {
break; break;
} }
let Some(entry) = self.pending.lock().await.pop() else { let Some(entry) = self.pending.lock().await.pop() else {
...@@ -135,14 +135,11 @@ impl SchedulerQueue { ...@@ -135,14 +135,11 @@ impl SchedulerQueue {
/// Run the full scheduling pipeline for a single request: /// Run the full scheduling pipeline for a single request:
/// compute potential load → select worker → respond → book via add_request. /// compute potential load → select worker → respond → book via add_request.
async fn schedule(&self, mut request: SchedulingRequest) { async fn schedule(&self, mut request: SchedulingRequest) {
let (decode_blocks, prefill_tokens) = self let (decode_blocks, prefill_tokens) = self.slots.potential_blocks_and_tokens(
.slots request.token_seq.clone(),
.potential_blocks_and_tokens( request.isl_tokens,
request.token_seq.clone(), request.overlaps.clone(),
request.isl_tokens, );
request.overlaps.clone(),
)
.await;
request.decode_blocks = decode_blocks; request.decode_blocks = decode_blocks;
request.prefill_tokens = prefill_tokens; request.prefill_tokens = prefill_tokens;
...@@ -194,8 +191,8 @@ impl SchedulerQueue { ...@@ -194,8 +191,8 @@ impl SchedulerQueue {
/// Check if all workers are busy based on threshold. /// Check if all workers are busy based on threshold.
/// Returns true only if ALL workers exceed the threshold (no worker has capacity). /// Returns true only if ALL workers exceed the threshold (no worker has capacity).
async fn all_workers_busy(&self, threshold: f64) -> bool { fn all_workers_busy(&self, threshold: f64) -> bool {
let active_tokens = self.slots.active_tokens().await; let active_tokens = self.slots.active_tokens();
let configs = self.workers_with_configs.borrow(); let configs = self.workers_with_configs.borrow();
for (&worker_id, config) in configs.iter() { for (&worker_id, config) in configs.iter() {
......
...@@ -272,17 +272,16 @@ impl KvScheduler { ...@@ -272,17 +272,16 @@ impl KvScheduler {
self.slots.worker_type() self.slots.worker_type()
} }
pub async fn add_output_block( pub fn add_output_block(
&self, &self,
request_id: &str, request_id: &str,
decay_fraction: Option<f64>, decay_fraction: Option<f64>,
) -> Result<(), SequenceError> { ) -> Result<(), SequenceError> {
self.slots self.slots
.add_output_block(&request_id.to_string(), decay_fraction) .add_output_block(&request_id.to_string(), decay_fraction)
.await
} }
pub async fn get_potential_loads( pub fn get_potential_loads(
&self, &self,
token_seq: Option<Vec<SequenceHash>>, token_seq: Option<Vec<SequenceHash>>,
isl_tokens: usize, isl_tokens: usize,
...@@ -290,8 +289,7 @@ impl KvScheduler { ...@@ -290,8 +289,7 @@ impl KvScheduler {
) -> Vec<PotentialLoad> { ) -> Vec<PotentialLoad> {
let (decode_blocks, prefill_tokens) = self let (decode_blocks, prefill_tokens) = self
.slots .slots
.potential_blocks_and_tokens(token_seq, isl_tokens, overlaps) .potential_blocks_and_tokens(token_seq, isl_tokens, overlaps);
.await;
// Get all unique WorkerWithDpRank from both hashmaps // Get all unique WorkerWithDpRank from both hashmaps
let mut workers: HashSet<WorkerWithDpRank> = HashSet::new(); let mut workers: HashSet<WorkerWithDpRank> = HashSet::new();
......
This diff is collapsed.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment