Unverified commit efa89448, authored by Yan Ru Pei, committed by GitHub
Browse files

chore: de-async scheduler read paths and unblock decode output tracking (#6510)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 6fab12be
......@@ -494,14 +494,12 @@ impl KvRouter {
self.scheduler.worker_type()
}
pub async fn add_output_block(
pub fn add_output_block(
&self,
request_id: &str,
decay_fraction: Option<f64>,
) -> Result<(), SequenceError> {
self.scheduler
.add_output_block(request_id, decay_fraction)
.await
self.scheduler.add_output_block(request_id, decay_fraction)
}
pub fn block_size(&self) -> u32 {
......@@ -541,8 +539,7 @@ impl KvRouter {
Ok(self
.scheduler
.get_potential_loads(maybe_seq_hashes, isl_tokens, overlap_scores)
.await)
.get_potential_loads(maybe_seq_hashes, isl_tokens, overlap_scores))
}
/// Dump all events from the indexer
......
......@@ -108,7 +108,6 @@ impl RequestGuard {
if let Err(e) = self
.chooser
.add_output_block(&self.context_id, decay_fraction)
.await
{
tracing::warn!(
"Failed to add output block for request {}: {e}",
......
......@@ -103,7 +103,7 @@ impl SchedulerQueue {
return;
};
if self.all_workers_busy(threshold).await {
if self.all_workers_busy(threshold) {
tracing::debug!("all workers busy, queueing request");
let entry = self.make_entry(request);
self.pending.lock().await.push(entry);
......@@ -121,7 +121,7 @@ impl SchedulerQueue {
};
loop {
if self.all_workers_busy(threshold).await {
if self.all_workers_busy(threshold) {
break;
}
let Some(entry) = self.pending.lock().await.pop() else {
......@@ -135,14 +135,11 @@ impl SchedulerQueue {
/// Run the full scheduling pipeline for a single request:
/// compute potential load → select worker → respond → book via add_request.
async fn schedule(&self, mut request: SchedulingRequest) {
let (decode_blocks, prefill_tokens) = self
.slots
.potential_blocks_and_tokens(
request.token_seq.clone(),
request.isl_tokens,
request.overlaps.clone(),
)
.await;
let (decode_blocks, prefill_tokens) = self.slots.potential_blocks_and_tokens(
request.token_seq.clone(),
request.isl_tokens,
request.overlaps.clone(),
);
request.decode_blocks = decode_blocks;
request.prefill_tokens = prefill_tokens;
......@@ -194,8 +191,8 @@ impl SchedulerQueue {
/// Check if all workers are busy based on threshold.
/// Returns true only if ALL workers exceed the threshold (no worker has capacity).
async fn all_workers_busy(&self, threshold: f64) -> bool {
let active_tokens = self.slots.active_tokens().await;
fn all_workers_busy(&self, threshold: f64) -> bool {
let active_tokens = self.slots.active_tokens();
let configs = self.workers_with_configs.borrow();
for (&worker_id, config) in configs.iter() {
......
......@@ -272,17 +272,16 @@ impl KvScheduler {
self.slots.worker_type()
}
pub async fn add_output_block(
pub fn add_output_block(
&self,
request_id: &str,
decay_fraction: Option<f64>,
) -> Result<(), SequenceError> {
self.slots
.add_output_block(&request_id.to_string(), decay_fraction)
.await
}
pub async fn get_potential_loads(
pub fn get_potential_loads(
&self,
token_seq: Option<Vec<SequenceHash>>,
isl_tokens: usize,
......@@ -290,8 +289,7 @@ impl KvScheduler {
) -> Vec<PotentialLoad> {
let (decode_blocks, prefill_tokens) = self
.slots
.potential_blocks_and_tokens(token_seq, isl_tokens, overlaps)
.await;
.potential_blocks_and_tokens(token_seq, isl_tokens, overlaps);
// Get all unique WorkerWithDpRank from both hashmaps
let mut workers: HashSet<WorkerWithDpRank> = HashSet::new();
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment