Unverified commit f701319e, authored by Yan Ru Pei, committed by GitHub
Browse files

chore(mocker): panic loudly on scheduler invariant violations (#8406)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 781e4100
...@@ -57,9 +57,12 @@ pub(super) fn cache_materialized_prefix( ...@@ -57,9 +57,12 @@ pub(super) fn cache_materialized_prefix(
return; return;
} }
let Some(last_node) = req.last_node else { let last_node = req.last_node.unwrap_or_else(|| {
return; panic!(
}; "cache_materialized_prefix: request {} has aligned_tokens={aligned_tokens} but last_node is None",
req.uuid
)
});
let sequence = req.sequence_prefix(aligned_tokens); let sequence = req.sequence_prefix(aligned_tokens);
let new_last = let new_last =
......
...@@ -106,10 +106,12 @@ pub(super) fn get_new_batch_prefill( ...@@ -106,10 +106,12 @@ pub(super) fn get_new_batch_prefill(
} }
let alloc = if req.materialized_tokens > 0 { let alloc = if req.materialized_tokens > 0 {
let Some(last_node) = prev_node else { let last_node = prev_node.unwrap_or_else(|| {
rejected.push_back(req); panic!(
break; "prefill: request {} has materialized_tokens={} but last_node is None",
}; req.uuid, req.materialized_tokens
)
});
kv_manager.allocate_after_prefix( kv_manager.allocate_after_prefix(
&alloc_tokens, &alloc_tokens,
req.materialized_tokens, req.materialized_tokens,
......
...@@ -473,9 +473,11 @@ impl VllmCore { ...@@ -473,9 +473,11 @@ impl VllmCore {
batch_total_prefix: &mut usize, batch_total_prefix: &mut usize,
preempted_any: &mut bool, preempted_any: &mut bool,
) -> ScheduleOutcome { ) -> ScheduleOutcome {
let Some(request) = self.state.requests.get(&uuid) else { let request = self
return ScheduleOutcome::Blocked; .state
}; .requests
.get(&uuid)
.unwrap_or_else(|| panic!("schedule_request: {uuid} missing from state.requests"));
debug_assert_vllm_request_invariants(uuid, request); debug_assert_vllm_request_invariants(uuid, request);
let prefill_cost = self.kv_manager.get_prefill_cost(&request.sequence); let prefill_cost = self.kv_manager.get_prefill_cost(&request.sequence);
let cached_prefix_tokens = if request.num_computed_tokens == 0 { let cached_prefix_tokens = if request.num_computed_tokens == 0 {
...@@ -508,9 +510,9 @@ impl VllmCore { ...@@ -508,9 +510,9 @@ impl VllmCore {
loop { loop {
let allocation = { let allocation = {
let Some(request) = self.state.requests.get_mut(&uuid) else { let request = self.state.requests.get_mut(&uuid).unwrap_or_else(|| {
return ScheduleOutcome::Blocked; panic!("schedule_request: {uuid} removed mid-pass (alloc prep)")
}; });
let allocation_target = desired_computed_after; let allocation_target = desired_computed_after;
let prev_allocated_tokens = request.sequence.num_allocated_tokens(); let prev_allocated_tokens = request.sequence.num_allocated_tokens();
if allocation_target <= prev_allocated_tokens { if allocation_target <= prev_allocated_tokens {
...@@ -525,9 +527,9 @@ impl VllmCore { ...@@ -525,9 +527,9 @@ impl VllmCore {
break; break;
}; };
let Some(signal) = maybe_signal else { let Some(signal) = maybe_signal else {
let Some(request) = self.state.requests.get_mut(&uuid) else { let request = self.state.requests.get_mut(&uuid).unwrap_or_else(|| {
return ScheduleOutcome::Blocked; panic!("schedule_request: {uuid} removed mid-pass (commit no-signal)")
}; });
request.sequence.commit_allocation(allocation_target); request.sequence.commit_allocation(allocation_target);
request.num_computed_tokens = actual_computed_after; request.num_computed_tokens = actual_computed_after;
break; break;
...@@ -539,9 +541,9 @@ impl VllmCore { ...@@ -539,9 +541,9 @@ impl VllmCore {
}; };
let allocated = self.kv_manager.process(&signal); let allocated = self.kv_manager.process(&signal);
let (_committed_tokens, current_computed_tokens) = { let (_committed_tokens, current_computed_tokens) = {
let Some(request) = self.state.requests.get_mut(&uuid) else { let request = self.state.requests.get_mut(&uuid).unwrap_or_else(|| {
return ScheduleOutcome::Blocked; panic!("schedule_request: {uuid} removed mid-pass (post-process commit)")
}; });
let committed_tokens = if allocated == expected { let committed_tokens = if allocated == expected {
allocation_target allocation_target
} else { } else {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment