"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "ff625465444c680e96c75fb1e21f46932a01949b"
Unverified commit f701319e, authored by Yan Ru Pei, committed by GitHub
Browse files

chore(mocker): panic loudly on scheduler invariant violations (#8406)


Signed-off-by: PeaBrane <yanrpei@gmail.com>
parent 781e4100
......@@ -57,9 +57,12 @@ pub(super) fn cache_materialized_prefix(
return;
}
let Some(last_node) = req.last_node else {
return;
};
let last_node = req.last_node.unwrap_or_else(|| {
panic!(
"cache_materialized_prefix: request {} has aligned_tokens={aligned_tokens} but last_node is None",
req.uuid
)
});
let sequence = req.sequence_prefix(aligned_tokens);
let new_last =
......
......@@ -106,10 +106,12 @@ pub(super) fn get_new_batch_prefill(
}
let alloc = if req.materialized_tokens > 0 {
let Some(last_node) = prev_node else {
rejected.push_back(req);
break;
};
let last_node = prev_node.unwrap_or_else(|| {
panic!(
"prefill: request {} has materialized_tokens={} but last_node is None",
req.uuid, req.materialized_tokens
)
});
kv_manager.allocate_after_prefix(
&alloc_tokens,
req.materialized_tokens,
......
......@@ -473,9 +473,11 @@ impl VllmCore {
batch_total_prefix: &mut usize,
preempted_any: &mut bool,
) -> ScheduleOutcome {
let Some(request) = self.state.requests.get(&uuid) else {
return ScheduleOutcome::Blocked;
};
let request = self
.state
.requests
.get(&uuid)
.unwrap_or_else(|| panic!("schedule_request: {uuid} missing from state.requests"));
debug_assert_vllm_request_invariants(uuid, request);
let prefill_cost = self.kv_manager.get_prefill_cost(&request.sequence);
let cached_prefix_tokens = if request.num_computed_tokens == 0 {
......@@ -508,9 +510,9 @@ impl VllmCore {
loop {
let allocation = {
let Some(request) = self.state.requests.get_mut(&uuid) else {
return ScheduleOutcome::Blocked;
};
let request = self.state.requests.get_mut(&uuid).unwrap_or_else(|| {
panic!("schedule_request: {uuid} removed mid-pass (alloc prep)")
});
let allocation_target = desired_computed_after;
let prev_allocated_tokens = request.sequence.num_allocated_tokens();
if allocation_target <= prev_allocated_tokens {
......@@ -525,9 +527,9 @@ impl VllmCore {
break;
};
let Some(signal) = maybe_signal else {
let Some(request) = self.state.requests.get_mut(&uuid) else {
return ScheduleOutcome::Blocked;
};
let request = self.state.requests.get_mut(&uuid).unwrap_or_else(|| {
panic!("schedule_request: {uuid} removed mid-pass (commit no-signal)")
});
request.sequence.commit_allocation(allocation_target);
request.num_computed_tokens = actual_computed_after;
break;
......@@ -539,9 +541,9 @@ impl VllmCore {
};
let allocated = self.kv_manager.process(&signal);
let (_committed_tokens, current_computed_tokens) = {
let Some(request) = self.state.requests.get_mut(&uuid) else {
return ScheduleOutcome::Blocked;
};
let request = self.state.requests.get_mut(&uuid).unwrap_or_else(|| {
panic!("schedule_request: {uuid} removed mid-pass (post-process commit)")
});
let committed_tokens = if allocated == expected {
allocation_target
} else {
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment