Commit 7acefb55 authored by jujl1
Browse files

fix: 修复重复判断逻辑

parent cb7d90a7
......@@ -281,22 +281,21 @@ class Scheduler(SchedulerInterface):
num_draft_tokens=num_draft_tokens,
num_lookahead_tokens=self.num_lookahead_tokens)
if new_blocks is None:
if new_blocks is None:
if self.use_pp:
preemptable_reqs = [r for r in self.running if
r.num_tokens_with_spec != r.num_computed_tokens]
else:
preemptable_reqs = self.running
# The request cannot be scheduled.
# Preempt the lowest-priority request.
if self.policy == SchedulingPolicy.PRIORITY:
preempted_req = max(
preemptable_reqs,
key=lambda r: (r.priority, r.arrival_time),
)
else:
preempted_req = preemptable_reqs[-1]
self.running.remove(preempted_req)
if self.use_pp:
preemptable_reqs = [r for r in self.running if
r.num_tokens_with_spec != r.num_computed_tokens]
else:
preemptable_reqs = self.running
# The request cannot be scheduled.
# Preempt the lowest-priority request.
if self.policy == SchedulingPolicy.PRIORITY:
preempted_req = max(
preemptable_reqs,
key=lambda r: (r.priority, r.arrival_time),
)
else:
preempted_req = preemptable_reqs[-1]
self.running.remove(preempted_req)
self.kv_cache_manager.free(preempted_req)
preempted_req.status = RequestStatus.PREEMPTED
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment