Commit 2545ed1c authored by zhuwenwen
Browse files

Merge branch 'v0.9.2-dev-fix' into 'v0.9.2-dev'

fix: 修复重复判断逻辑

See merge request dcutoolkit/deeplearing/vllm!406
parents fa683b07 7acefb55
...@@ -281,22 +281,21 @@ class Scheduler(SchedulerInterface): ...@@ -281,22 +281,21 @@ class Scheduler(SchedulerInterface):
num_draft_tokens=num_draft_tokens, num_draft_tokens=num_draft_tokens,
num_lookahead_tokens=self.num_lookahead_tokens) num_lookahead_tokens=self.num_lookahead_tokens)
if new_blocks is None: if new_blocks is None:
if new_blocks is None: if self.use_pp:
if self.use_pp: preemptable_reqs = [r for r in self.running if
preemptable_reqs = [r for r in self.running if r.num_tokens_with_spec != r.num_computed_tokens]
r.num_tokens_with_spec != r.num_computed_tokens] else:
else: preemptable_reqs = self.running
preemptable_reqs = self.running # The request cannot be scheduled.
# The request cannot be scheduled. # Preempt the lowest-priority request.
# Preempt the lowest-priority request. if self.policy == SchedulingPolicy.PRIORITY:
if self.policy == SchedulingPolicy.PRIORITY: preempted_req = max(
preempted_req = max( preemptable_reqs,
preemptable_reqs, key=lambda r: (r.priority, r.arrival_time),
key=lambda r: (r.priority, r.arrival_time), )
) else:
else: preempted_req = preemptable_reqs[-1]
preempted_req = preemptable_reqs[-1] self.running.remove(preempted_req)
self.running.remove(preempted_req)
self.kv_cache_manager.free(preempted_req) self.kv_cache_manager.free(preempted_req)
preempted_req.status = RequestStatus.PREEMPTED preempted_req.status = RequestStatus.PREEMPTED
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment