Commit 92aba825 authored by zhuwenwen
Browse files

Merge branch 'v0.11.0-dev-wm' into 'v0.11.0-dev'

[fix]修复splitpd调度lora报错 (fix: resolve the LoRA error raised by split-PD scheduling)

See merge request dcutoolkit/deeplearing/vllm!432
parents 1bbb2f94 45d85414
......@@ -663,6 +663,9 @@ class Scheduler(SchedulerInterface):
# For logging.
scheduled_timestamp = time.monotonic()
# Record the LoRAs in scheduled_running_reqs
scheduled_loras: set[int] = set()
# Use a temporary RequestQueue to collect requests that need to be
# skipped and put back at the head of the waiting queue later
skipped_waiting_requests = create_request_queue(self.policy)
......@@ -993,13 +996,11 @@ class Scheduler(SchedulerInterface):
self.encoder_cache_manager.allocate(request, i)
encoder_compute_budget = new_encoder_compute_budget
# Record the LoRAs in scheduled_running_reqs
scheduled_loras: set[int] = set()
if self.lora_config:
scheduled_loras = set(
req.lora_request.lora_int_id for req in scheduled_running_reqs
if req.lora_request and req.lora_request.lora_int_id > 0)
assert len(scheduled_loras) <= self.lora_config.max_loras
if self.lora_config:
scheduled_loras = set(
req.lora_request.lora_int_id for req in scheduled_running_reqs
if req.lora_request and req.lora_request.lora_int_id > 0)
assert len(scheduled_loras) <= self.lora_config.max_loras
# Check if the scheduling constraints are satisfied.
total_num_scheduled_tokens = sum(num_scheduled_tokens.values())
......@@ -1089,17 +1090,14 @@ class Scheduler(SchedulerInterface):
def schedule(self) -> SchedulerOutput:
if envs.VLLM_USE_PD_SPLIT:
return self.schedule_split_pd()
else:
if self.connector is not None:
return self.schedule_default()
if self.use_mla:
if self.full_cuda_graph and self.num_spec_tokens > 0:
return self.schedule_split_pd()
else:
return self.schedule_default()
else:
if self.full_cuda_graph and self.use_mla and self.num_spec_tokens > 0 :
return self.schedule_split_pd()
else:
return self.schedule_default()
else:
return self.schedule_default()
def _update_after_schedule(
self,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment