[fix] 修复 split_pd 调度 LoRA 报错

45d85414 · 王敏 · 1bbb2f94 · 45d85414
Commit 45d85414 authored Feb 12, 2026 by 王敏
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 13 additions and 15 deletions

vllm/v1/core/sched/scheduler.py vllm/v1/core/sched/scheduler.py +13 -15

No files found.
--- a/vllm/v1/core/sched/scheduler.py
+++ b/vllm/v1/core/sched/scheduler.py
@@ -663,6 +663,9 @@ class Scheduler(SchedulerInterface):
        # For logging.
        scheduled_timestamp = time.monotonic()

+        # Record the LoRAs in scheduled_running_reqs
+        scheduled_loras: set[int] = set()
+
        # Use a temporary RequestQueue to collect requests that need to be
        # skipped and put back at the head of the waiting queue later
        skipped_waiting_requests = create_request_queue(self.policy)
@@ -993,8 +996,6 @@ class Scheduler(SchedulerInterface):
                        self.encoder_cache_manager.allocate(request, i)
                    encoder_compute_budget = new_encoder_compute_budget

-            # Record the LoRAs in scheduled_running_reqs
-            scheduled_loras: set[int] = set()
        if self.lora_config:
            scheduled_loras = set(
                req.lora_request.lora_int_id for req in scheduled_running_reqs
@@ -1089,14 +1090,11 @@ class Scheduler(SchedulerInterface):
    
    def schedule(self) -> SchedulerOutput:
        if envs.VLLM_USE_PD_SPLIT: 
-            if self.connector is not None:
-                return self.schedule_default()
-            if self.use_mla:
-                if self.full_cuda_graph and self.num_spec_tokens > 0:
            return self.schedule_split_pd()
        else:
+            if self.connector is not None:
                return self.schedule_default()
-            else:
+            if self.full_cuda_graph and self.use_mla and self.num_spec_tokens > 0 :
                return self.schedule_split_pd()
            else:
                return self.schedule_default()