[V1] Simplify GPUModelRunner._update_states check (#13265)

085b7b2d · Nick Hill · GitHub · 4da1f667 · 085b7b2d
Unverified commit 085b7b2d, authored Feb 14, 2025 by Nick Hill; committed by GitHub Feb 14, 2025
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 2 deletions

vllm/v1/worker/gpu_model_runner.py +4 -2

No files found.
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -347,6 +347,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
            self.input_batch.block_table.append_row(req_index, start_index,
                                                    req_data.new_block_ids)

+        batch_changed = len(removed_req_indices) > 0 or len(req_ids_to_add) > 0
+
        # Add the new or resumed requests to the persistent batch.
        # The smaller empty indices are filled first.
        removed_req_indices = sorted(removed_req_indices, reverse=True)
@@ -363,8 +365,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
        # Condense the batched states if there are empty indices.
        if removed_req_indices:
            self.input_batch.condense(removed_req_indices)
-        return (len(unscheduled_req_ids) > 0 or len(req_ids_to_add) > 0
-                or len(scheduler_output.finished_req_ids) > 0)
+
+        return batch_changed

    def _prepare_inputs(self, scheduler_output: "SchedulerOutput"):
        total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens