[Bugfix] Allow 64-bit integer values for LoRA IDs to avoid overflow/truncation (#27876)

Signed-off-by: Madeesh Kannan <shadeMe@users.noreply.github.com>

[Bugfix] Allow 64-bit integer values for LoRA IDs to avoid overflow/truncation (#27876)
Signed-off-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
675704ac · Madeesh Kannan · GitHub · 0384aa71 · 675704ac · 675704ac
Unverified commit 675704ac, authored Oct 31, 2025 by Madeesh Kannan; committed by GitHub on Oct 31, 2025.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 2 additions and 2 deletions

vllm/v1/worker/gpu_input_batch.py (+1 -1)

vllm/v1/worker/tpu_input_batch.py (+1 -1)

No files found.
--- a/vllm/v1/worker/gpu_input_batch.py
+++ b/vllm/v1/worker/gpu_input_batch.py
@@ -204,7 +204,7 @@ class InputBatch:
        self.num_accepted_tokens_cpu = self.num_accepted_tokens_cpu_tensor.numpy()
        # lora related
-        self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int32)
+        self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int64)
        self.lora_id_to_request_ids: dict[int, set[str]] = {}
        self.lora_id_to_lora_request: dict[int, LoRARequest] = {}

--- a/vllm/v1/worker/tpu_input_batch.py
+++ b/vllm/v1/worker/tpu_input_batch.py
@@ -139,7 +139,7 @@ class InputBatch:
        self.min_tokens: dict[int, tuple[int, set[int]]] = {}
        # lora related
-        self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int32)
+        self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int64)
        self.lora_id_to_request_ids: dict[int, set[str]] = {}
        self.lora_id_to_lora_request: dict[int, LoRARequest] = {}