Unverified commit 675704ac, authored by Madeesh Kannan and committed by GitHub
Browse files

[Bugfix] Allow 64-bit integer values for LoRA IDs to avoid overflow/truncation (#27876)


Signed-off-by: Madeesh Kannan <shadeMe@users.noreply.github.com>
parent 0384aa71
...@@ -204,7 +204,7 @@ class InputBatch: ...@@ -204,7 +204,7 @@ class InputBatch:
self.num_accepted_tokens_cpu = self.num_accepted_tokens_cpu_tensor.numpy() self.num_accepted_tokens_cpu = self.num_accepted_tokens_cpu_tensor.numpy()
# lora related # lora related
self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int32) self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int64)
self.lora_id_to_request_ids: dict[int, set[str]] = {} self.lora_id_to_request_ids: dict[int, set[str]] = {}
self.lora_id_to_lora_request: dict[int, LoRARequest] = {} self.lora_id_to_lora_request: dict[int, LoRARequest] = {}
......
...@@ -139,7 +139,7 @@ class InputBatch: ...@@ -139,7 +139,7 @@ class InputBatch:
self.min_tokens: dict[int, tuple[int, set[int]]] = {} self.min_tokens: dict[int, tuple[int, set[int]]] = {}
# lora related # lora related
self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int32) self.request_lora_mapping = np.zeros((self.max_num_reqs,), dtype=np.int64)
self.lora_id_to_request_ids: dict[int, set[str]] = {} self.lora_id_to_request_ids: dict[int, set[str]] = {}
self.lora_id_to_lora_request: dict[int, LoRARequest] = {} self.lora_id_to_lora_request: dict[int, LoRARequest] = {}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment