Unverified commit d69ff0cb authored by Tao He, committed by GitHub

Fixes assertion failure in prefix caching: the lora index mapping should respect prefix_len (#2688)


Signed-off-by: Tao He <sighingnow@gmail.com>
parent 1af090b5
```diff
@@ -142,10 +142,10 @@ class ModelRunner:
             if lora_id > 0:
                 lora_requests.add(seq_group_metadata.lora_request)
-            lora_index_mapping.append([lora_id] * prompt_len)
+            lora_index_mapping.append([lora_id] * (prompt_len - prefix_len))
             lora_prompt_mapping.extend(
                 [lora_id] *
-                (prompt_len
+                (prompt_len - prefix_len
                  if seq_group_metadata.sampling_params.prompt_logprobs else 1))
             if seq_group_metadata.block_tables is None:
```
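For context, the assertion failure happens because prefix caching skips recomputation of the cached prefix, so the model only receives `prompt_len - prefix_len` input tokens for the sequence in this step, and the per-token LoRA index mapping must have exactly that length. The sketch below illustrates the corrected sizing; it reuses the `lora_id`, `prompt_len`, and `prefix_len` names from the diff, but the helper function itself is illustrative and not part of vLLM.

```python
# Minimal sketch (not the vLLM implementation) of why the per-token LoRA
# index mapping must respect prefix_len. The names `lora_id`, `prompt_len`,
# and `prefix_len` mirror the diff above; everything else is illustrative.

def build_lora_index_mapping(lora_id: int, prompt_len: int, prefix_len: int) -> list[int]:
    """Return one LoRA index per token actually fed to the model this step.

    With prefix caching, the first `prefix_len` tokens are served from the
    cache and are not recomputed, so only `prompt_len - prefix_len` tokens
    need a LoRA index. Using `prompt_len` here would make the mapping longer
    than the input batch and trip a length assertion downstream.
    """
    return [lora_id] * (prompt_len - prefix_len)


# Example: a 10-token prompt whose first 6 tokens hit the prefix cache.
mapping = build_lora_index_mapping(lora_id=3, prompt_len=10, prefix_len=6)
assert len(mapping) == 4  # matches the 4 uncached tokens processed this step
```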