Unverified Commit 467bef18 authored by Rachel Guo, committed by GitHub
Browse files

[BugFix][FlashInfer] Fix attention backend interface mismatch with unexpected...


[BugFix][FlashInfer] Fix attention backend interface mismatch with unexpected keyword `use_irope` (#19134)
Signed-off-by: Yunqiu Guo <guorachel@meta.com>
parent 5f1ac1e1
...@@ -508,7 +508,12 @@ class FlashInferImpl(AttentionImpl): ...@@ -508,7 +508,12 @@ class FlashInferImpl(AttentionImpl):
logits_soft_cap: Optional[float] = None, logits_soft_cap: Optional[float] = None,
attn_type: AttentionType = AttentionType.DECODER, attn_type: AttentionType = AttentionType.DECODER,
kv_sharing_target_layer_name: Optional[int] = None, kv_sharing_target_layer_name: Optional[int] = None,
use_irope: bool = False,
) -> None: ) -> None:
if use_irope:
logger.warning_once(
"Using irope in FlashInfer is not supported yet, it will fall"
" back to global attention for long context.")
self.num_heads = num_heads self.num_heads = num_heads
self.head_size = head_size self.head_size = head_size
self.scale = float(scale) self.scale = float(scale)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment