Update unified_attention_with_output_fake: add optional query_nope and num_local_heads parameters

9d16d5aa · zhuwenwen · 9dd70f0e · 9d16d5aa
Commit 9d16d5aa authored Jan 16, 2026 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 0 deletions

vllm/attention/layer.py vllm/attention/layer.py +2 -0

No files found.
--- a/vllm/attention/layer.py
+++ b/vllm/attention/layer.py
@@ -566,6 +566,8 @@ def unified_attention_with_output_fake(
    output: torch.Tensor,
    layer_name: str,
    output_scale: Optional[torch.Tensor] = None,
+    query_nope: Optional[torch.Tensor] = None,
+    num_local_heads: Optional[int] = None,
    q_ori: Optional[torch.Tensor] = None,
    key_normed: Optional[torch.Tensor] = None,
    positions: Optional[torch.Tensor] = None,