Unverified Commit 9b61dd41, authored by Yang Liu, committed by GitHub
Browse files

[Bugfix] Initialize attention bias on the same device as Query/Key/Value for QwenVL Series (#14031)

parent f7bee5c8
...@@ -323,7 +323,8 @@ class Qwen2_5_VisionAttention(nn.Module): ...@@ -323,7 +323,8 @@ class Qwen2_5_VisionAttention(nn.Module):
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist() seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens, attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
kv_seqlen=None) kv_seqlen=None,
device=q.device)
context_layer = xops.memory_efficient_attention_forward( context_layer = xops.memory_efficient_attention_forward(
q, k, v, attn_bias=attn_bias, p=0, scale=None) q, k, v, attn_bias=attn_bias, p=0, scale=None)
......
...@@ -367,7 +367,8 @@ class Qwen2VisionAttention(nn.Module): ...@@ -367,7 +367,8 @@ class Qwen2VisionAttention(nn.Module):
seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist() seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist()
attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens, attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens,
kv_seqlen=None) kv_seqlen=None,
device=q.device)
context_layer = xops.memory_efficient_attention_forward( context_layer = xops.memory_efficient_attention_forward(
q, k, v, attn_bias=attn_bias, p=0, scale=None) q, k, v, attn_bias=attn_bias, p=0, scale=None)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment