Commit 9ffda216 authored by Simon Layton

Fix missed head transpose

parent d51b5894
@@ -284,7 +284,7 @@ class XLNetRelativeAttention(nn.Module):
         # Mask heads if we want to
         if head_mask is not None:
-            attn_prob = attn_prob * head_mask
+            attn_prob = attn_prob * torch.einsum('ijbn->bnij', head_mask)
         # attention output
         attn_vec = torch.einsum('bnij,jbnd->ibnd', attn_prob, v_head_h)
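For reference, the einsum added here is a pure axis transpose. Below is a minimal sketch of why it is needed, assuming head_mask still arrives in the upstream (qlen, klen, bsz, n_head) ordering while this branch keeps attn_prob in (bsz, n_head, qlen, klen); the sizes are made up for illustration.

import torch

# Hypothetical sizes: qlen=i, klen=j, batch=b, heads=n
i, j, b, n = 5, 5, 2, 4
head_mask = torch.ones(i, j, b, n)   # assumed (qlen, klen, bsz, n_head) layout
attn_prob = torch.rand(b, n, i, j)   # probabilities kept as (bsz, n_head, qlen, klen)

# 'ijbn->bnij' only reorders axes, equivalent to permute(2, 3, 0, 1),
# so the mask lines up element-wise with attn_prob.
mask_t = torch.einsum('ijbn->bnij', head_mask)
assert mask_t.shape == attn_prob.shape
assert torch.equal(mask_t, head_mask.permute(2, 3, 0, 1))

attn_prob = attn_prob * mask_t

Without the transpose, the element-wise multiply would in general fail to broadcast against the reordered attn_prob, which is what the commit message calls the missed head transpose.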