Unverified commit 206c35e9 authored by Thomas Wolf, committed by GitHub

Merge pull request #1154 from ziliwang/master

fix: hard-coded max number overflows under float16
parents f3d18c71 6060b2f8
@@ -418,7 +418,10 @@ class XLNetRelativeAttention(nn.Module):
         attn_score = (ac + bd + ef) * self.scale
         if attn_mask is not None:
             # attn_score = attn_score * (1 - attn_mask) - 1e30 * attn_mask
-            attn_score = attn_score - 1e30 * attn_mask
+            if attn_mask.dtype == torch.float16:
+                attn_score = attn_score - 65500 * attn_mask
+            else:
+                attn_score = attn_score - 1e30 * attn_mask
 
         # attention probability
         attn_prob = F.softmax(attn_score, dim=1)
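Why the dtype check matters: float16 has a maximum finite value of 65504, so the old constant 1e30 is not representable in half precision. Depending on the PyTorch version, multiplying a half tensor by that scalar either raises an overflow error ("value cannot be converted to type Half without overflow") or silently produces inf/NaN after the softmax. The sketch below is a minimal standalone illustration of the patched behavior, not code from the repository; the `neg_big` name and the toy tensors are made up for the example.

```python
import torch
import torch.nn.functional as F

attn_score = torch.randn(2, 4, dtype=torch.float16)
attn_mask = torch.tensor([[0., 0., 1., 1.],
                          [0., 1., 1., 1.]], dtype=torch.float16)

# The pre-patch line would overflow half precision:
# attn_score = attn_score - 1e30 * attn_mask

# Dtype-aware constant, mirroring the patched branch: 65500 sits just below
# float16's largest finite value (65504), so the product stays finite while
# still pushing masked logits low enough that softmax sends them to ~0.
neg_big = 65500. if attn_score.dtype == torch.float16 else 1e30
attn_score = attn_score - neg_big * attn_mask

# Upcast before softmax so the sketch also runs on CPUs where half-precision
# softmax support varies across PyTorch versions.
attn_prob = F.softmax(attn_score.float(), dim=-1)
print(attn_prob)  # masked positions get (near-)zero probability
```

Choosing 65500 rather than the exact float16 maximum leaves headroom so the subtraction itself cannot round up past the representable range.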