Unverified Commit 206c35e9 authored by Thomas Wolf, committed by GitHub

Merge pull request #1154 from ziliwang/master

fix: hard coding for max number
parents f3d18c71 6060b2f8
@@ -418,6 +418,9 @@ class XLNetRelativeAttention(nn.Module):
         attn_score = (ac + bd + ef) * self.scale
         if attn_mask is not None:
             # attn_score = attn_score * (1 - attn_mask) - 1e30 * attn_mask
-            attn_score = attn_score - 1e30 * attn_mask
+            if attn_mask.dtype == torch.float16:
+                attn_score = attn_score - 65500 * attn_mask
+            else:
+                attn_score = attn_score - 1e30 * attn_mask

         # attention probability
...
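For context (not part of the commit): the largest finite float16 value is 65504, so the hard-coded 1e30 overflows to inf under fp16 and can turn the masked softmax into NaN, while 65500 stays representable. Below is a minimal PyTorch sketch of the effect; the tensor names score and mask are made up for illustration.

import torch

print(torch.finfo(torch.float16).max)            # 65504.0, the largest finite half-precision value
print(torch.tensor(1e30, dtype=torch.float16))   # inf: 1e30 is not representable in float16

score = torch.zeros(4, dtype=torch.float16)                  # hypothetical attention scores
mask = torch.tensor([0., 1., 1., 1.], dtype=torch.float16)   # 1 marks positions to suppress

bad = score - torch.tensor(1e30, dtype=torch.float16) * mask   # inf * 0 -> nan at the unmasked slot
good = score - 65500 * mask                                    # stays finite everywhere

print(torch.softmax(bad, dim=-1))    # nan output
print(torch.softmax(good, dim=-1))   # ~[1, 0, 0, 0]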