Commit 274d850d authored by Julien Chaumond

Fix #4098

parent 26dad0a9
@@ -145,7 +145,7 @@ class Attention(nn.Module):
         w = w / (v.size(-1) ** 0.5)
         nd, ns = w.size(-2), w.size(-1)
         mask = self.bias[:, :, ns - nd : ns, :ns]
-        w = torch.where(mask, w, self.masked_bias)
+        w = torch.where(mask, w, self.masked_bias.to(w.dtype))
         if attention_mask is not None:
             # Apply the attention mask
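The change casts the `masked_bias` fill value to the dtype of the attention scores before passing it to `torch.where`. A minimal sketch of why this matters, assuming a half-precision forward pass (the tensor shapes and the `-1e4` fill value here are illustrative, not taken from the commit): `torch.where` with an fp16 score tensor and an fp32 fill buffer raised a dtype-mismatch error on PyTorch releases of that era, and on later releases it would promote the result to fp32 instead of keeping it in fp16.

```python
import torch

# Attention scores in half precision, as in an fp16 forward pass.
w = torch.randn(1, 1, 4, 4, dtype=torch.float16)

# Causal mask plus the fill buffer, mirroring the unpatched code
# (the -1e4 value is illustrative).
mask = torch.tril(torch.ones(4, 4, dtype=torch.bool)).view(1, 1, 4, 4)
masked_bias = torch.tensor(-1e4)  # float32 by default

# Unpatched: torch.where(mask, w, masked_bias) mixes fp16 and fp32,
# which errors on older PyTorch and upcasts w on newer releases.
# Patched: cast the fill value to w's dtype so the result stays fp16.
w = torch.where(mask, w, masked_bias.to(w.dtype))
print(w.dtype)  # torch.float16
```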