Fix #3954 - GPT2 is not traceable (#3955)

* Update sqrt computation so it can survive a torch.jit.trace
* Update modeling_gpt2.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>

Fix #3954 - GPT2 is not traceable (#3955)
* Update sqrt computation so it can survive a torch.jit.trace
* Update modeling_gpt2.py

Co-authored-by: Patrick von Platen <patrick.v.platen@gmail.com>
c7d06b79 · jazzcook15 · GitHub · 9a0a8c1c · c7d06b79
Unverified Commit c7d06b79 authored Apr 28, 2020 by jazzcook15 Committed by GitHub Apr 28, 2020
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 2 deletions

src/transformers/modeling_gpt2.py src/transformers/modeling_gpt2.py +1 -2

No files found.
--- a/src/transformers/modeling_gpt2.py
+++ b/src/transformers/modeling_gpt2.py
@@ -17,7 +17,6 @@


 import logging
-import math
 import os

 import torch
@@ -143,7 +142,7 @@ class Attention(nn.Module):
    def _attn(self, q, k, v, attention_mask=None, head_mask=None):
        w = torch.matmul(q, k)
        if self.scale:
-            w = w / math.sqrt(v.size(-1))
+            w = w / (v.size(-1) ** 0.5)
        nd, ns = w.size(-2), w.size(-1)
        mask = self.bias[:, :, ns - nd : ns, :ns]
        w = torch.where(mask, w, self.masked_bias)