Unverified Commit 3a27ba3d authored by Younes Belkada, committed by GitHub

Fix opt softmax small nit (#19243)

* fix opt softmax nit

- Use the same logic as 1eb09537550734a783c194e416029cb9bc4cb119 for consistency

* Update src/transformers/models/opt/modeling_opt.py
parent ba9e336f
src/transformers/models/opt/modeling_opt.py
@@ -218,11 +218,10 @@ class OPTAttention(nn.Module):
         attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
         attn_weights = torch.max(attn_weights, torch.tensor(torch.finfo(attn_weights.dtype).min))
         attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
-        dtype_attn_weights = attn_weights.dtype
         # upcast to fp32 if the weights are in fp16. Please see https://github.com/huggingface/transformers/pull/17437
-        if dtype_attn_weights == torch.float16:
-            attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(dtype_attn_weights)
+        if attn_weights.dtype == torch.float16:
+            attn_weights = nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(torch.float16)
         else:
             attn_weights = nn.functional.softmax(attn_weights, dim=-1)
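For context, a minimal standalone sketch of the pattern this diff keeps: when the attention scores are fp16, the softmax is computed in fp32 for numerical stability and the result is cast back to fp16. The helper name stable_softmax and the example tensor are illustrative only, not part of the repository.

import torch
import torch.nn as nn

def stable_softmax(attn_weights: torch.Tensor) -> torch.Tensor:
    # Upcast fp16 scores to fp32 before the softmax, then cast back to fp16.
    if attn_weights.dtype == torch.float16:
        return nn.functional.softmax(attn_weights, dim=-1, dtype=torch.float32).to(torch.float16)
    return nn.functional.softmax(attn_weights, dim=-1)

# Example: rows filled with the fp16 minimum still normalize to ~1 per row.
scores = torch.full((1, 4, 4), torch.finfo(torch.float16).min, dtype=torch.float16)
probs = stable_softmax(scores)
print(probs.dtype, probs.sum(dim=-1))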