fix nan in full-fp16 label_smoothing eval (#10815)

e21f89f6 · Stas Bekman · GitHub · b5b957a6 · e21f89f6
Unverified commit e21f89f6, authored Mar 22, 2021 by Stas Bekman; committed by GitHub on Mar 22, 2021
Show whitespace changes
Inline Side-by-side

Showing with 2 additions and 1 deletion

src/transformers/trainer_pt_utils.py src/transformers/trainer_pt_utils.py +2 -1

No files found.
--- a/src/transformers/trainer_pt_utils.py
+++ b/src/transformers/trainer_pt_utils.py
@@ -433,7 +433,8 @@ class LabelSmoother:
        # will ignore them in any case.
        labels.clamp_min_(0)
        nll_loss = log_probs.gather(dim=-1, index=labels)
-        smoothed_loss = log_probs.sum(dim=-1, keepdim=True)
+        # works for fp16 input tensor too, by internally upcasting it to fp32
+        smoothed_loss = log_probs.sum(dim=-1, keepdim=True, dtype=torch.float32)

        nll_loss.masked_fill_(padding_mask, 0.0)
        smoothed_loss.masked_fill_(padding_mask, 0.0)