Unverified commit 5d390e9e, authored by Anton Lozhkov and committed by GitHub
Browse files

Fix nan-loss condition (#13911)

parent 8f2c07d3
......@@ -1315,8 +1315,11 @@ class Trainer:
else:
tr_loss_step = self.training_step(model, inputs)
-if args.logging_nan_inf_filter and not is_torch_tpu_available():
-if torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step):
+if (
+args.logging_nan_inf_filter
+and not is_torch_tpu_available()
+and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
+):
# if loss is nan or inf simply add the average of previous logged losses
tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment