"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "5ea2595ecde9a89079d4d289bc65dc5f8dc02ef6"
Unverified commit 5d390e9e, authored by Anton Lozhkov, committed by GitHub
Browse files

Fix nan-loss condition (#13911)

parent 8f2c07d3
...@@ -1315,10 +1315,13 @@ class Trainer: ...@@ -1315,10 +1315,13 @@ class Trainer:
else: else:
tr_loss_step = self.training_step(model, inputs) tr_loss_step = self.training_step(model, inputs)
if args.logging_nan_inf_filter and not is_torch_tpu_available(): if (
if torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step): args.logging_nan_inf_filter
# if loss is nan or inf simply add the average of previous logged losses and not is_torch_tpu_available()
tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged) and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
):
# if loss is nan or inf simply add the average of previous logged losses
tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
else: else:
tr_loss += tr_loss_step tr_loss += tr_loss_step
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment