Unverified Commit 29853c3e authored by Stas Bekman's avatar Stas Bekman Committed by GitHub
Browse files

less scary overflow notice (#833)


Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
parent dd03cff2
......@@ -153,9 +153,9 @@ class FP16_Optimizer(object):
if self.overflow:
if self.verbose:
logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(
prev_scale,
logger.info(
"[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(prev_scale,
self.cur_scale))
return self.overflow
combined_scale = self.unscale_and_clip_grads(grads_groups_flat,
......
......@@ -213,7 +213,7 @@ if __name__ == "__main__":
optimizer.step()
# Otherwise, don't do anything -- ie, skip iteration
else:
print('OVERFLOW!')
print('fp16 dynamic loss scale overflow!')
# Update loss scale for next iteration
loss_scaler.update_scale(has_overflow)
......
......@@ -139,9 +139,9 @@ class FP16_UnfusedOptimizer(object):
self._update_scale(self.overflow)
if self.overflow:
if self.verbose:
logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(
prev_scale,
logger.info(
"[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(prev_scale,
self.cur_scale))
return self.overflow
......@@ -165,9 +165,9 @@ class FP16_UnfusedOptimizer(object):
self._update_scale(self.overflow)
if self.overflow:
if self.verbose:
logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(
prev_scale,
logger.info(
"[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(prev_scale,
self.cur_scale))
return self.overflow
......
......@@ -630,9 +630,9 @@ class FP16_DeepSpeedZeroOptimizer_Stage1(object):
if self.overflow:
self.zero_grad()
if self.verbose:
logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(
prev_scale,
logger.info(
"[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
"scale: {}, reducing to {}".format(prev_scale,
self.loss_scale))
return self.overflow
......
......@@ -1355,7 +1355,7 @@ class FP16_DeepSpeedZeroOptimizer(object):
see_memory_usage('After overflow after clearing gradients')
logger.info(
"[deepscale] OVERFLOW! Rank {} Skipping step. Attempted loss scale: {}, "
"[deepspeed] fp16 dynamic loss scale overflow! Rank {} Skipping step. Attempted loss scale: {}, "
"reducing to {}".format(dist.get_rank(),
prev_scale,
self.loss_scale))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment