Unverified Commit 29853c3e authored by Stas Bekman, committed by GitHub

less scary overflow notice (#833)


Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
parent dd03cff2
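
For context: in fp16 training the loss is multiplied by a dynamic scale before `backward()`; if the scaled gradients overflow to inf/NaN, the optimizer skips the step and lowers the scale, then gradually raises it again after a run of clean steps. That is routine and self-correcting, hence the less alarming wording below. A minimal, self-contained sketch of the pattern (illustrative only; `DynamicLossScaler` and its methods are hypothetical stand-ins, not DeepSpeed's actual classes):

```python
import torch


class DynamicLossScaler:
    """Minimal sketch of dynamic fp16 loss scaling (not DeepSpeed's API)."""

    def __init__(self, init_scale=2.0**16, scale_factor=2.0, scale_window=1000):
        self.cur_scale = init_scale       # current loss scale
        self.scale_factor = scale_factor  # shrink/grow multiplier
        self.scale_window = scale_window  # clean steps before growing again
        self.good_steps = 0

    def has_overflow(self, params):
        # Overflowed fp16 gradients show up as inf/NaN.
        return any(p.grad is not None and not torch.isfinite(p.grad).all()
                   for p in params)

    def update_scale(self, overflow):
        if overflow:
            prev_scale = self.cur_scale
            self.cur_scale = max(self.cur_scale / self.scale_factor, 1.0)
            self.good_steps = 0
            print("fp16 dynamic loss scale overflow! Skipping step. "
                  "Attempted loss scale: {}, reducing to {}".format(
                      prev_scale, self.cur_scale))
        else:
            self.good_steps += 1
            if self.good_steps % self.scale_window == 0:
                self.cur_scale *= self.scale_factor  # probe a larger scale


# Usage: scale the loss up, check gradients, unscale and step only
# when everything is finite; update the scale either way.
model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scaler = DynamicLossScaler()

for _ in range(3):
    optimizer.zero_grad()
    loss = model(torch.randn(8, 4)).pow(2).mean()
    (loss * scaler.cur_scale).backward()
    overflow = scaler.has_overflow(model.parameters())
    if not overflow:
        for p in model.parameters():
            p.grad.div_(scaler.cur_scale)  # unscale before stepping
        optimizer.step()
    scaler.update_scale(overflow)
```

An overflow here is expected behavior, not an error: the scaler deliberately pushes the scale as high as it can and backs off when gradients blow up, so the message fires occasionally in healthy runs.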
@@ -153,10 +153,10 @@ class FP16_Optimizer(object):
         if self.overflow:
             if self.verbose:
-                logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
-                            "scale: {}, reducing to {}".format(
-                                prev_scale,
-                                self.cur_scale))
+                logger.info(
+                    "[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
+                    "scale: {}, reducing to {}".format(prev_scale,
+                                                       self.cur_scale))
             return self.overflow
         combined_scale = self.unscale_and_clip_grads(grads_groups_flat,
                                                      norm_groups,
@@ -213,7 +213,7 @@ if __name__ == "__main__":
             optimizer.step()
         # Otherwise, don't do anything -- ie, skip iteration
         else:
-            print('OVERFLOW!')
+            print('fp16 dynamic loss scale overflow!')
         # Update loss scale for next iteration
         loss_scaler.update_scale(has_overflow)
@@ -139,10 +139,10 @@ class FP16_UnfusedOptimizer(object):
         self._update_scale(self.overflow)
         if self.overflow:
             if self.verbose:
-                logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
-                            "scale: {}, reducing to {}".format(
-                                prev_scale,
-                                self.cur_scale))
+                logger.info(
+                    "[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
+                    "scale: {}, reducing to {}".format(prev_scale,
+                                                       self.cur_scale))
             return self.overflow
         combined_scale = self.unscale_and_clip_grads(norm_groups, apply_scale=False)
@@ -165,10 +165,10 @@ class FP16_UnfusedOptimizer(object):
         self._update_scale(self.overflow)
         if self.overflow:
             if self.verbose:
-                logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
-                            "scale: {}, reducing to {}".format(
-                                prev_scale,
-                                self.cur_scale))
+                logger.info(
+                    "[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
+                    "scale: {}, reducing to {}".format(prev_scale,
+                                                       self.cur_scale))
             return self.overflow
         norm_groups = []
@@ -630,10 +630,10 @@ class FP16_DeepSpeedZeroOptimizer_Stage1(object):
         if self.overflow:
             self.zero_grad()
             if self.verbose:
-                logger.info("[deepspeed] OVERFLOW! Skipping step. Attempted loss "
-                            "scale: {}, reducing to {}".format(
-                                prev_scale,
-                                self.loss_scale))
+                logger.info(
+                    "[deepspeed] fp16 dynamic loss scale overflow! Skipping step. Attempted loss "
+                    "scale: {}, reducing to {}".format(prev_scale,
+                                                       self.loss_scale))
             return self.overflow
         norm_groups = []
@@ -1355,7 +1355,7 @@ class FP16_DeepSpeedZeroOptimizer(object):
             see_memory_usage('After overflow after clearing gradients')
             logger.info(
-                "[deepscale] OVERFLOW! Rank {} Skipping step. Attempted loss scale: {}, "
+                "[deepspeed] fp16 dynamic loss scale overflow! Rank {} Skipping step. Attempted loss scale: {}, "
                 "reducing to {}".format(dist.get_rank(),
                                         prev_scale,
                                         self.loss_scale))