Unverified Commit 6e65c2cc authored by Olatunji Ruwase, committed by GitHub

Deprecate client ability to disable gradient reduction (#552)


Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
parent 1ef5cd23
@@ -878,6 +878,11 @@ class DeepSpeedEngine(Module):
             allreduce_gradients: If this is False, then gradient averaging will be skipped. Default is True.
         """
+        if not allreduce_gradients:
+            logger.warning(
+                f'Argument `allreduce_gradients` is deprecated, ignored, and will soon be removed'
+            )
         # scale loss w.r.t. gradient accumulation if needed
         if self.gradient_accumulation_steps() > 1:
             loss = self._scale_loss(loss.float())
@@ -931,7 +936,7 @@ class DeepSpeedEngine(Module):
             self.timers('backward_allreduce_microstep').start()
             self.timers('backward_allreduce').start()
-        if allreduce_gradients and self.enable_backward_allreduce:
+        if self.enable_backward_allreduce:
             self.allreduce_gradients()
         if self.wall_clock_breakdown():
...
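For context on the engine-side change: `backward()` still accepts `allreduce_gradients` so existing call sites keep working, but the flag no longer gates reduction; only the engine's `enable_backward_allreduce` switch does. A minimal runnable sketch of this deprecation pattern (`ToyEngine` and its methods are illustrative stand-ins, not the DeepSpeed source):

import logging
import torch

logger = logging.getLogger(__name__)

class ToyEngine:
    """Hypothetical stand-in for DeepSpeedEngine.backward(); not the real source."""

    def __init__(self, enable_backward_allreduce=True):
        self.enable_backward_allreduce = enable_backward_allreduce

    def allreduce_gradients(self):
        # Placeholder for the real cross-rank gradient averaging.
        print('averaging gradients across data-parallel ranks')

    def backward(self, loss, allreduce_gradients=True):
        # The flag is still accepted for API compatibility but is ignored.
        if not allreduce_gradients:
            logger.warning('Argument `allreduce_gradients` is deprecated, '
                           'ignored, and will soon be removed')
        loss.backward()
        # Reduction is now gated solely by the engine-level switch.
        if self.enable_backward_allreduce:
            self.allreduce_gradients()

engine = ToyEngine()
loss = (torch.randn(4, requires_grad=True) ** 2).sum()
engine.backward(loss, allreduce_gradients=False)  # warns, then still reduces

Under this change, a client that previously skipped reduction per-call would instead have to disable it at the engine level, which is exactly the narrowing the commit title describes.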
@@ -955,6 +955,12 @@ class FP16_DeepSpeedZeroOptimizer(object):
         with torch.cuda.stream(stream):
             for _, param, param_id in self.params_in_ipg_bucket:
+                assert self.params_already_reduced[param_id] == False, \
+                    f"The parameter {param_id} has already been reduced. \
+                    Gradient computed twice for this partition. \
+                    Multiple gradient reduction is currently not supported"
                 self.params_already_reduced[param_id] = True
                 if not self.is_param_in_current_partition[param_id]:
...
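The ZeRO hunk turns silent double reduction into a hard failure: if a parameter id in the current IPG bucket is already marked as reduced, the optimizer now asserts instead of averaging its gradient a second time. A self-contained sketch of that bookkeeping (class and method names here are illustrative assumptions, not the optimizer's API):

class ReduceOnceTracker:
    """Sketch of the `params_already_reduced` bookkeeping; names are assumptions."""

    def __init__(self, param_ids):
        self.already_reduced = {pid: False for pid in param_ids}

    def mark_reduced(self, param_id):
        # Fail loudly instead of silently averaging a gradient twice.
        assert not self.already_reduced[param_id], (
            f"Parameter {param_id} has already been reduced; "
            "multiple gradient reduction is not supported")
        self.already_reduced[param_id] = True

    def reset(self):
        # Cleared once per step, after every bucket has been flushed.
        for pid in self.already_reduced:
            self.already_reduced[pid] = False

tracker = ReduceOnceTracker(param_ids=[0, 1, 2])
tracker.mark_reduced(0)    # first reduction of param 0: allowed
tracker.reset()
tracker.mark_reduced(0)    # allowed again after the per-step reset

Failing fast here is the safer design: a gradient that is all-reduced twice would be silently scaled wrong, which is much harder to debug than an assertion at the point of the duplicate reduction.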