"vscode:/vscode.git/clone" did not exist on "b1ec112eeb0e0633e57d89a60f6f80322cff0028"
Unverified Commit e1bea67f authored by Jeff Rasley, committed by GitHub
Browse files

attach empty grad to its param to ensure it's copied after reduction (#316)

parent 6855ba1c
......@@ -1013,10 +1013,10 @@ class DeepSpeedLight(Module):
# rank is reducing the same size. In some cases it may make
# sense in the future to support the ability to average not
# w.r.t. world size but with a different value.
grads.append(
torch.zeros(param.size(),
param.grad = torch.zeros(param.size(),
dtype=param.dtype,
device=param.device))
device=param.device)
grads.append(param.grad.data)
else:
grad_data = param.grad.data
if self.sparse_gradients_enabled(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment