Commit 604d423b authored by Thor Johnsen

Broadcast parameters from rank 0 just to be safe

parent bc98827b
@@ -89,6 +89,7 @@ class DistributedFusedAdam(torch.optim.Optimizer):
         self._grads_info = []
         for group in self.param_groups:
             for p in group['params']:
+                torch.distributed.broadcast(p,0)
                 if not p.requires_grad:
                     continue
                 p_grads_size = p.numel()
...
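For context on what the added line does: torch.distributed.broadcast is a collective, in-place operation, so after the call every rank's copy of p holds rank 0's values. A minimal standalone sketch of the same pattern (the helper name broadcast_params_from_rank0 is made up for illustration, and it assumes torch.distributed.init_process_group has already been called on every rank):

    import torch
    import torch.distributed as dist

    def broadcast_params_from_rank0(param_groups):
        # Mirrors the loop in DistributedFusedAdam.__init__: walk every
        # parameter and broadcast it from rank 0. broadcast() is in-place:
        # rank 0 sends its tensor, all other ranks receive into theirs, so
        # all workers start from identical weights.
        for group in param_groups:
            for p in group['params']:
                dist.broadcast(p, src=0)

This guards against ranks starting from slightly diverged weights (e.g. differently seeded initialization), which would otherwise go undetected in data-parallel training; hence the commit message's "just to be safe".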