Commit 85497632 authored by Thor Johnsen

Bug fix: create a dedicated process group for the L2 gradient norm reduction instead of reusing the last reduce-scatter group.

parent cef660ba
@@ -165,8 +165,7 @@ class DistributedFusedAdam(torch.optim.Optimizer):
                 if torch.distributed.get_rank() in ranks:
                     self._rs_pg.append(grp)
             if self._compute_L2_grad_norm and torch.distributed.get_rank() in ranks:
-                #self._l2_grad_norm_pg = torch.distributed.new_group(ranks=ranks)
-                self._l2_grad_norm_pg = self._rs_pg[-1]
+                self._l2_grad_norm_pg = torch.distributed.new_group(ranks=ranks)
         self._rs_st = [torch.cuda.Stream() for _ in range(self._num_rs_pg)]
         if self._num_ag_pg == 0:
             self._ag_pg = self._rs_pg
...
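
For context, the pattern this change adopts (giving the L2 gradient norm all-reduce its own torch.distributed process group rather than sharing the last reduce-scatter group) looks roughly like the sketch below. It is illustrative only: the launch setup (torchrun, gloo backend) and names such as local_sq_norm and l2_grad_norm_pg are assumptions for the example, not part of DistributedFusedAdam.

# Illustrative sketch: dedicated process group for a global L2 grad-norm
# reduction. Assumes launch via `torchrun --nproc_per_node=N this_file.py`,
# which sets RANK / WORLD_SIZE / MASTER_ADDR / MASTER_PORT.
import torch
import torch.distributed as dist

def main():
    dist.init_process_group(backend="gloo")
    rank = dist.get_rank()
    world_size = dist.get_world_size()

    ranks = list(range(world_size))

    # Dedicated group for the norm reduction; new_group() must be called by
    # every rank in the same order, even ranks that are not in `ranks`.
    l2_grad_norm_pg = dist.new_group(ranks=ranks)

    # Each rank contributes the squared norm of its local gradient shard.
    local_grad = torch.randn(1024)
    local_sq_norm = local_grad.pow(2).sum()

    if rank in ranks:
        # Sum the squared norms over the dedicated group, then take the root.
        dist.all_reduce(local_sq_norm, op=dist.ReduceOp.SUM, group=l2_grad_norm_pg)
        global_l2_norm = local_sq_norm.sqrt()
        print(f"rank {rank}: global grad norm {global_l2_norm.item():.4f}")

    dist.destroy_process_group()

if __name__ == "__main__":
    main()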