self._contrib_tensor_list.append((master_param_fragment,opti_state_m_fragment,opti_state_v_fragment,opti_state_u_fragment,opti_state_g_fragment,opti_state_p_fragment))# p, m, v, u, g, p_copy
self._contrib_tensor_list.append((master_param_fragment,opti_state_m_fragment,opti_state_v_fragment,opti_state_u_fragment,opti_state_g_fragment,opti_state_p_fragment))# p, m, v, u, g, p_copy
...
@@ -322,7 +425,7 @@ class DistributedFusedLAMB(torch.optim.Optimizer):
...
@@ -322,7 +425,7 @@ class DistributedFusedLAMB(torch.optim.Optimizer):