Commit 7da28fc3 authored by Thor Johnsen

Bug fix

parent 9c82241d
@@ -129,6 +129,7 @@ class DistributedFusedAdam(torch.optim.Optimizer):
                 # (weight_ih, weight_hh, bias_ih, bias_hh)
                 if prev is not None and (prev.data_ptr() + prev.numel() * prev.element_size() != p.data_ptr()):
                     p_offset = ((p_offset + 63) // 64) * 64
+                prev = p
                 p_i += 1
         self._grads_generated = [False]*len(self._grads_info)
         self._flat_mt = flat_mt
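
For context, the hunk above rounds the running flat-buffer offset up to a 64-element boundary only when a parameter is not memory-contiguous with the previous one (consecutive RNN weights such as weight_ih, weight_hh, bias_ih, bias_hh stay packed). Updating prev each iteration is what makes that contiguity check effective. Below is a minimal, self-contained sketch of that offset-assignment loop; the function name assign_offsets, the align argument, and the standalone structure are illustrative assumptions, not part of the DistributedFusedAdam API.

    import torch

    def assign_offsets(params, align=64):
        """Assign each parameter an offset in a flat buffer, rounding up to
        an `align`-element boundary only when the parameter is NOT
        memory-contiguous with the previous one."""
        offsets = []
        p_offset = 0
        prev = None
        for p in params:
            offsets.append(p_offset)
            p_offset += p.numel()
            # Contiguity check: does `p` start exactly where `prev` ends?
            if prev is not None and (prev.data_ptr() + prev.numel() * prev.element_size()
                                     != p.data_ptr()):
                p_offset = ((p_offset + align - 1) // align) * align
            prev = p  # track the previous parameter for the next iteration
        return offsets

    if __name__ == "__main__":
        params = [torch.empty(100), torch.empty(7), torch.empty(5)]
        # Separately allocated tensors are typically not adjacent in memory,
        # so the offsets usually land on 64-element boundaries, e.g. [0, 128, 192].
        print(assign_offsets(params))

If the prev = p update is omitted, prev stays None, the contiguity test never fires, and no alignment padding is ever inserted, which appears to be the behavior this "Bug fix" commit corrects.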