Unverified commit 0399d32c, authored by Raul Puri and committed by GitHub
Browse files

fixed save race condition

parent 3573423f
......@@ -149,7 +149,7 @@ def save_checkpoint(model_suffix, epoch, i, model, optimizer, lr_scheduler, args
np.random.get_state(),
random.getstate())
if not (torch.distributed.is_initialized() and \
- torch.distributed.get_rank() > 1):
+ torch.distributed.get_rank() > 0):
if not os.path.exists(checkpoint_dir):
os.makedirs(checkpoint_dir)
total_iters = args.train_iters * (epoch-1) + i
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment