"doc/git@developer.sourcefind.cn:ox696c/ktransformers.git" did not exist on "72e8e16fa419404954248d62bf446bbfd60b7f98"
Unverified commit 0399d32c, authored by Raul Puri and committed by GitHub
Browse files

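Fixed save race condition: the rank guard in save_checkpoint used `get_rank() > 1`, which let both rank 0 and rank 1 write the checkpoint; it now uses `get_rank() > 0`, so only rank 0 writes.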

parent 3573423f
...@@ -149,7 +149,7 @@ def save_checkpoint(model_suffix, epoch, i, model, optimizer, lr_scheduler, args ...@@ -149,7 +149,7 @@ def save_checkpoint(model_suffix, epoch, i, model, optimizer, lr_scheduler, args
np.random.get_state(), np.random.get_state(),
random.getstate()) random.getstate())
if not (torch.distributed.is_initialized() and \ if not (torch.distributed.is_initialized() and \
torch.distributed.get_rank() > 1): torch.distributed.get_rank() > 0):
if not os.path.exists(checkpoint_dir): if not os.path.exists(checkpoint_dir):
os.makedirs(checkpoint_dir) os.makedirs(checkpoint_dir)
total_iters = args.train_iters * (epoch-1) + i total_iters = args.train_iters * (epoch-1) + i
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment