"git@developer.sourcefind.cn:chenpangpang/Real-ESRGAN.git" did not exist on "601b568fd65a6784f7574e344f2347caf2b8559d"
Unverified Commit f4c9a7e6 authored by Sylvain Gugger's avatar Sylvain Gugger Committed by GitHub
Browse files

Accumulate opt state dict on dp_rank 0 (#11481)

parent 1e8e0686
...@@ -1420,6 +1420,7 @@ class Trainer: ...@@ -1420,6 +1420,7 @@ class Trainer:
xm.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt")) xm.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
reissue_pt_warnings(caught_warnings) reissue_pt_warnings(caught_warnings)
elif is_sagemaker_mp_enabled(): elif is_sagemaker_mp_enabled():
if smp.dp_rank() == 0:
# Consolidate the state dict on all processes of dp_rank 0 # Consolidate the state dict on all processes of dp_rank 0
opt_state_dict = self.optimizer.state_dict() opt_state_dict = self.optimizer.state_dict()
# Save it and the scheduler on the main process # Save it and the scheduler on the main process
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment