Commit 74ba3797 authored by Lawrence McAfee
Browse files

Fixed load_checkpoint(): only load the separate optimizer checkpoint file when the distributed optimizer is in use; otherwise reuse the model checkpoint's state dict.

parent b178e6fc
......@@ -367,7 +367,10 @@ def load_checkpoint(model, optimizer, opt_param_scheduler, load_arg='load', stri
# Load the checkpoint.
try:
model_state_dict = torch.load(model_checkpoint_name, map_location='cpu')
optim_state_dict = torch.load(optim_checkpoint_name, map_location='cpu')
if args.use_distributed_optimizer:
optim_state_dict = torch.load(optim_checkpoint_name, map_location='cpu')
else:
optim_state_dict = model_state_dict
except ModuleNotFoundError:
from megatron.fp16_deprecated import loss_scaler
# For backward compatibility.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment