Merge branch 'master' into move_vocab_padding_to_utils

7e46ca58 · Mohammad Shoeybi · 2e6d5ed9 · 6b68bb8a · 7e46ca58
Commit 7e46ca58 authored Oct 09, 2019 by Mohammad Shoeybi
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 13 additions and 1 deletion

megatron/utils.py +13 -1

No files found.
--- a/megatron/utils.py
+++ b/megatron/utils.py
@@ -351,7 +351,19 @@ def load_checkpoint(model, optimizer, lr_scheduler, args):
            torch.distributed.get_rank(), checkpoint_name))

    # Load the checkpoint.
+    try:
+        sd = torch.load(checkpoint_name, map_location='cpu')
+    except ModuleNotFoundError:
+        # For backward compatibility.
+        print_rank_0(' > deserializing using the old code structure ...')
+        import sys
+        sys.modules['fp16.loss_scaler'] = sys.modules[
+            'megatron.fp16.loss_scaler']
        sd = torch.load(checkpoint_name, map_location='cpu')
+        sys.modules.pop('fp16.loss_scaler', None)
+    except:
+        print_rank_0('could not load the checkpoint')
+        exit()

    # Iterations.
    if args.finetune or release: