Fix load of model checkpoints in the Trainer (#18470)

df28de05 · Sylvain Gugger · GitHub · 330247ed · df28de05
Unverified commit df28de05, authored Aug 04, 2022 by Sylvain Gugger; committed by GitHub on Aug 04, 2022.
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

src/transformers/trainer.py +2 -2

No files found.
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -1935,7 +1935,7 @@ class Trainer:
            else:
                # We load the model state dict on the CPU to avoid an OOM error.
                state_dict = torch.load(os.path.join(resume_from_checkpoint, WEIGHTS_NAME), map_location="cpu")
-                load_result = model.load_state_dict(state_dict)
+                load_result = model.load_state_dict(state_dict, strict=False)
                # release memory
                del state_dict
                self._issue_warnings_after_load(load_result)
@@ -1989,7 +1989,7 @@ class Trainer:
                    # We load the model state dict on the CPU to avoid an OOM error.
                    state_dict = torch.load(best_model_path, map_location="cpu")
                    # If the model is on the GPU, it still works!
-                    load_result = model.load_state_dict(state_dict)
+                    load_result = model.load_state_dict(state_dict, strict=False)
                if not is_sagemaker_mp_enabled():
                    self._issue_warnings_after_load(load_result)
        elif os.path.exists(os.path.join(self.state.best_model_checkpoint, WEIGHTS_INDEX_NAME)):