Unverified commit b91cff5a, authored by Jintao and committed by GitHub
Browse files

fix resume_from_checkpoint bug (#26739)

* fix resume_from_checkpoint bug

* update code
parent a5f5568d
...@@ -2862,7 +2862,10 @@ class Trainer: ...@@ -2862,7 +2862,10 @@ class Trainer:
checkpoints_sorted = sorted(ordering_and_checkpoint_path) checkpoints_sorted = sorted(ordering_and_checkpoint_path)
checkpoints_sorted = [checkpoint[1] for checkpoint in checkpoints_sorted] checkpoints_sorted = [checkpoint[1] for checkpoint in checkpoints_sorted]
# Make sure we don't delete the best model. # Make sure we don't delete the best model.
if self.state.best_model_checkpoint is not None: if (
self.state.best_model_checkpoint is not None
and str(Path(self.state.best_model_checkpoint)) in checkpoints_sorted
):
best_model_index = checkpoints_sorted.index(str(Path(self.state.best_model_checkpoint))) best_model_index = checkpoints_sorted.index(str(Path(self.state.best_model_checkpoint)))
for i in range(best_model_index, len(checkpoints_sorted) - 2): for i in range(best_model_index, len(checkpoints_sorted) - 2):
checkpoints_sorted[i], checkpoints_sorted[i + 1] = checkpoints_sorted[i + 1], checkpoints_sorted[i] checkpoints_sorted[i], checkpoints_sorted[i + 1] = checkpoints_sorted[i + 1], checkpoints_sorted[i]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment