Fix auto-resume training from checkpoint (#9822)

* Fix auto-resume training from checkpoint
* style fixes

Fix auto-resume training from checkpoint (#9822)
* Fix auto-resume training from checkpoint
* style fixes
f4bf0dea · jncasey · GitHub · f2fabedb · f4bf0dea
Unverified commit f4bf0dea, authored Jan 27, 2021 by jncasey; committed by GitHub on Jan 27, 2021.
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 3 deletions

src/transformers/trainer_utils.py src/transformers/trainer_utils.py +7 -3

No files found.
--- a/src/transformers/trainer_utils.py
+++ b/src/transformers/trainer_utils.py
@@ -77,15 +77,19 @@ class TrainOutput(NamedTuple):


 PREFIX_CHECKPOINT_DIR = "checkpoint"
-_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d)+$")
+_re_checkpoint = re.compile(r"^" + PREFIX_CHECKPOINT_DIR + r"\-(\d+)$")


 def get_last_checkpoint(folder):
    content = os.listdir(folder)
-    checkpoints = [path for path in content if _re_checkpoint.search(path) is not None and os.path.isdir(path)]
+    checkpoints = [
+        path
+        for path in content
+        if _re_checkpoint.search(path) is not None and os.path.isdir(os.path.join(folder, path))
+    ]
    if len(checkpoints) == 0:
        return
-    return max(checkpoints, key=lambda x: int(_re_checkpoint.search(x).groups()[0]))
+    return os.path.join(folder, max(checkpoints, key=lambda x: int(_re_checkpoint.search(x).groups()[0])))


 class EvaluationStrategy(ExplicitEnum):