Fix error with global step in run_squad.py

f24232cd · Lysandre Debut · 1b59b57b · f24232cd
Commit f24232cd authored Jan 08, 2020 by Lysandre Debut
Show whitespace changes
Inline Side-by-side

Showing with 13 additions and 9 deletions

examples/run_squad.py examples/run_squad.py +13 -9

No files found.
--- a/examples/run_squad.py
+++ b/examples/run_squad.py
@@ -170,8 +170,10 @@ def train(args, train_dataset, model, tokenizer):
    steps_trained_in_current_epoch = 0
    # Check if continuing training from a checkpoint
    if os.path.exists(args.model_name_or_path):
+        try:
            # set global_step to global_step of last saved checkpoint from model path
-        global_step = int(args.model_name_or_path.split("-")[-1].split("/")[0])
+            checkpoint_suffix = args.model_name_or_path.split("-")[-1].split("/")[0]
+            global_step = int(checkpoint_suffix)
            epochs_trained = global_step // (len(train_dataloader) // args.gradient_accumulation_steps)
            steps_trained_in_current_epoch = global_step % (len(train_dataloader) // args.gradient_accumulation_steps)

@@ -179,6 +181,8 @@ def train(args, train_dataset, model, tokenizer):
            logger.info("  Continuing training from epoch %d", epochs_trained)
            logger.info("  Continuing training from global step %d", global_step)
            logger.info("  Will skip the first %d steps in the first epoch", steps_trained_in_current_epoch)
+        except ValueError:
+            logger.info("  Starting fine-tuning.")

    tr_loss, logging_loss = 0.0, 0.0
    model.zero_grad()