"examples/vscode:/vscode.git/clone" did not exist on "9a21b50614991889f11dbe0743af25923765f9e9"
Commit a03fcf57 authored by Bilal Khan, committed by Lysandre Debut

Save tokenizer after each epoch to be able to resume training from a checkpoint

parent f71b1bb0
@@ -274,6 +274,8 @@ def train(args, train_dataset, model, tokenizer):
         os.makedirs(output_dir)
     model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
     model_to_save.save_pretrained(output_dir)
+    tokenizer.save_pretrained(output_dir)
     torch.save(args, os.path.join(output_dir, 'training_args.bin'))
     logger.info("Saving model checkpoint to %s", output_dir)
@@ -282,6 +284,7 @@ def train(args, train_dataset, model, tokenizer):
     torch.save(optimizer.state_dict(), os.path.join(output_dir, 'optimizer.pt'))
     torch.save(scheduler.state_dict(), os.path.join(output_dir, 'scheduler.pt'))
+    torch.save(epoch, os.path.join(output_dir, 'training_state.pt'))
     logger.info("Saving training state to %s", output_dir)
     if args.max_steps > 0 and global_step > args.max_steps:
         epoch_iterator.close()
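
Taken together, the two hunks persist everything a restart needs: the model weights and tokenizer via save_pretrained, and the optimizer state, scheduler state, and last finished epoch via torch.save. Below is a minimal sketch of the matching restore step; only the file names (optimizer.pt, scheduler.pt, training_state.pt) come from the diff, while the checkpoint path, model class, and optimizer/scheduler settings are illustrative assumptions.

import os

import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer, get_linear_schedule_with_warmup

checkpoint_dir = 'output/checkpoint-1000'  # hypothetical checkpoint directory

# save_pretrained wrote both the model and (after this commit) the tokenizer,
# so from_pretrained can reload them straight from the checkpoint directory.
model = GPT2LMHeadModel.from_pretrained(checkpoint_dir)    # model class is an assumption
tokenizer = GPT2Tokenizer.from_pretrained(checkpoint_dir)

# Rebuild the optimizer and scheduler, then overwrite their states from disk.
optimizer = torch.optim.AdamW(model.parameters(), lr=5e-5)  # lr is an assumption
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=10000)
optimizer.load_state_dict(torch.load(os.path.join(checkpoint_dir, 'optimizer.pt')))
scheduler.load_state_dict(torch.load(os.path.join(checkpoint_dir, 'scheduler.pt')))

# training_state.pt holds the epoch counter saved in the second hunk.
last_epoch = torch.load(os.path.join(checkpoint_dir, 'training_state.pt'))
start_epoch = last_epoch + 1

Since the commit message says the checkpoint is written after each epoch, resuming at last_epoch + 1 avoids repeating the epoch that was already completed.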