Updated tokenizer loading to address reproducibility issues

350bb6bf · Rabeeh KARIMI · 3d47a7f8 · 350bb6bf
Commit 350bb6bf authored Aug 30, 2019 by Rabeeh KARIMI
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

examples/run_glue.py examples/run_glue.py +2 -2

No files found.
--- a/examples/run_glue.py
+++ b/examples/run_glue.py
@@ -448,13 +448,14 @@ def main():

        # Load a trained model and vocabulary that you have fine-tuned
        model = model_class.from_pretrained(args.output_dir)
-        tokenizer = tokenizer_class.from_pretrained(args.output_dir)
+        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        model.to(args.device)


    # Evaluation
    results = {}
    if args.do_eval and args.local_rank in [-1, 0]:
+        tokenizer = tokenizer_class.from_pretrained(args.output_dir, do_lower_case=args.do_lower_case)
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(os.path.dirname(c) for c in sorted(glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
@@ -463,7 +464,6 @@ def main():
        for checkpoint in checkpoints:
            global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
            model = model_class.from_pretrained(checkpoint)
-            tokenizer = tokenizer_class.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, model, tokenizer, prefix=global_step)
            result = dict((k + '_{}'.format(global_step), v) for k, v in result.items())