update run_openai_gpt to fix #1264

e768f232 · thomwolf · 83349939 · e768f232
Commit e768f232 authored Sep 18, 2019 by thomwolf
Hide whitespace changes
Inline Side-by-side

Showing with 5 additions and 3 deletions

examples/contrib/run_openai_gpt.py examples/contrib/run_openai_gpt.py +5 -3

No files found.
--- a/examples/contrib/run_openai_gpt.py
+++ b/examples/contrib/run_openai_gpt.py
@@ -153,9 +153,11 @@ def main():
    # This loading functions also add new tokens and embeddings called `special tokens`
    # These new embeddings will be fine-tuned on the RocStories dataset
    special_tokens = ['_start_', '_delimiter_', '_classify_']
-    tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_name, special_tokens=special_tokens)
-    special_tokens_ids = list(tokenizer.convert_tokens_to_ids(token) for token in special_tokens)
-    model = OpenAIGPTDoubleHeadsModel.from_pretrained(args.model_name, num_special_tokens=len(special_tokens))
+    tokenizer = OpenAIGPTTokenizer.from_pretrained(args.model_name)
+    tokenizer.add_tokens(special_tokens)
+    special_tokens_ids = tokenizer.convert_tokens_to_ids(special_tokens)
+    model = OpenAIGPTDoubleHeadsModel.from_pretrained(args.model_name)
+    model.resize_token_embeddings(len(tokenizer))
    model.to(device)

    # Load and encode the datasets