Commit 49ee4db2 authored by Leo Gao's avatar Leo Gao
Browse files

Handle GPTNeoConfig not having n_ctx

parent 44d7b2fa
...@@ -20,7 +20,11 @@ class GPT2LM(LM): ...@@ -20,7 +20,11 @@ class GPT2LM(LM):
# pretrained tokenizer for neo is broken for now so just hardcoding this to gpt2 # pretrained tokenizer for neo is broken for now so just hardcoding this to gpt2
self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained('gpt2') self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained('gpt2')
self.tokenizer.pad_token = "<|endoftext|>" self.tokenizer.pad_token = "<|endoftext|>"
try:
self.max_length = self.gpt2.config.n_ctx self.max_length = self.gpt2.config.n_ctx
except AttributeError:
# gptneoconfig doesn't have n_ctx apparently
self.max_length = self.gpt2.config.max_position_embeddings
assert self.tokenizer.encode('hello\n\nhello') == [31373, 198, 198, 31373] assert self.tokenizer.encode('hello\n\nhello') == [31373, 198, 198, 31373]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment