Unverified Commit 2598f990 authored by Stella Biderman's avatar Stella Biderman Committed by GitHub
Browse files

Merge pull request #343 from EleutherAI/fix-tokenizer-init

Manually concat tokenizer revision with subfolder
parents b0b76d87 3dc27bd2
...@@ -34,17 +34,17 @@ class HFLM(BaseLM): ...@@ -34,17 +34,17 @@ class HFLM(BaseLM):
) )
# TODO: update this to be less of a hack once subfolder is fixed in HF # TODO: update this to be less of a hack once subfolder is fixed in HF
revision = revision + ("/" + subfolder if subfolder is not None else "")
self.gpt2 = transformers.AutoModelForCausalLM.from_pretrained( self.gpt2 = transformers.AutoModelForCausalLM.from_pretrained(
pretrained, pretrained,
revision=revision + ("/" + subfolder if subfolder is not None else ""), revision=revision,
).to(self.device) ).to(self.device)
self.gpt2.eval() self.gpt2.eval()
# pretrained tokenizer for neo is broken for now so just hard-coding this to gpt2
self.tokenizer = transformers.AutoTokenizer.from_pretrained( self.tokenizer = transformers.AutoTokenizer.from_pretrained(
pretrained if tokenizer is None else tokenizer, pretrained if tokenizer is None else tokenizer,
revision=revision, revision=revision,
subfolder=subfolder,
) )
assert isinstance( assert isinstance(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment