"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "74843695ebb6a36df332eac5ebbdd3b6a79030a9"
Commit ac27548b authored by thomwolf's avatar thomwolf
Browse files

fix unk_token test

parent c717d385
...@@ -104,7 +104,7 @@ class GPT2Tokenizer(PreTrainedTokenizer): ...@@ -104,7 +104,7 @@ class GPT2Tokenizer(PreTrainedTokenizer):
def __init__(self, vocab_file, merges_file, errors='replace', unk_token="<|endoftext|>", def __init__(self, vocab_file, merges_file, errors='replace', unk_token="<|endoftext|>",
bos_token="<|endoftext|>", eos_token="<|endoftext|>", **kwargs): bos_token="<|endoftext|>", eos_token="<|endoftext|>", **kwargs):
super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, **kwargs) super(GPT2Tokenizer, self).__init__(bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs)
self.encoder = json.load(open(vocab_file)) self.encoder = json.load(open(vocab_file))
self.decoder = {v:k for k,v in self.encoder.items()} self.decoder = {v:k for k,v in self.encoder.items()}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment