"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "1762ded30a49649bdd5f8f5ee38b46dea051026a"
Unverified Commit a3e607d1 authored by Nicolas Patry's avatar Nicolas Patry Committed by GitHub
Browse files

Supporting Merges.txt files that contain a trailing newline. (#15782)

(`hf-internal-testing/tiny-clip` for instance)
parent 24588c67
......@@ -166,7 +166,7 @@ class CLIPTokenizer(PreTrainedTokenizer):
self.byte_encoder = bytes_to_unicode()
self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
with open(merges_file, encoding="utf-8") as merges_handle:
bpe_merges = merges_handle.read().split("\n")[1 : 49152 - 256 - 2 + 1]
bpe_merges = merges_handle.read().strip().split("\n")[1 : 49152 - 256 - 2 + 1]
bpe_merges = [tuple(merge.split()) for merge in bpe_merges]
self.bpe_ranks = dict(zip(bpe_merges, range(len(bpe_merges))))
self.cache = {"<|startoftext|>": "<|startoftext|>", "<|endoftext|>": "<|endoftext|>"}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment