Unverified Commit 915cce39 authored by Marc Sun, committed by GitHub

Fix llama gguf converter (#31575)

parent b07770c5
@@ -632,7 +632,27 @@ class GGUFLlamaConverter(LlamaConverter):
         return decoders.Sequence(sequence)
 
     def converted(self):
-        tokenizer = super().converted()
+        # Copied partly from the converted method in the SpmConverter class
+        tokenizer = self.tokenizer(self.proto)
+
+        # Tokenizer assembly
+        normalizer = self.normalizer(self.proto)
+        if normalizer is not None:
+            tokenizer.normalizer = normalizer
+
+        replacement = "▁"
+        add_prefix_space = True
+        if hasattr(self.original_tokenizer, "add_prefix_space"):
+            add_prefix_space = self.original_tokenizer.add_prefix_space
+
+        pre_tokenizer = self.pre_tokenizer(replacement, add_prefix_space)
+        if pre_tokenizer is not None:
+            tokenizer.pre_tokenizer = pre_tokenizer
+
+        tokenizer.decoder = self.decoder(replacement, add_prefix_space)
+        post_processor = self.post_processor()
+        if post_processor:
+            tokenizer.post_processor = post_processor
 
         # HACK: patch the llama-3 tokenizer to use the corresponding pre-tokenizer
         # and normalizer
...
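For context, the rebuilt converted() method runs whenever a Llama GGUF checkpoint is loaded through transformers with the gguf_file argument. A minimal sketch is below; the repository id and filename are illustrative placeholders and not part of this commit.

# Minimal sketch (not part of this commit): loading a tokenizer from a GGUF
# file exercises GGUFLlamaConverter.converted() shown in the diff above.
# The repo id and filename below are illustrative placeholders.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",        # placeholder GGUF repo id
    gguf_file="tinyllama-1.1b-chat-v1.0.Q6_K.gguf",  # placeholder GGUF filename
)
print(tokenizer("Hello from a GGUF-backed tokenizer")["input_ids"])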