Unverified Commit 9febf610 authored by Chen Xin's avatar Chen Xin Committed by GitHub
Browse files

fix tokenizer_info when convert the model (#661)

parent 529e56bd
...@@ -5,7 +5,6 @@ import os.path as osp ...@@ -5,7 +5,6 @@ import os.path as osp
import torch import torch
from safetensors.torch import load_file from safetensors.torch import load_file
from sentencepiece import SentencePieceProcessor
from lmdeploy.tokenizer import Tokenizer from lmdeploy.tokenizer import Tokenizer
...@@ -168,18 +167,11 @@ class LlamaModel(BaseInputModel): ...@@ -168,18 +167,11 @@ class LlamaModel(BaseInputModel):
def tokenizer_info(self): def tokenizer_info(self):
"""Read tokenizer info.""" """Read tokenizer info."""
assert osp.isfile(self.tokenizer_path), self.tokenizer_path assert osp.isdir(self.model_path), self.model_path
try: tk_model = Tokenizer(self.model_path)
tk_model = SentencePieceProcessor(model_file=self.tokenizer_path) n_words = tk_model.vocab_size
# BOS / EOS token IDs bos_id = tk_model.bos_token_id
n_words = tk_model.vocab_size eos_id = tk_model.eos_token_id
bos_id = tk_model.bos_token_id
eos_id = tk_model.eos_token_id
except Exception:
tk_model = Tokenizer(self.model_path)
n_words = tk_model.vocab_size
bos_id = tk_model.bos_token_id
eos_id = tk_model.eos_token_id
return n_words, bos_id, eos_id return n_words, bos_id, eos_id
def model_info(self): def model_info(self):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment