Unverified Commit 9febf610 authored by Chen Xin's avatar Chen Xin Committed by GitHub
Browse files

fix tokenizer_info when convert the model (#661)

parent 529e56bd
......@@ -5,7 +5,6 @@ import os.path as osp
import torch
from safetensors.torch import load_file
from sentencepiece import SentencePieceProcessor
from lmdeploy.tokenizer import Tokenizer
......@@ -168,14 +167,7 @@ class LlamaModel(BaseInputModel):
def tokenizer_info(self):
"""Read tokenizer info."""
assert osp.isfile(self.tokenizer_path), self.tokenizer_path
try:
tk_model = SentencePieceProcessor(model_file=self.tokenizer_path)
# BOS / EOS token IDs
n_words = tk_model.vocab_size
bos_id = tk_model.bos_token_id
eos_id = tk_model.eos_token_id
except Exception:
assert osp.isdir(self.model_path), self.model_path
tk_model = Tokenizer(self.model_path)
n_words = tk_model.vocab_size
bos_id = tk_model.bos_token_id
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment