Unverified Commit 61e8d2c6 authored by q.yao, committed by GitHub
Browse files

remove tokenizer_path from chat_example and move it to lmdeploy/turbomind (#55)

parent da62f428
import os.path as osp
import random import random
import fire import fire
...@@ -13,9 +14,10 @@ def input_prompt(): ...@@ -13,9 +14,10 @@ def input_prompt():
return '\n'.join(iter(input, sentinel)) return '\n'.join(iter(input, sentinel))
def main(model_name, model_path, tokenizer_model_path, session_id: int = 1): def main(model_name, model_path, session_id: int = 1):
tm_model = tm.TurboMind(model_path) tm_model = tm.TurboMind(model_path)
generator = tm_model.create_instance() generator = tm_model.create_instance()
tokenizer_model_path = osp.join(model_path, 'triton_models', 'tokenizer')
tokenizer = AutoTokenizer.from_pretrained(tokenizer_model_path) tokenizer = AutoTokenizer.from_pretrained(tokenizer_model_path)
model = MODELS.get(model_name)() model = MODELS.get(model_name)()
...@@ -48,8 +50,8 @@ def main(model_name, model_path, tokenizer_model_path, session_id: int = 1): ...@@ -48,8 +50,8 @@ def main(model_name, model_path, tokenizer_model_path, session_id: int = 1):
random_seed=seed if nth_round == 1 else None): random_seed=seed if nth_round == 1 else None):
res, tokens = outputs[0] res, tokens = outputs[0]
# decode res # decode res
response = tokenizer.decode(res[step:], response = tokenizer.decode(
skip_special_tokens=True) res[step:], skip_special_tokens=True)
print(f'session {session_id}, {tokens}, {response}') print(f'session {session_id}, {tokens}, {response}')
# update step # update step
step = tokens - 1 step = tokens - 1
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment