"docs/en/git@developer.sourcefind.cn:OpenDAS/mmcv.git" did not exist on "9a5685a9958f9e3b85b24b3621be3e339febd494"
Unverified commit c261b49d authored by q.yao, committed by GitHub

Move `tokenizer.py` to the folder of lmdeploy (#543)

* move tokenizer

* remove Tokenizer in init

* update deploy.py
parent f4422fab
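The change is a rename plus an import-path update: `tokenizer.py` moves from `lmdeploy/turbomind/` to the `lmdeploy` package root, and every call site switches accordingly. A minimal before/after sketch of the migration:

# before this commit
from lmdeploy.turbomind.tokenizer import Tokenizer
# or via the subpackage's re-export
from lmdeploy.turbomind import Tokenizer

# after this commit
from lmdeploy.tokenizer import Tokenizer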
@@ -18,7 +18,8 @@ from pynvml import (NVMLError, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex,
                     nvmlInit, nvmlShutdown, nvmlSystemGetDriverVersion)
 from tqdm import tqdm
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def infer(model, session_id: int, input_ids: str, output_seqlen: int,
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 import requests
 
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 
 from lmdeploy.serve.turbomind.chatbot import Chatbot
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
@@ -8,7 +8,8 @@ from typing import List, Tuple
 import fire
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def sample_requests(
@@ -30,7 +30,7 @@ class AsyncEngine:
 
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
         from lmdeploy import turbomind as tm
-        from lmdeploy.turbomind.tokenizer import Tokenizer
+        from lmdeploy.tokenizer import Tokenizer
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')
         tokenizer = Tokenizer(tokenizer_model_path)
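The directory AsyncEngine hands to Tokenizer is the same triton_models/tokenizer folder that deploy.py populates in the hunks below. A standalone sketch of that lookup, where './workspace' is a hypothetical deploy.py output directory:

import os.path as osp

from lmdeploy.tokenizer import Tokenizer

workspace = './workspace'  # hypothetical output dir of deploy.py
tokenizer = Tokenizer(osp.join(workspace, 'triton_models', 'tokenizer'))
print(tokenizer.encode('hello world'))  # prints the prompt's token ids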
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
         shutil.copy(tokenizer_path,
                     osp.join(triton_models_path, 'tokenizer/tokenizer.model'))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
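All four deploy_* hunks make the same fix: the source of the shutil.copy tracks the file's new location relative to the package root. A sketch of the resulting paths, assuming get_package_root_path yields the installed lmdeploy directory (as its usage above suggests); the concrete paths here are hypothetical:

import os.path as osp

root_path = '/site-packages/lmdeploy'             # hypothetical package root
triton_models_path = './workspace/triton_models'  # hypothetical workspace

src = osp.join(root_path, 'tokenizer.py')  # was 'turbomind/tokenizer.py'
dst = osp.join(triton_models_path, 'tokenizer')
# shutil.copy(src, dst) places tokenizer.py next to tokenizer.model,
# the folder AsyncEngine loads the Tokenizer from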
 # Copyright (c) OpenMMLab. All rights reserved.
-from .tokenizer import Tokenizer
 from .turbomind import TurboMind
 
-__all__ = ['Tokenizer', 'TurboMind']
+__all__ = ['TurboMind']
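With Tokenizer dropped from lmdeploy.turbomind's __all__, `from lmdeploy.turbomind import Tokenizer` stops working. Code that must run on releases from both sides of this commit could use a fallback such as this sketch (not part of the commit itself):

try:
    from lmdeploy.tokenizer import Tokenizer  # this commit onward
except ImportError:  # older releases without lmdeploy/tokenizer.py
    from lmdeploy.turbomind.tokenizer import Tokenizer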
@@ -8,7 +8,7 @@ import fire
 
 from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
@@ -6,7 +6,7 @@ import fire
 import torch
 
 from lmdeploy import turbomind as tm
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
@@ -14,7 +14,7 @@ from torch.nn.utils.rnn import pad_sequence
 
 import lmdeploy
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 # TODO: find another way import _turbomind
from lmdeploy.turbomind.tokenizer import Tokenizer


def main():
    tokenizer = Tokenizer('huggyllama/llama-7b')
    prompts = ['cest la vie', '上帝已死']
    for prompt in prompts:
        tokens = tokenizer.encode(prompt)
        output = tokenizer.decode(tokens)
        print(output)


if __name__ == '__main__':
    main()
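The same round trip works from the new import path after this commit. Whether decode reproduces a prompt byte-for-byte depends on the tokenizer's special-token handling, so treat this check as illustrative:

from lmdeploy.tokenizer import Tokenizer

tokenizer = Tokenizer('huggyllama/llama-7b')  # fetched from the HF Hub
tokens = tokenizer.encode('上帝已死')
print(tokens)                    # token ids
print(tokenizer.decode(tokens))  # should resemble the original prompt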