Unverified Commit c261b49d authored by q.yao, committed by GitHub

Move `tokenizer.py` to the folder of lmdeploy (#543)

* move tokenizer

* remove Tokenizer in init

* update deploy.py
parent f4422fab
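The practical effect of the commit is a one-line change at every call site: `Tokenizer` now lives at the package root instead of under `turbomind`. A minimal before/after sketch (the `huggyllama/llama-7b` model name is borrowed from the test script removed at the end of this diff; any HuggingFace repo id or local model path should work the same way):

```python
# Old import paths, removed by this commit:
#   from lmdeploy.turbomind import Tokenizer
#   from lmdeploy.turbomind.tokenizer import Tokenizer

# New canonical import path:
from lmdeploy.tokenizer import Tokenizer

tokenizer = Tokenizer('huggyllama/llama-7b')  # HF repo id or local path
token_ids = tokenizer.encode('hello world')   # text -> token ids
print(tokenizer.decode(token_ids))            # token ids -> text
```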
@@ -18,7 +18,8 @@ from pynvml import (NVMLError, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex,
                     nvmlInit, nvmlShutdown, nvmlSystemGetDriverVersion)
 from tqdm import tqdm
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def infer(model, session_id: int, input_ids: str, output_seqlen: int,
...
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 import requests
 
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
...
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 
 from lmdeploy.serve.turbomind.chatbot import Chatbot
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
...
@@ -8,7 +8,8 @@ from typing import List, Tuple
 
 import fire
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def sample_requests(
...
@@ -30,7 +30,7 @@ class AsyncEngine:
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
         from lmdeploy import turbomind as tm
-        from lmdeploy.turbomind.tokenizer import Tokenizer
+        from lmdeploy.tokenizer import Tokenizer
 
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')
         tokenizer = Tokenizer(tokenizer_model_path)
...
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
         shutil.copy(tokenizer_path,
                     osp.join(triton_models_path, 'tokenizer/tokenizer.model'))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
...
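All four `deploy_*` hunks above make the same fix: the copy of `tokenizer.py` into the Triton workspace now reads from the package root rather than from `turbomind/`. As a rough sketch of what that step amounts to, assuming `get_package_root_path` resolves to the installed `lmdeploy` package directory, which is what its usage here suggests (`copy_tokenizer_py` is a hypothetical helper for illustration, not lmdeploy API):

```python
import os.path as osp
import shutil

import lmdeploy


def copy_tokenizer_py(triton_models_path: str) -> None:
    # Approximates get_package_root_path(): the directory of the installed
    # lmdeploy package, e.g. .../site-packages/lmdeploy (assumption).
    root_path = osp.dirname(lmdeploy.__file__)
    # After this commit, tokenizer.py sits at the package root, not under
    # turbomind/, so the deployed workspace receives the relocated module.
    shutil.copy(osp.join(root_path, 'tokenizer.py'),
                osp.join(triton_models_path, 'tokenizer'))
```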
 # Copyright (c) OpenMMLab. All rights reserved.
-from .tokenizer import Tokenizer
 from .turbomind import TurboMind
 
-__all__ = ['Tokenizer', 'TurboMind']
+__all__ = ['TurboMind']
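With `Tokenizer` dropped from this `__init__.py`, any downstream code still importing it from `lmdeploy.turbomind` will fail with an `ImportError`; the fix mirrors the call-site changes above:

```python
# Fails after this commit: Tokenizer is no longer exported from turbomind.
# from lmdeploy.turbomind import Tokenizer, TurboMind

# Works after this commit:
from lmdeploy.tokenizer import Tokenizer
from lmdeploy.turbomind import TurboMind
```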
@@ -8,7 +8,7 @@ import fire
 
 from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
...
@@ -6,7 +6,7 @@ import fire
 import torch
 
 from lmdeploy import turbomind as tm
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
...
@@ -14,7 +14,7 @@ from torch.nn.utils.rnn import pad_sequence
 
 import lmdeploy
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 # TODO: find another way import _turbomind
...
-from lmdeploy.turbomind.tokenizer import Tokenizer
-
-
-def main():
-    tokenizer = Tokenizer('huggyllama/llama-7b')
-    prompts = ['cest la vie', '上帝已死']
-    for prompt in prompts:
-        tokens = tokenizer.encode(prompt)
-        output = tokenizer.decode(tokens)
-        print(output)
-
-
-if __name__ == '__main__':
-    main()
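The removed script above still imports from the old `lmdeploy.turbomind.tokenizer` path, which this commit breaks. If an equivalent round-trip smoke test is wanted afterwards, only the import line needs to change (a sketch; the prompts, including the original's unaccented 'cest la vie' and the Chinese '上帝已死', 'God is dead', are kept verbatim):

```python
from lmdeploy.tokenizer import Tokenizer


def main():
    tokenizer = Tokenizer('huggyllama/llama-7b')
    prompts = ['cest la vie', '上帝已死']
    for prompt in prompts:
        # encode -> decode round trip; the output should match the prompt
        tokens = tokenizer.encode(prompt)
        output = tokenizer.decode(tokens)
        print(output)


if __name__ == '__main__':
    main()
```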