"docs/en/git@developer.sourcefind.cn:OpenDAS/mmcv.git" did not exist on "9a5685a9958f9e3b85b24b3621be3e339febd494"
Unverified commit c261b49d authored by q.yao, committed by GitHub

Move `tokenizer.py` to the folder of lmdeploy (#543)

* move tokenizer

* remove Tokenizer in init

* update deploy.py
parent f4422fab
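The change is a rename plus an import-path update: `tokenizer.py` moves from `lmdeploy/turbomind/` to the `lmdeploy` package root, and every call site switches accordingly. A minimal before/after sketch of the migration:

# before this commit
from lmdeploy.turbomind.tokenizer import Tokenizer
# or via the subpackage's re-export
from lmdeploy.turbomind import Tokenizer

# after this commit
from lmdeploy.tokenizer import Tokenizer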
@@ -18,7 +18,8 @@ from pynvml import (NVMLError, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex,
                     nvmlInit, nvmlShutdown, nvmlSystemGetDriverVersion)
 from tqdm import tqdm
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def infer(model, session_id: int, input_ids: str, output_seqlen: int,
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 import requests
 
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 
 from lmdeploy.serve.turbomind.chatbot import Chatbot
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
@@ -8,7 +8,8 @@ from typing import List, Tuple
 import fire
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def sample_requests(
@@ -30,7 +30,7 @@ class AsyncEngine:
 
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
         from lmdeploy import turbomind as tm
-        from lmdeploy.turbomind.tokenizer import Tokenizer
+        from lmdeploy.tokenizer import Tokenizer
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')
         tokenizer = Tokenizer(tokenizer_model_path)
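The directory AsyncEngine hands to Tokenizer is the same triton_models/tokenizer folder that deploy.py populates in the hunks below. A standalone sketch of that lookup, where './workspace' is a hypothetical deploy.py output directory:

import os.path as osp

from lmdeploy.tokenizer import Tokenizer

workspace = './workspace'  # hypothetical output dir of deploy.py
tokenizer = Tokenizer(osp.join(workspace, 'triton_models', 'tokenizer'))
print(tokenizer.encode('hello world'))  # prints the prompt's token ids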
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
         shutil.copy(tokenizer_path,
                     osp.join(triton_models_path, 'tokenizer/tokenizer.model'))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
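All four deploy_* hunks make the same fix: the source of the shutil.copy tracks the file's new location relative to the package root. A sketch of the resulting paths, assuming get_package_root_path yields the installed lmdeploy directory (as its usage above suggests); the concrete paths here are hypothetical:

import os.path as osp

root_path = '/site-packages/lmdeploy'             # hypothetical package root
triton_models_path = './workspace/triton_models'  # hypothetical workspace

src = osp.join(root_path, 'tokenizer.py')  # was 'turbomind/tokenizer.py'
dst = osp.join(triton_models_path, 'tokenizer')
# shutil.copy(src, dst) places tokenizer.py next to tokenizer.model,
# the folder AsyncEngine loads the Tokenizer from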
 # Copyright (c) OpenMMLab. All rights reserved.
-from .tokenizer import Tokenizer
 from .turbomind import TurboMind
 
-__all__ = ['Tokenizer', 'TurboMind']
+__all__ = ['TurboMind']
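With Tokenizer dropped from lmdeploy.turbomind's __all__, `from lmdeploy.turbomind import Tokenizer` stops working. Code that must run on releases from both sides of this commit could use a fallback such as this sketch (not part of the commit itself):

try:
    from lmdeploy.tokenizer import Tokenizer  # this commit onward
except ImportError:  # older releases without lmdeploy/tokenizer.py
    from lmdeploy.turbomind.tokenizer import Tokenizer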
@@ -8,7 +8,7 @@ import fire
 
 from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
@@ -6,7 +6,7 @@ import fire
 import torch
 
 from lmdeploy import turbomind as tm
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
@@ -14,7 +14,7 @@ from torch.nn.utils.rnn import pad_sequence
 
 import lmdeploy
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 # TODO: find another way import _turbomind
from lmdeploy.turbomind.tokenizer import Tokenizer


def main():
    tokenizer = Tokenizer('huggyllama/llama-7b')
    prompts = ['cest la vie', '上帝已死']
    for prompt in prompts:
        tokens = tokenizer.encode(prompt)
        output = tokenizer.decode(tokens)
        print(output)


if __name__ == '__main__':
    main()
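The same round trip works from the new import path after this commit. Whether decode reproduces a prompt byte-for-byte depends on the tokenizer's special-token handling, so treat this check as illustrative:

from lmdeploy.tokenizer import Tokenizer

tokenizer = Tokenizer('huggyllama/llama-7b')  # fetched from the HF Hub
tokens = tokenizer.encode('上帝已死')
print(tokens)                    # token ids
print(tokenizer.decode(tokens))  # should resemble the original prompt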