OpenDAS / Lmdeploy · Commits

Commit c261b49d (Unverified)
Authored Oct 16, 2023 by q.yao; committed by GitHub, Oct 16, 2023
Parent: f4422fab

Move `tokenizer.py` to the folder of lmdeploy (#543)

* move tokenizer
* remove Tokenizer in init
* update deploy.py
Showing 12 changed files with 15 additions and 29 deletions (+15 -29):

* benchmark/profile_generation.py (+2 -1)
* benchmark/profile_restful_api.py (+1 -1)
* benchmark/profile_serving.py (+1 -1)
* benchmark/profile_throughput.py (+2 -1)
* lmdeploy/serve/async_engine.py (+1 -1)
* lmdeploy/serve/turbomind/deploy.py (+4 -4)
* lmdeploy/tokenizer.py (+0 -0, moved)
* lmdeploy/turbomind/__init__.py (+1 -2)
* lmdeploy/turbomind/chat.py (+1 -1)
* lmdeploy/turbomind/decode.py (+1 -1)
* lmdeploy/turbomind/turbomind.py (+1 -1)
* tests/python/test_tokenizer.py (+0 -15, deleted)
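In short, every caller switches from the old `lmdeploy.turbomind` locations to the new top-level module. A representative (not exhaustive) before/after sketch of the import change, drawn from the hunks below:

```python
# Before this commit (both forms appear in the removed lines below):
#     from lmdeploy.turbomind import Tokenizer, TurboMind
#     from lmdeploy.turbomind.tokenizer import Tokenizer
# After this commit:
from lmdeploy.tokenizer import Tokenizer
from lmdeploy.turbomind import TurboMind
```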
benchmark/profile_generation.py

```diff
@@ -18,7 +18,8 @@ from pynvml import (NVMLError, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex,
                     nvmlInit, nvmlShutdown, nvmlSystemGetDriverVersion)
 from tqdm import tqdm
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def infer(model, session_id: int, input_ids: str, output_seqlen: int,
```
benchmark/profile_restful_api.py

```diff
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 import requests
 
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
```
benchmark/profile_serving.py

```diff
@@ -8,7 +8,7 @@ import fire
 import numpy as np
 
 from lmdeploy.serve.turbomind.chatbot import Chatbot
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 
 def infer(chatbot, session_id: int, req_que: mp.Queue, res_que: mp.Queue):
```
benchmark/profile_throughput.py

```diff
@@ -8,7 +8,8 @@ from typing import List, Tuple
 import fire
 
-from lmdeploy.turbomind import Tokenizer, TurboMind
+from lmdeploy.tokenizer import Tokenizer
+from lmdeploy.turbomind import TurboMind
 
 
 def sample_requests(
```
lmdeploy/serve/async_engine.py

```diff
@@ -30,7 +30,7 @@ class AsyncEngine:
     def __init__(self, model_path, instance_num=32, tp=1) -> None:
         from lmdeploy import turbomind as tm
-        from lmdeploy.turbomind.tokenizer import Tokenizer
+        from lmdeploy.tokenizer import Tokenizer
         tokenizer_model_path = osp.join(model_path, 'triton_models',
                                         'tokenizer')
         tokenizer = Tokenizer(tokenizer_model_path)
```
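As the hunk above shows, `AsyncEngine` builds its `Tokenizer` from the `triton_models/tokenizer` directory inside a converted workspace. A minimal standalone sketch of the same pattern; the `./workspace` path is a hypothetical output of the deploy script, and the `encode`/`decode` calls mirror the deleted test at the bottom of this commit:

```python
import os.path as osp

from lmdeploy.tokenizer import Tokenizer

# Hypothetical workspace produced by the deploy script; adjust to your setup.
model_path = './workspace'
tokenizer = Tokenizer(osp.join(model_path, 'triton_models', 'tokenizer'))

# Round-trip a prompt through the tokenizer.
ids = tokenizer.encode('hello world')
print(tokenizer.decode(ids))
```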
lmdeploy/serve/turbomind/deploy.py

```diff
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
         shutil.copy(tokenizer_path,
                     osp.join(triton_models_path, 'tokenizer/tokenizer.model'))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
             shutil.copy(json_path,
                         osp.join(triton_models_path, 'tokenizer', _file))
         with get_package_root_path() as root_path:
-            shutil.copy(osp.join(root_path, 'turbomind/tokenizer.py'),
+            shutil.copy(osp.join(root_path, 'tokenizer.py'),
                         osp.join(triton_models_path, 'tokenizer'))
     else:
         print(f'tokenizer model {tokenizer_path} does not exist')
```
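All four hunks make the same fix: `tokenizer.py` now lives at the package root (`lmdeploy/tokenizer.py`), so the source path passed to `shutil.copy` drops the `turbomind/` prefix. The definition of `get_package_root_path` is not shown in this diff; a plausible minimal sketch of such a helper, assuming it yields the installed package directory:

```python
from contextlib import contextmanager
from pathlib import Path


@contextmanager
def get_package_root_path():
    """Yield the directory of the installed lmdeploy package.

    Hypothetical sketch: the real helper lives elsewhere in the repo and
    may differ; this only illustrates why 'tokenizer.py' resolved relative
    to the package root now points at the moved file.
    """
    import lmdeploy
    yield Path(lmdeploy.__file__).parent
```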
lmdeploy/turbomind/tokenizer.py → lmdeploy/tokenizer.py

File moved.
lmdeploy/turbomind/__init__.py

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
-from .tokenizer import Tokenizer
 from .turbomind import TurboMind
 
-__all__ = ['Tokenizer', 'TurboMind']
+__all__ = ['TurboMind']
```
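Since `Tokenizer` is no longer re-exported from `lmdeploy.turbomind`, downstream code that has to run against both old and new lmdeploy releases can fall back at import time. A hedged sketch of one way to bridge the two layouts:

```python
try:
    # Location after this commit.
    from lmdeploy.tokenizer import Tokenizer
except ImportError:
    # Older releases shipped the module under lmdeploy.turbomind.
    from lmdeploy.turbomind.tokenizer import Tokenizer
```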
lmdeploy/turbomind/chat.py

```diff
@@ -8,7 +8,7 @@ import fire
 
 from lmdeploy import turbomind as tm
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
```
lmdeploy/turbomind/decode.py

```diff
@@ -6,7 +6,7 @@ import fire
 import torch
 
 from lmdeploy import turbomind as tm
-from lmdeploy.turbomind.tokenizer import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 
 os.environ['TM_LOG_LEVEL'] = 'ERROR'
```
lmdeploy/turbomind/turbomind.py

```diff
@@ -14,7 +14,7 @@ from torch.nn.utils.rnn import pad_sequence
 import lmdeploy
 from lmdeploy.model import MODELS
-from lmdeploy.turbomind import Tokenizer
+from lmdeploy.tokenizer import Tokenizer
 from lmdeploy.utils import get_logger
 
 # TODO: find another way import _turbomind
```
tests/python/test_tokenizer.py (deleted, 100644 → 0)

```diff
-from lmdeploy.turbomind.tokenizer import Tokenizer
-
-
-def main():
-    tokenizer = Tokenizer('huggyllama/llama-7b')
-    prompts = ['cest la vie', '上帝已死']
-    for prompt in prompts:
-        tokens = tokenizer.encode(prompt)
-        output = tokenizer.decode(tokens)
-        print(output)
-
-
-if __name__ == '__main__':
-    main()
```
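The commit deletes this smoke test without adding a replacement. If equivalent coverage is wanted against the new module path, a minimal round-trip sketch might look like the following; it assumes the `huggyllama/llama-7b` tokenizer files are reachable (e.g. via a Hub download), and the `encode`/`decode` calls are exactly those exercised by the deleted test:

```python
from lmdeploy.tokenizer import Tokenizer


def test_encode_decode_roundtrip():
    # Hypothetical replacement for the deleted smoke test.
    tokenizer = Tokenizer('huggyllama/llama-7b')
    for prompt in ['cest la vie', '上帝已死']:
        tokens = tokenizer.encode(prompt)
        output = tokenizer.decode(tokens)
        assert len(tokens) > 0
        # Exact equality depends on tokenizer normalization, so only
        # assert that decoding produced a non-empty string.
        assert output
```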