Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Lmdeploy
Commits
c261b49d
Unverified
Commit
c261b49d
authored
Oct 16, 2023
by
q.yao
Committed by
GitHub
Oct 16, 2023
Browse files
Move `tokenizer.py` to the folder of lmdeploy (#543)
* move tokenizer * remove Tokenizer in init * update deploy.py
parent
f4422fab
Changes
12
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
15 additions
and
29 deletions
+15
-29
benchmark/profile_generation.py
benchmark/profile_generation.py
+2
-1
benchmark/profile_restful_api.py
benchmark/profile_restful_api.py
+1
-1
benchmark/profile_serving.py
benchmark/profile_serving.py
+1
-1
benchmark/profile_throughput.py
benchmark/profile_throughput.py
+2
-1
lmdeploy/serve/async_engine.py
lmdeploy/serve/async_engine.py
+1
-1
lmdeploy/serve/turbomind/deploy.py
lmdeploy/serve/turbomind/deploy.py
+4
-4
lmdeploy/tokenizer.py
lmdeploy/tokenizer.py
+0
-0
lmdeploy/turbomind/__init__.py
lmdeploy/turbomind/__init__.py
+1
-2
lmdeploy/turbomind/chat.py
lmdeploy/turbomind/chat.py
+1
-1
lmdeploy/turbomind/decode.py
lmdeploy/turbomind/decode.py
+1
-1
lmdeploy/turbomind/turbomind.py
lmdeploy/turbomind/turbomind.py
+1
-1
tests/python/test_tokenizer.py
tests/python/test_tokenizer.py
+0
-15
No files found.
benchmark/profile_generation.py
View file @
c261b49d
...
@@ -18,7 +18,8 @@ from pynvml import (NVMLError, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex,
...
@@ -18,7 +18,8 @@ from pynvml import (NVMLError, nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex,
nvmlInit
,
nvmlShutdown
,
nvmlSystemGetDriverVersion
)
nvmlInit
,
nvmlShutdown
,
nvmlSystemGetDriverVersion
)
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
lmdeploy.turbomind
import
Tokenizer
,
TurboMind
from
lmdeploy.tokenizer
import
Tokenizer
from
lmdeploy.turbomind
import
TurboMind
def
infer
(
model
,
session_id
:
int
,
input_ids
:
str
,
output_seqlen
:
int
,
def
infer
(
model
,
session_id
:
int
,
input_ids
:
str
,
output_seqlen
:
int
,
...
...
benchmark/profile_restful_api.py
View file @
c261b49d
...
@@ -8,7 +8,7 @@ import fire
...
@@ -8,7 +8,7 @@ import fire
import
numpy
as
np
import
numpy
as
np
import
requests
import
requests
from
lmdeploy.
turbomind.
tokenizer
import
Tokenizer
from
lmdeploy.tokenizer
import
Tokenizer
from
lmdeploy.utils
import
get_logger
from
lmdeploy.utils
import
get_logger
...
...
benchmark/profile_serving.py
View file @
c261b49d
...
@@ -8,7 +8,7 @@ import fire
...
@@ -8,7 +8,7 @@ import fire
import
numpy
as
np
import
numpy
as
np
from
lmdeploy.serve.turbomind.chatbot
import
Chatbot
from
lmdeploy.serve.turbomind.chatbot
import
Chatbot
from
lmdeploy.
turbomind.
tokenizer
import
Tokenizer
from
lmdeploy.tokenizer
import
Tokenizer
def
infer
(
chatbot
,
session_id
:
int
,
req_que
:
mp
.
Queue
,
res_que
:
mp
.
Queue
):
def
infer
(
chatbot
,
session_id
:
int
,
req_que
:
mp
.
Queue
,
res_que
:
mp
.
Queue
):
...
...
benchmark/profile_throughput.py
View file @
c261b49d
...
@@ -8,7 +8,8 @@ from typing import List, Tuple
...
@@ -8,7 +8,8 @@ from typing import List, Tuple
import
fire
import
fire
from
lmdeploy.turbomind
import
Tokenizer
,
TurboMind
from
lmdeploy.tokenizer
import
Tokenizer
from
lmdeploy.turbomind
import
TurboMind
def
sample_requests
(
def
sample_requests
(
...
...
lmdeploy/serve/async_engine.py
View file @
c261b49d
...
@@ -30,7 +30,7 @@ class AsyncEngine:
...
@@ -30,7 +30,7 @@ class AsyncEngine:
def
__init__
(
self
,
model_path
,
instance_num
=
32
,
tp
=
1
)
->
None
:
def
__init__
(
self
,
model_path
,
instance_num
=
32
,
tp
=
1
)
->
None
:
from
lmdeploy
import
turbomind
as
tm
from
lmdeploy
import
turbomind
as
tm
from
lmdeploy.
turbomind.
tokenizer
import
Tokenizer
from
lmdeploy.tokenizer
import
Tokenizer
tokenizer_model_path
=
osp
.
join
(
model_path
,
'triton_models'
,
tokenizer_model_path
=
osp
.
join
(
model_path
,
'triton_models'
,
'tokenizer'
)
'tokenizer'
)
tokenizer
=
Tokenizer
(
tokenizer_model_path
)
tokenizer
=
Tokenizer
(
tokenizer_model_path
)
...
...
lmdeploy/serve/turbomind/deploy.py
View file @
c261b49d
...
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
...
@@ -306,7 +306,7 @@ def deploy_llama(model_name: str, model_path: str, tokenizer_path: str,
shutil
.
copy
(
tokenizer_path
,
shutil
.
copy
(
tokenizer_path
,
osp
.
join
(
triton_models_path
,
'tokenizer/tokenizer.model'
))
osp
.
join
(
triton_models_path
,
'tokenizer/tokenizer.model'
))
with
get_package_root_path
()
as
root_path
:
with
get_package_root_path
()
as
root_path
:
shutil
.
copy
(
osp
.
join
(
root_path
,
'
turbomind/
tokenizer.py'
),
shutil
.
copy
(
osp
.
join
(
root_path
,
'tokenizer.py'
),
osp
.
join
(
triton_models_path
,
'tokenizer'
))
osp
.
join
(
triton_models_path
,
'tokenizer'
))
else
:
else
:
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
...
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
...
@@ -435,7 +435,7 @@ def deploy_hf(model_name: str, model_path: str, tokenizer_path: str,
shutil
.
copy
(
json_path
,
shutil
.
copy
(
json_path
,
osp
.
join
(
triton_models_path
,
'tokenizer'
,
_file
))
osp
.
join
(
triton_models_path
,
'tokenizer'
,
_file
))
with
get_package_root_path
()
as
root_path
:
with
get_package_root_path
()
as
root_path
:
shutil
.
copy
(
osp
.
join
(
root_path
,
'
turbomind/
tokenizer.py'
),
shutil
.
copy
(
osp
.
join
(
root_path
,
'tokenizer.py'
),
osp
.
join
(
triton_models_path
,
'tokenizer'
))
osp
.
join
(
triton_models_path
,
'tokenizer'
))
else
:
else
:
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
...
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
...
@@ -601,7 +601,7 @@ def deploy_awq(model_name: str, model_path: str, tokenizer_path: str,
shutil
.
copy
(
json_path
,
shutil
.
copy
(
json_path
,
osp
.
join
(
triton_models_path
,
'tokenizer'
,
_file
))
osp
.
join
(
triton_models_path
,
'tokenizer'
,
_file
))
with
get_package_root_path
()
as
root_path
:
with
get_package_root_path
()
as
root_path
:
shutil
.
copy
(
osp
.
join
(
root_path
,
'
turbomind/
tokenizer.py'
),
shutil
.
copy
(
osp
.
join
(
root_path
,
'tokenizer.py'
),
osp
.
join
(
triton_models_path
,
'tokenizer'
))
osp
.
join
(
triton_models_path
,
'tokenizer'
))
else
:
else
:
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
...
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
...
@@ -831,7 +831,7 @@ def deploy_qwen(model_name: str, model_path: str, tokenizer_path: str,
shutil
.
copy
(
json_path
,
shutil
.
copy
(
json_path
,
osp
.
join
(
triton_models_path
,
'tokenizer'
,
_file
))
osp
.
join
(
triton_models_path
,
'tokenizer'
,
_file
))
with
get_package_root_path
()
as
root_path
:
with
get_package_root_path
()
as
root_path
:
shutil
.
copy
(
osp
.
join
(
root_path
,
'
turbomind/
tokenizer.py'
),
shutil
.
copy
(
osp
.
join
(
root_path
,
'tokenizer.py'
),
osp
.
join
(
triton_models_path
,
'tokenizer'
))
osp
.
join
(
triton_models_path
,
'tokenizer'
))
else
:
else
:
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
print
(
f
'tokenizer model
{
tokenizer_path
}
does not exist'
)
...
...
lmdeploy/
turbomind/
tokenizer.py
→
lmdeploy/tokenizer.py
View file @
c261b49d
File moved
lmdeploy/turbomind/__init__.py
View file @
c261b49d
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
.tokenizer
import
Tokenizer
from
.turbomind
import
TurboMind
from
.turbomind
import
TurboMind
__all__
=
[
'Tokenizer'
,
'TurboMind'
]
__all__
=
[
'TurboMind'
]
lmdeploy/turbomind/chat.py
View file @
c261b49d
...
@@ -8,7 +8,7 @@ import fire
...
@@ -8,7 +8,7 @@ import fire
from
lmdeploy
import
turbomind
as
tm
from
lmdeploy
import
turbomind
as
tm
from
lmdeploy.model
import
MODELS
from
lmdeploy.model
import
MODELS
from
lmdeploy.
turbomind.
tokenizer
import
Tokenizer
from
lmdeploy.tokenizer
import
Tokenizer
os
.
environ
[
'TM_LOG_LEVEL'
]
=
'ERROR'
os
.
environ
[
'TM_LOG_LEVEL'
]
=
'ERROR'
...
...
lmdeploy/turbomind/decode.py
View file @
c261b49d
...
@@ -6,7 +6,7 @@ import fire
...
@@ -6,7 +6,7 @@ import fire
import
torch
import
torch
from
lmdeploy
import
turbomind
as
tm
from
lmdeploy
import
turbomind
as
tm
from
lmdeploy.
turbomind.
tokenizer
import
Tokenizer
from
lmdeploy.tokenizer
import
Tokenizer
os
.
environ
[
'TM_LOG_LEVEL'
]
=
'ERROR'
os
.
environ
[
'TM_LOG_LEVEL'
]
=
'ERROR'
...
...
lmdeploy/turbomind/turbomind.py
View file @
c261b49d
...
@@ -14,7 +14,7 @@ from torch.nn.utils.rnn import pad_sequence
...
@@ -14,7 +14,7 @@ from torch.nn.utils.rnn import pad_sequence
import
lmdeploy
import
lmdeploy
from
lmdeploy.model
import
MODELS
from
lmdeploy.model
import
MODELS
from
lmdeploy.t
urbomind
import
Tokenizer
from
lmdeploy.t
okenizer
import
Tokenizer
from
lmdeploy.utils
import
get_logger
from
lmdeploy.utils
import
get_logger
# TODO: find another way import _turbomind
# TODO: find another way import _turbomind
...
...
tests/python/test_tokenizer.py
deleted
100644 → 0
View file @
f4422fab
from
lmdeploy.turbomind.tokenizer
import
Tokenizer
def
main
():
tokenizer
=
Tokenizer
(
'huggyllama/llama-7b'
)
prompts
=
[
'cest la vie'
,
'上帝已死'
]
for
prompt
in
prompts
:
tokens
=
tokenizer
.
encode
(
prompt
)
output
=
tokenizer
.
decode
(
tokens
)
print
(
output
)
if
__name__
==
'__main__'
:
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment