Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
1f12122b
Unverified
Commit
1f12122b
authored
Jun 18, 2024
by
zhyncs
Committed by
GitHub
Jun 17, 2024
Browse files
[Misc] use AutoTokenizer for benchmark serving when vLLM not installed (#5588)
parent
890d8d96
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
32 additions
and
2 deletions
+32
-2
benchmarks/backend_request_func.py
benchmarks/backend_request_func.py
+28
-1
benchmarks/benchmark_serving.py
benchmarks/benchmark_serving.py
+4
-1
No files found.
benchmarks/backend_request_func.py
View file @
1f12122b
...
...
@@ -4,10 +4,13 @@ import sys
import
time
import
traceback
from
dataclasses
import
dataclass
,
field
from
typing
import
List
,
Optional
from
typing
import
List
,
Optional
,
Union
import
aiohttp
import
huggingface_hub.constants
from
tqdm.asyncio
import
tqdm
from
transformers
import
(
AutoTokenizer
,
PreTrainedTokenizer
,
PreTrainedTokenizerFast
)
# Shared aiohttp client timeout: a total budget of 6 * 60 * 60 seconds (6 hours).
AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
...
...
@@ -388,6 +391,30 @@ def remove_prefix(text: str, prefix: str) -> str:
return
text
def get_model(pretrained_model_name_or_path: str) -> str:
    """Download a model snapshot, skipping weight files, and return its path.

    The hub is selected by the ``VLLM_USE_MODELSCOPE`` environment variable:
    when it equals ``"true"`` (case-insensitive) ModelScope is used, otherwise
    the Hugging Face Hub. In both cases ``.pt``, ``.safetensors`` and ``.bin``
    files are excluded, and ``huggingface_hub.constants.HF_HUB_OFFLINE``
    is forwarded as ``local_files_only``.

    Args:
        pretrained_model_name_or_path: Hub identifier of the model.

    Returns:
        Whatever the selected ``snapshot_download`` returns (the local
        snapshot directory).
    """
    offline = huggingface_hub.constants.HF_HUB_OFFLINE

    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
        from modelscope import snapshot_download

        # ModelScope names these parameters ``model_id`` /
        # ``ignore_file_pattern``.
        return snapshot_download(
            model_id=pretrained_model_name_or_path,
            local_files_only=offline,
            ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])

    from huggingface_hub import snapshot_download

    # huggingface_hub has no ``model_id`` / ``ignore_file_pattern`` keywords;
    # passing them raised TypeError. Its equivalents are ``repo_id`` and
    # ``ignore_patterns``, the latter taking glob-style patterns.
    return snapshot_download(
        repo_id=pretrained_model_name_or_path,
        local_files_only=offline,
        ignore_patterns=["*.pt", "*.safetensors", "*.bin"])
def get_tokenizer(
    pretrained_model_name_or_path: str,
    trust_remote_code: bool,
) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
    """Load a tokenizer with ``AutoTokenizer.from_pretrained``.

    If the argument is not ``None`` and does not exist as a filesystem path,
    it is first passed through ``get_model`` and the value returned from
    there is loaded instead.

    Args:
        pretrained_model_name_or_path: Local path or model identifier.
        trust_remote_code: Forwarded unchanged to
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The tokenizer object produced by ``AutoTokenizer.from_pretrained``.
    """
    location = pretrained_model_name_or_path

    # Short-circuit keeps ``os.path.exists`` from ever seeing ``None``.
    must_resolve = location is not None and not os.path.exists(location)
    if must_resolve:
        location = get_model(location)

    return AutoTokenizer.from_pretrained(
        location,
        trust_remote_code=trust_remote_code,
    )
ASYNC_REQUEST_FUNCS
=
{
"tgi"
:
async_request_tgi
,
"vllm"
:
async_request_openai_completions
,
...
...
benchmarks/benchmark_serving.py
View file @
1f12122b
...
...
@@ -39,7 +39,10 @@ from backend_request_func import (ASYNC_REQUEST_FUNCS, RequestFuncInput,
from
tqdm.asyncio
import
tqdm
from
transformers
import
PreTrainedTokenizerBase
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
try
:
from
vllm.transformers_utils.tokenizer
import
get_tokenizer
except
ImportError
:
from
backend_request_func
import
get_tokenizer
@
dataclass
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment