norm / vllm · Commits · 8c4b2592

Unverified commit 8c4b2592, authored Jul 20, 2023 by Ricardo Lu, committed by GitHub Jul 19, 2023.

fix: enable trust-remote-code in api server & benchmark. (#509)

Parent: cf21a9bd

Showing 4 changed files with 14 additions and 6 deletions (+14 -6).
benchmarks/benchmark_latency.py         +3 -0
benchmarks/benchmark_serving.py         +3 -1
benchmarks/benchmark_throughput.py      +6 -4
vllm/entrypoints/openai/api_server.py   +2 -1
benchmarks/benchmark_latency.py

@@ -21,6 +21,7 @@ def main(args: argparse.Namespace):
         tensor_parallel_size=args.tensor_parallel_size,
         max_num_seqs=args.batch_size,
         max_num_batched_tokens=args.batch_size * args.input_len,
+        trust_remote_code=args.trust_remote_code,
     )
 
     sampling_params = SamplingParams(
@@ -74,5 +75,7 @@ if __name__ == '__main__':
     parser.add_argument('--use-beam-search', action='store_true')
     parser.add_argument('--num-iters', type=int, default=3,
                         help='Number of iterations to run.')
+    parser.add_argument('--trust-remote-code', action='store_true',
+                        help='trust remote code from huggingface')
     args = parser.parse_args()
     main(args)
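Because the new flag is defined with action='store_true', it defaults to False, so existing benchmark invocations are unaffected; it only matters for models whose architecture or tokenizer code lives in the Hugging Face repo itself. A minimal sketch of the same wiring outside the benchmark script, assuming a vLLM version where LLM() accepts trust_remote_code (the model name is a placeholder):

import argparse

from vllm import LLM, SamplingParams

parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, default='facebook/opt-125m')  # placeholder default
parser.add_argument('--trust-remote-code', action='store_true',
                    help='trust remote code from huggingface')
args = parser.parse_args()

# Defaults to False; custom model code from the HF repo only runs when the
# flag is explicitly given on the command line.
llm = LLM(model=args.model, trust_remote_code=args.trust_remote_code)
outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=16))
print(outputs[0].outputs[0].text)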
benchmarks/benchmark_serving.py

@@ -177,7 +177,7 @@ def main(args: argparse.Namespace):
     np.random.seed(args.seed)
 
     api_url = f"http://{args.host}:{args.port}/generate"
-    tokenizer = get_tokenizer(args.tokenizer)
+    tokenizer = get_tokenizer(args.tokenizer, trust_remote_code=args.trust_remote_code)
     input_requests = sample_requests(args.dataset, args.num_prompts, tokenizer)
 
     benchmark_start_time = time.time()
@@ -227,5 +227,7 @@ if __name__ == "__main__":
                         "Otherwise, we use Poisson process to synthesize "
                         "the request arrival times.")
     parser.add_argument("--seed", type=int, default=0)
+    parser.add_argument('--trust-remote-code', action='store_true',
+                        help='trust remote code from huggingface')
     args = parser.parse_args()
     main(args)
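The serving benchmark only loads the tokenizer locally, to count prompt and completion tokens; generation happens on the server behind api_url. The tokenizer still needs the flag because vLLM's get_tokenizer defers to transformers' AutoTokenizer, which refuses to execute tokenizer code stored in a model repo unless told to. A hedged illustration using transformers directly (the model id is only an example of a repo that ships custom tokenizer code):

from transformers import AutoTokenizer

MODEL = "THUDM/chatglm2-6b"  # example repo with a custom tokenizer class

try:
    # trust_remote_code defaults to False, so this load is rejected.
    AutoTokenizer.from_pretrained(MODEL)
except Exception as err:
    print(f"load without trust_remote_code failed: {err}")

# Opting in lets transformers run the tokenizer code shipped in the repo.
tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
print(len(tokenizer("The quick brown fox").input_ids))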
benchmarks/benchmark_throughput.py

@@ -74,7 +74,7 @@ def run_vllm(
         tokenizer=tokenizer,
         tensor_parallel_size=tensor_parallel_size,
         seed=seed,
-        trust_remote_code=trust_remote_code
+        trust_remote_code=trust_remote_code,
     )
 
     # Add the requests to the engine.
@@ -111,7 +111,8 @@ def run_hf(
     trust_remote_code: bool,
 ) -> float:
     assert not use_beam_search
-    llm = AutoModelForCausalLM.from_pretrained(model, torch_dtype=torch.float16, trust_remote_code=trust_remote_code)
+    llm = AutoModelForCausalLM.from_pretrained(
+        model, torch_dtype=torch.float16, trust_remote_code=trust_remote_code)
     if llm.config.model_type == "llama":
         # To enable padding in the HF backend.
         tokenizer.pad_token = tokenizer.eos_token
@@ -173,8 +174,9 @@ def main(args: argparse.Namespace):
                                 args.seed, args.n, args.use_beam_search,
                                 args.trust_remote_code)
     elif args.backend == "hf":
         assert args.tensor_parallel_size == 1
-        elapsed_time = run_hf(requests, args.model, tokenizer, args.n,
-                              args.use_beam_search, args.hf_max_batch_size)
+        elapsed_time = run_hf(requests, args.model, tokenizer, args.n,
+                              args.use_beam_search, args.hf_max_batch_size,
+                              args.trust_remote_code)
     else:
         raise ValueError(f"Unknown backend: {args.backend}")
 
     total_num_tokens = sum(
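The throughput benchmark has two backends, and the flag has to reach both: vLLM's LLM engine and the plain Hugging Face path in run_hf, where AutoModelForCausalLM.from_pretrained likewise refuses custom modeling code unless trust_remote_code is set. A rough standalone sketch of the HF path (not the benchmark's exact code; the model id in the commented call is a placeholder):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_hf(model: str, trust_remote_code: bool):
    """Load the tokenizer and fp16 causal-LM weights, forwarding trust_remote_code."""
    tokenizer = AutoTokenizer.from_pretrained(model, trust_remote_code=trust_remote_code)
    llm = AutoModelForCausalLM.from_pretrained(
        model, torch_dtype=torch.float16, trust_remote_code=trust_remote_code)
    return tokenizer, llm


# Placeholder invocation; substitute a real repo that ships its own modeling code.
# tokenizer, llm = load_hf("some-org/custom-arch-model", trust_remote_code=True)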
vllm/entrypoints/openai/api_server.py

@@ -585,7 +585,8 @@ if __name__ == "__main__":
     # A separate tokenizer to map token IDs to strings.
     tokenizer = get_tokenizer(engine_args.tokenizer,
-                              tokenizer_mode=engine_args.tokenizer_mode)
+                              tokenizer_mode=engine_args.tokenizer_mode,
+                              trust_remote_code=engine_args.trust_remote_code)
 
     uvicorn.run(app,
                 host=args.host,
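The API server builds one extra tokenizer just to map generated token IDs back to text; this change makes it honour the same engine_args.trust_remote_code setting as the engine, so startup no longer trips over remote-code tokenizers that the engine itself can load (the flag comes from the engine's argument parser, not this file). Clients are unaffected; a hedged usage sketch with the pre-1.0 openai client, assuming the server runs on the default localhost:8000 and was started with the model named below:

# Hedged usage sketch: query a locally running server with the pre-1.0 `openai`
# client. Address, key handling, and model name are assumptions, not from the commit.
import openai

openai.api_key = "EMPTY"                      # the server typically ignores the key
openai.api_base = "http://localhost:8000/v1"  # assumed default host and port

completion = openai.Completion.create(
    model="facebook/opt-125m",   # should match the --model the server was launched with
    prompt="San Francisco is a",
    max_tokens=16,
)
print(completion.choices[0].text)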