norm / vllm · Commits · cf21a9bd

support trust_remote_code in benchmark (#518)

Unverified commit cf21a9bd, authored Jul 20, 2023 by WRH, committed by GitHub on Jul 19, 2023. Parent: 16c3e295.
Changes: 1 changed file with 9 additions and 3 deletions.

benchmarks/benchmark_throughput.py (+9, -3)
```diff
--- a/benchmarks/benchmark_throughput.py
+++ b/benchmarks/benchmark_throughput.py
@@ -67,12 +67,14 @@ def run_vllm(
     seed: int,
     n: int,
     use_beam_search: bool,
+    trust_remote_code: bool,
 ) -> float:
     llm = LLM(
         model=model,
         tokenizer=tokenizer,
         tensor_parallel_size=tensor_parallel_size,
         seed=seed,
+        trust_remote_code=trust_remote_code,
     )
 
     # Add the requests to the engine.
@@ -106,9 +108,10 @@ def run_hf(
     n: int,
     use_beam_search: bool,
     max_batch_size: int,
+    trust_remote_code: bool,
 ) -> float:
     assert not use_beam_search
-    llm = AutoModelForCausalLM.from_pretrained(model, torch_dtype=torch.float16)
+    llm = AutoModelForCausalLM.from_pretrained(model, torch_dtype=torch.float16, trust_remote_code=trust_remote_code)
     if llm.config.model_type == "llama":
         # To enable padding in the HF backend.
         tokenizer.pad_token = tokenizer.eos_token
@@ -161,13 +164,13 @@ def main(args: argparse.Namespace):
     random.seed(args.seed)
 
     # Sample the requests.
-    tokenizer = get_tokenizer(args.tokenizer)
+    tokenizer = get_tokenizer(args.tokenizer, trust_remote_code=args.trust_remote_code)
     requests = sample_requests(args.dataset, args.num_prompts, tokenizer)
     if args.backend == "vllm":
         elapsed_time = run_vllm(
             requests, args.model, args.tokenizer, args.tensor_parallel_size,
-            args.seed, args.n, args.use_beam_search)
+            args.seed, args.n, args.use_beam_search, args.trust_remote_code)
     elif args.backend == "hf":
         assert args.tensor_parallel_size == 1
         elapsed_time = run_hf(
             requests, args.model, tokenizer, args.n,
@@ -199,6 +202,9 @@ if __name__ == "__main__":
     parser.add_argument("--seed", type=int, default=0)
     parser.add_argument("--hf-max-batch-size", type=int, default=None,
                         help="Maximum batch size for HF backend.")
+    parser.add_argument('--trust-remote-code',
+                        action='store_true',
+                        help='trust remote code from huggingface')
     args = parser.parse_args()
 
     if args.backend == "vllm":
```
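The only new command-line surface here is the `--trust-remote-code` flag. Because it is declared with `action='store_true'`, it defaults to False, so existing benchmark invocations keep the safe behavior and callers must opt in explicitly. A minimal, self-contained sketch of that argparse pattern, mirroring the `add_argument` call in the diff:

```python
import argparse

# Mirrors the option added by this commit: a store_true flag is False unless
# explicitly passed, so remote code is never trusted by default.
parser = argparse.ArgumentParser()
parser.add_argument('--trust-remote-code',
                    action='store_true',
                    help='trust remote code from huggingface')

print(parser.parse_args([]).trust_remote_code)                       # False
print(parser.parse_args(['--trust-remote-code']).trust_remote_code)  # True
```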
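For background on what the flag ultimately controls: in transformers (and in vLLM, which forwards it when loading model configs and tokenizers), `trust_remote_code=True` allows `from_pretrained` to execute model or tokenizer code shipped inside the Hugging Face repository itself, which is required for architectures not built into the library. A hedged sketch, using a hypothetical repo id:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# "example-org/custom-arch-model" is a placeholder for a repo that ships its
# own modeling code; loading such a repo without trust_remote_code=True
# raises an error telling the user to pass the flag.
repo = "example-org/custom-arch-model"

tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo, trust_remote_code=True)
```

With this commit applied, such a model can be benchmarked end to end, e.g. `python benchmarks/benchmark_throughput.py --backend hf --model example-org/custom-arch-model --hf-max-batch-size 8 --trust-remote-code` (dataset and tokenizer arguments as usual; the model name here is the same placeholder).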