Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
8d8c2f6f
"vscode:/vscode.git/clone" did not exist on "ff0dfb74d76872bcbcbadb6e1e52c0dcb00bb4ce"
Unverified
Commit
8d8c2f6f
authored
Dec 01, 2023
by
aisensiy
Committed by
GitHub
Nov 30, 2023
Browse files
Support max-model-len argument for throughput benchmark (#1858)
parent
51d3cb95
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
1 deletion
+10
-1
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+10
-1
No files found.
benchmarks/benchmark_throughput.py
View file @
8d8c2f6f
...
@@ -69,6 +69,7 @@ def run_vllm(
...
@@ -69,6 +69,7 @@ def run_vllm(
use_beam_search
:
bool
,
use_beam_search
:
bool
,
trust_remote_code
:
bool
,
trust_remote_code
:
bool
,
dtype
:
str
,
dtype
:
str
,
max_model_len
:
Optional
[
int
]
=
None
,
)
->
float
:
)
->
float
:
from
vllm
import
LLM
,
SamplingParams
from
vllm
import
LLM
,
SamplingParams
llm
=
LLM
(
llm
=
LLM
(
...
@@ -79,6 +80,7 @@ def run_vllm(
...
@@ -79,6 +80,7 @@ def run_vllm(
seed
=
seed
,
seed
=
seed
,
trust_remote_code
=
trust_remote_code
,
trust_remote_code
=
trust_remote_code
,
dtype
=
dtype
,
dtype
=
dtype
,
max_model_len
=
max_model_len
,
)
)
# Add the requests to the engine.
# Add the requests to the engine.
...
@@ -201,7 +203,8 @@ def main(args: argparse.Namespace):
...
@@ -201,7 +203,8 @@ def main(args: argparse.Namespace):
elapsed_time
=
run_vllm
(
requests
,
args
.
model
,
args
.
tokenizer
,
elapsed_time
=
run_vllm
(
requests
,
args
.
model
,
args
.
tokenizer
,
args
.
quantization
,
args
.
tensor_parallel_size
,
args
.
quantization
,
args
.
tensor_parallel_size
,
args
.
seed
,
args
.
n
,
args
.
use_beam_search
,
args
.
seed
,
args
.
n
,
args
.
use_beam_search
,
args
.
trust_remote_code
,
args
.
dtype
)
args
.
trust_remote_code
,
args
.
dtype
,
args
.
max_model_len
)
elif
args
.
backend
==
"hf"
:
elif
args
.
backend
==
"hf"
:
assert
args
.
tensor_parallel_size
==
1
assert
args
.
tensor_parallel_size
==
1
elapsed_time
=
run_hf
(
requests
,
args
.
model
,
tokenizer
,
args
.
n
,
elapsed_time
=
run_hf
(
requests
,
args
.
model
,
tokenizer
,
args
.
n
,
...
@@ -261,6 +264,12 @@ if __name__ == "__main__":
...
@@ -261,6 +264,12 @@ if __name__ == "__main__":
parser
.
add_argument
(
'--trust-remote-code'
,
parser
.
add_argument
(
'--trust-remote-code'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'trust remote code from huggingface'
)
help
=
'trust remote code from huggingface'
)
parser
.
add_argument
(
'--max-model-len'
,
type
=
int
,
default
=
None
,
help
=
'Maximum length of a sequence (including prompt and output). '
'If None, will be derived from the model.'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--dtype'
,
'--dtype'
,
type
=
str
,
type
=
str
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment