Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
b5a10eb0
Unverified
Commit
b5a10eb0
authored
Oct 01, 2023
by
kg6-sleipnir
Committed by
GitHub
Sep 30, 2023
Browse files
Added `dtype` arg to benchmarks (#1228)
parent
0967102c
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
1 deletion
+22
-1
benchmarks/benchmark_latency.py
benchmarks/benchmark_latency.py
+10
-0
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+12
-1
No files found.
benchmarks/benchmark_latency.py
View file @
b5a10eb0
...
@@ -23,6 +23,7 @@ def main(args: argparse.Namespace):
...
@@ -23,6 +23,7 @@ def main(args: argparse.Namespace):
max_num_seqs
=
args
.
batch_size
,
max_num_seqs
=
args
.
batch_size
,
max_num_batched_tokens
=
args
.
batch_size
*
args
.
input_len
,
max_num_batched_tokens
=
args
.
batch_size
*
args
.
input_len
,
trust_remote_code
=
args
.
trust_remote_code
,
trust_remote_code
=
args
.
trust_remote_code
,
dtype
=
args
.
dtype
,
)
)
sampling_params
=
SamplingParams
(
sampling_params
=
SamplingParams
(
...
@@ -87,5 +88,14 @@ if __name__ == '__main__':
...
@@ -87,5 +88,14 @@ if __name__ == '__main__':
parser
.
add_argument
(
'--trust-remote-code'
,
parser
.
add_argument
(
'--trust-remote-code'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'trust remote code from huggingface'
)
help
=
'trust remote code from huggingface'
)
parser
.
add_argument
(
'--dtype'
,
type
=
str
,
default
=
'auto'
,
choices
=
[
'auto'
,
'half'
,
'float16'
,
'bfloat16'
,
'float'
,
'float32'
],
help
=
'data type for model weights and activations. '
'The "auto" option will use FP16 precision '
'for FP32 and FP16 models, and BF16 precision '
'for BF16 models.'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
main
(
args
)
main
(
args
)
benchmarks/benchmark_throughput.py
View file @
b5a10eb0
...
@@ -64,6 +64,7 @@ def run_vllm(
...
@@ -64,6 +64,7 @@ def run_vllm(
n
:
int
,
n
:
int
,
use_beam_search
:
bool
,
use_beam_search
:
bool
,
trust_remote_code
:
bool
,
trust_remote_code
:
bool
,
dtype
:
str
,
)
->
float
:
)
->
float
:
llm
=
LLM
(
llm
=
LLM
(
model
=
model
,
model
=
model
,
...
@@ -72,6 +73,7 @@ def run_vllm(
...
@@ -72,6 +73,7 @@ def run_vllm(
tensor_parallel_size
=
tensor_parallel_size
,
tensor_parallel_size
=
tensor_parallel_size
,
seed
=
seed
,
seed
=
seed
,
trust_remote_code
=
trust_remote_code
,
trust_remote_code
=
trust_remote_code
,
dtype
=
dtype
,
)
)
# Add the requests to the engine.
# Add the requests to the engine.
...
@@ -171,7 +173,7 @@ def main(args: argparse.Namespace):
...
@@ -171,7 +173,7 @@ def main(args: argparse.Namespace):
elapsed_time
=
run_vllm
(
requests
,
args
.
model
,
args
.
tokenizer
,
elapsed_time
=
run_vllm
(
requests
,
args
.
model
,
args
.
tokenizer
,
args
.
quantization
,
args
.
tensor_parallel_size
,
args
.
quantization
,
args
.
tensor_parallel_size
,
args
.
seed
,
args
.
n
,
args
.
use_beam_search
,
args
.
seed
,
args
.
n
,
args
.
use_beam_search
,
args
.
trust_remote_code
)
args
.
trust_remote_code
,
args
.
dtype
)
elif
args
.
backend
==
"hf"
:
elif
args
.
backend
==
"hf"
:
assert
args
.
tensor_parallel_size
==
1
assert
args
.
tensor_parallel_size
==
1
elapsed_time
=
run_hf
(
requests
,
args
.
model
,
tokenizer
,
args
.
n
,
elapsed_time
=
run_hf
(
requests
,
args
.
model
,
tokenizer
,
args
.
n
,
...
@@ -219,6 +221,15 @@ if __name__ == "__main__":
...
@@ -219,6 +221,15 @@ if __name__ == "__main__":
parser
.
add_argument
(
'--trust-remote-code'
,
parser
.
add_argument
(
'--trust-remote-code'
,
action
=
'store_true'
,
action
=
'store_true'
,
help
=
'trust remote code from huggingface'
)
help
=
'trust remote code from huggingface'
)
parser
.
add_argument
(
'--dtype'
,
type
=
str
,
default
=
'auto'
,
choices
=
[
'auto'
,
'half'
,
'float16'
,
'bfloat16'
,
'float'
,
'float32'
],
help
=
'data type for model weights and activations. '
'The "auto" option will use FP16 precision '
'for FP32 and FP16 models, and BF16 precision '
'for BF16 models.'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
args
.
backend
==
"vllm"
:
if
args
.
backend
==
"vllm"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment