Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
891b9d33
Unverified
Commit
891b9d33
authored
May 12, 2025
by
Brayden Zhong
Committed by
GitHub
May 11, 2025
Browse files
[Fix] Benchmark `"EngineClient" has no attribute "model_config"` (#17976)
Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
parent
43078301
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
5 deletions
+7
-5
benchmarks/benchmark_throughput.py
benchmarks/benchmark_throughput.py
+4
-3
vllm/benchmarks/throughput.py
vllm/benchmarks/throughput.py
+3
-2
No files found.
benchmarks/benchmark_throughput.py
View file @
891b9d33
...
...
@@ -146,9 +146,10 @@ async def run_vllm_async(
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
                 "Please ensure that max_model_len is greater than the sum of"
                 " prompt_len and expected_output_len for all requests.")
...
...
@@ -599,7 +600,7 @@ if __name__ == "__main__":
         "--lora-path",
         type=str,
         default=None,
-        help="Path to the lora adapters to use. This can be an absolute path, "
+        help="Path to the LoRA adapters to use. This can be an absolute path, "
         "a relative path, or a Hugging Face model identifier.")
     parser.add_argument(
         "--prefix-len",
...
...
vllm/benchmarks/throughput.py
View file @
891b9d33
...
...
@@ -148,9 +148,10 @@ async def run_vllm_async(
     async with build_async_engine_client_from_engine_args(
             engine_args, disable_frontend_multiprocessing) as llm:
+        model_config = await llm.get_model_config()
         assert all(
-            llm.model_config.max_model_len >= (request.prompt_len +
-                                               request.expected_output_len)
+            model_config.max_model_len >= (request.prompt_len +
+                                           request.expected_output_len)
             for request in requests), (
                 "Please ensure that max_model_len is greater than the sum of"
                 " prompt_len and expected_output_len for all requests.")
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment