Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
3b8f31b3
Unverified
Commit
3b8f31b3
authored
Dec 26, 2025
by
Ning Xie
Committed by
GitHub
Dec 26, 2025
Browse files
[benchmark] use model card root instead of id (#31329)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent
2cd94259
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing 1 changed file with 6 additions and 6 deletions
+6
-6
vllm/benchmarks/serve.py
vllm/benchmarks/serve.py
+6
-6
No files found.
vllm/benchmarks/serve.py
View file @
3b8f31b3
...
@@ -61,7 +61,7 @@ TERM_PLOTLIB_AVAILABLE = (importlib.util.find_spec("termplotlib") is not None) a
...
@@ -61,7 +61,7 @@ TERM_PLOTLIB_AVAILABLE = (importlib.util.find_spec("termplotlib") is not None) a
async
def
get_first_model_from_server
(
async
def
get_first_model_from_server
(
base_url
:
str
,
headers
:
dict
|
None
=
None
base_url
:
str
,
headers
:
dict
|
None
=
None
)
->
str
:
)
->
tuple
[
str
,
str
]
:
"""Fetch the first model from the server's /v1/models endpoint."""
"""Fetch the first model from the server's /v1/models endpoint."""
models_url
=
f
"
{
base_url
}
/v1/models"
models_url
=
f
"
{
base_url
}
/v1/models"
async
with
aiohttp
.
ClientSession
()
as
session
:
async
with
aiohttp
.
ClientSession
()
as
session
:
...
@@ -70,7 +70,7 @@ async def get_first_model_from_server(
...
@@ -70,7 +70,7 @@ async def get_first_model_from_server(
response
.
raise_for_status
()
response
.
raise_for_status
()
data
=
await
response
.
json
()
data
=
await
response
.
json
()
if
"data"
in
data
and
len
(
data
[
"data"
])
>
0
:
if
"data"
in
data
and
len
(
data
[
"data"
])
>
0
:
return
data
[
"data"
][
0
][
"id"
]
return
data
[
"data"
][
0
][
"id"
]
,
data
[
"data"
][
0
][
"root"
]
else
:
else
:
raise
ValueError
(
raise
ValueError
(
f
"No models found on the server at
{
base_url
}
. "
f
"No models found on the server at
{
base_url
}
. "
...
@@ -1157,7 +1157,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
...
@@ -1157,7 +1157,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--save-detailed"
,
"--save-detailed"
,
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"When saving the results, whether to include per request "
help
=
"When saving the results, whether to include per request "
"information such as response, error, ttfs, tpots, etc."
,
"information such as response, error, ttfts, tpots, etc."
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
"--append-result"
,
"--append-result"
,
...
@@ -1396,12 +1396,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
...
@@ -1396,12 +1396,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
# Fetch model from server if not specified
# Fetch model from server if not specified
if
args
.
model
is
None
:
if
args
.
model
is
None
:
print
(
"Model not specified, fetching first model from server..."
)
print
(
"Model not specified, fetching first model from server..."
)
model_id
=
await
get_first_model_from_server
(
base_url
,
headers
)
model_name
,
model_id
=
await
get_first_model_from_server
(
base_url
,
headers
)
print
(
f
"
Using
model:
{
model_id
}
"
)
print
(
f
"
First model name:
{
model_name
}
, first
model
id
:
{
model_id
}
"
)
else
:
else
:
model_name
=
args
.
served_model_name
model_id
=
args
.
model
model_id
=
args
.
model
model_name
=
args
.
served_model_name
tokenizer_id
=
args
.
tokenizer
if
args
.
tokenizer
is
not
None
else
model_id
tokenizer_id
=
args
.
tokenizer
if
args
.
tokenizer
is
not
None
else
model_id
tokenizer_mode
=
args
.
tokenizer_mode
tokenizer_mode
=
args
.
tokenizer_mode
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment