Unverified commit 3b8f31b3, authored by Ning Xie, committed by GitHub
Browse files

[benchmark] use model card root instead of id (#31329)


Signed-off-by: Andy Xie <andy.xning@gmail.com>
parent 2cd94259
...@@ -61,7 +61,7 @@ TERM_PLOTLIB_AVAILABLE = (importlib.util.find_spec("termplotlib") is not None) a ...@@ -61,7 +61,7 @@ TERM_PLOTLIB_AVAILABLE = (importlib.util.find_spec("termplotlib") is not None) a
async def get_first_model_from_server( async def get_first_model_from_server(
base_url: str, headers: dict | None = None base_url: str, headers: dict | None = None
) -> str: ) -> tuple[str, str]:
"""Fetch the first model from the server's /v1/models endpoint.""" """Fetch the first model from the server's /v1/models endpoint."""
models_url = f"{base_url}/v1/models" models_url = f"{base_url}/v1/models"
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession() as session:
...@@ -70,7 +70,7 @@ async def get_first_model_from_server( ...@@ -70,7 +70,7 @@ async def get_first_model_from_server(
response.raise_for_status() response.raise_for_status()
data = await response.json() data = await response.json()
if "data" in data and len(data["data"]) > 0: if "data" in data and len(data["data"]) > 0:
return data["data"][0]["id"] return data["data"][0]["id"], data["data"][0]["root"]
else: else:
raise ValueError( raise ValueError(
f"No models found on the server at {base_url}. " f"No models found on the server at {base_url}. "
...@@ -1157,7 +1157,7 @@ def add_cli_args(parser: argparse.ArgumentParser): ...@@ -1157,7 +1157,7 @@ def add_cli_args(parser: argparse.ArgumentParser):
"--save-detailed", "--save-detailed",
action="store_true", action="store_true",
help="When saving the results, whether to include per request " help="When saving the results, whether to include per request "
"information such as response, error, ttfs, tpots, etc.", "information such as response, error, ttfts, tpots, etc.",
) )
parser.add_argument( parser.add_argument(
"--append-result", "--append-result",
...@@ -1396,12 +1396,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: ...@@ -1396,12 +1396,12 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]:
# Fetch model from server if not specified # Fetch model from server if not specified
if args.model is None: if args.model is None:
print("Model not specified, fetching first model from server...") print("Model not specified, fetching first model from server...")
model_id = await get_first_model_from_server(base_url, headers) model_name, model_id = await get_first_model_from_server(base_url, headers)
print(f"Using model: {model_id}") print(f"First model name: {model_name}, first model id: {model_id}")
else: else:
model_name = args.served_model_name
model_id = args.model model_id = args.model
model_name = args.served_model_name
tokenizer_id = args.tokenizer if args.tokenizer is not None else model_id tokenizer_id = args.tokenizer if args.tokenizer is not None else model_id
tokenizer_mode = args.tokenizer_mode tokenizer_mode = args.tokenizer_mode
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment