"vscode:/vscode.git/clone" did not exist on "a1d874224d9c29ae84f3850474b4816f0ed9574b"
Unverified commit 9ca66ecc, authored by Huy Do and committed by GitHub
Browse files

Indicate compile mode in the benchmark results (#32990)


Signed-off-by: Huy Do <huydhn@gmail.com>
parent c3a9752b
...@@ -393,6 +393,11 @@ run_serving_tests() { ...@@ -393,6 +393,11 @@ run_serving_tests() {
fi fi
fi fi
# save the compilation mode and optimization level on the serving results
# whenever they are set
compilation_config_mode=$(echo "$server_params" | jq -r '."compilation_config.mode" // empty')
optimization_level=$(echo "$server_params" | jq -r '.optimization_level // empty')
# iterate over different QPS # iterate over different QPS
for qps in $qps_list; do for qps in $qps_list; do
# remove the surrounding single quote from qps # remove the surrounding single quote from qps
...@@ -406,15 +411,15 @@ run_serving_tests() { ...@@ -406,15 +411,15 @@ run_serving_tests() {
for max_concurrency in $max_concurrency_list; do for max_concurrency in $max_concurrency_list; do
new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency
echo " new test name $new_test_name" echo " new test name $new_test_name"
# pass the tensor parallel size to the client so that it can be displayed # pass the tensor parallel size, the compilation mode, and the optimization
# on the benchmark dashboard # level to the client so that they can be used on the benchmark dashboard
client_command="vllm bench serve \ client_command="vllm bench serve \
--save-result \ --save-result \
--result-dir $RESULTS_FOLDER \ --result-dir $RESULTS_FOLDER \
--result-filename ${new_test_name}.json \ --result-filename ${new_test_name}.json \
--request-rate $qps \ --request-rate $qps \
--max-concurrency $max_concurrency \ --max-concurrency $max_concurrency \
--metadata "tensor_parallel_size=$tp" \ --metadata tensor_parallel_size=$tp compilation_config.mode=$compilation_config_mode optimization_level=$optimization_level \
$client_args $client_remote_args " $client_args $client_remote_args "
echo "Running test case $test_name with qps $qps" echo "Running test case $test_name with qps $qps"
......
...@@ -8,6 +8,32 @@ import os ...@@ -8,6 +8,32 @@ import os
from typing import Any from typing import Any
def extract_field(
    args: argparse.Namespace, extra_info: dict[str, Any], field_name: str
) -> Any:
    """
    Look up a (possibly dotted) field for the benchmark record.

    The value is taken from ``extra_info`` when ``field_name`` is present
    there as a literal key (dots included, e.g. ``"compilation_config.mode"``).
    Otherwise ``field_name`` is split on ``"."`` and resolved step by step
    starting from ``args``: each step is an attribute lookup, or a key lookup
    when the current value is a ``dict``.

    Args:
        args: Parsed command-line arguments.
        extra_info: Extra key/value pairs; takes precedence over ``args``.
        field_name: Field to look up, optionally dotted for nested access.

    Returns:
        The stored value as-is (no conversion is applied), or ``""`` when any
        step of the lookup is missing.
    """
    if field_name in extra_info:
        return extra_info[field_name]

    # Private sentinel so that a stored value of None is still returned
    # rather than being mistaken for "not found".
    missing = object()

    v: Any = args
    # For example, args.compilation_config.mode
    for nested_field in field_name.split("."):
        if isinstance(v, dict):
            # NOTE(review): assumes a nested config may be held as a plain
            # dict (e.g. parsed from JSON) -- confirm against the CLI parser.
            v = v.get(nested_field, missing)
        else:
            v = getattr(v, nested_field, missing)
        if v is missing:
            return ""
    return v
def use_compile(args: argparse.Namespace, extra_info: dict[str, Any]) -> bool:
    """
    Check if the benchmark is run with torch.compile

    The run is treated as NOT compiled when either:
      * the ``compilation_config.mode`` field (looked up via
        ``extract_field``) is ``"0"`` or the integer ``0``, or
      * the substring ``"eager"`` occurs in ``args.output_json`` or
        ``args.result_filename``.

    Args:
        args: Parsed command-line arguments.
        extra_info: Extra key/value pairs passed along with the results.

    Returns:
        ``True`` unless one of the conditions above holds.
    """
    mode = extract_field(args, extra_info, "compilation_config.mode")
    # The mode is a string when it comes from metadata, but may be numeric
    # when read from an args attribute. bool is excluded so that False is not
    # mistaken for mode 0.
    if mode == "0" or (
        isinstance(mode, int) and not isinstance(mode, bool) and mode == 0
    ):
        return False

    for attr_name in ("output_json", "result_filename"):
        # The attribute can exist but be unset (None); `"eager" in None`
        # would raise TypeError, so fall back to an empty string.
        file_name = getattr(args, attr_name, None) or ""
        if "eager" in file_name:
            return False
    return True
def convert_to_pytorch_benchmark_format( def convert_to_pytorch_benchmark_format(
args: argparse.Namespace, metrics: dict[str, list], extra_info: dict[str, Any] args: argparse.Namespace, metrics: dict[str, list], extra_info: dict[str, Any]
) -> list: ) -> list:
...@@ -26,6 +52,14 @@ def convert_to_pytorch_benchmark_format( ...@@ -26,6 +52,14 @@ def convert_to_pytorch_benchmark_format(
"name": "vLLM benchmark", "name": "vLLM benchmark",
"extra_info": { "extra_info": {
"args": vars(args), "args": vars(args),
"compilation_config.mode": extract_field(
args, extra_info, "compilation_config.mode"
),
"optimization_level": extract_field(
args, extra_info, "optimization_level"
),
# A boolean field used by vLLM benchmark HUD dashboard
"use_compile": use_compile(args, extra_info),
}, },
}, },
"model": { "model": {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment