Indicate compile mode in the benchmark results (#32990)

Signed-off-by: Huy Do <huydhn@gmail.com>

Indicate compile mode in the benchmark results (#32990)
Signed-off-by: Huy Do <huydhn@gmail.com>
9ca66ecc · Huy Do · GitHub · c3a9752b · 9ca66ecc · 9ca66ecc
Unverified Commit 9ca66ecc authored Jan 30, 2026 by Huy Do Committed by GitHub Jan 30, 2026
Showing with 42 additions and 3 deletions

.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh ...formance-benchmarks/scripts/run-performance-benchmarks.sh +8 -3

vllm/benchmarks/lib/utils.py vllm/benchmarks/lib/utils.py +34 -0

No files found.
--- a/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
+++ b/.buildkite/performance-benchmarks/scripts/run-performance-benchmarks.sh
@@ -393,6 +393,11 @@ run_serving_tests() {
      fi
    fi
+    # save the compilation mode and optimization level on the serving results
+    # whenever they are set
+    compilation_config_mode=$(echo "$server_params" | jq -r '."compilation_config.mode" // empty')
+    optimization_level=$(echo "$server_params" | jq -r '.optimization_level // empty')
    # iterate over different QPS
    for qps in $qps_list; do
      # remove the surrounding single quote from qps
@@ -406,15 +411,15 @@ run_serving_tests() {
      for max_concurrency in $max_concurrency_list; do
        new_test_name=$test_name"_qps_"$qps"_concurrency_"$max_concurrency
        echo " new test name $new_test_name"
-        # pass the tensor parallel size to the client so that it can be displayed
+        # pass the tensor parallel size, the compilation mode, and the optimization
-        # on the benchmark dashboard
+        # level to the client so that they can be used on the benchmark dashboard
        client_command="vllm bench serve \
          --save-result \
          --result-dir $RESULTS_FOLDER \
          --result-filename ${new_test_name}.json \
          --request-rate $qps \
          --max-concurrency $max_concurrency \
-          --metadata "tensor_parallel_size=$tp" \
+          --metadata tensor_parallel_size=$tp compilation_config.mode=$compilation_config_mode optimization_level=$optimization_level \
          $client_args $client_remote_args "
        echo "Running test case $test_name with qps $qps"

--- a/vllm/benchmarks/lib/utils.py
+++ b/vllm/benchmarks/lib/utils.py
@@ -8,6 +8,32 @@ import os
 from typing import Any
+def extract_field(
+    args: argparse.Namespace, extra_info: dict[str, Any], field_name: str
+) -> str:
+    if field_name in extra_info:
+        return extra_info[field_name]
+    v = args
+    # For example, args.compilation_config.mode
+    for nested_field in field_name.split("."):
+        if not hasattr(v, nested_field):
+            return ""
+        v = getattr(v, nested_field)
+    return v
+def use_compile(args: argparse.Namespace, extra_info: dict[str, Any]) -> bool:
+    """
+    Check if the benchmark is run with torch.compile
+    """
+    return not (
+        extract_field(args, extra_info, "compilation_config.mode") == "0"
+        or "eager" in getattr(args, "output_json", "")
+        or "eager" in getattr(args, "result_filename", "")
+    )
 def convert_to_pytorch_benchmark_format(
    args: argparse.Namespace, metrics: dict[str, list], extra_info: dict[str, Any]
 ) -> list:
@@ -26,6 +52,14 @@ def convert_to_pytorch_benchmark_format(
                "name": "vLLM benchmark",
                "extra_info": {
                    "args": vars(args),
+                    "compilation_config.mode": extract_field(
+                        args, extra_info, "compilation_config.mode"
+                    ),
+                    "optimization_level": extract_field(
+                        args, extra_info, "optimization_level"
+                    ),
+                    # A boolean field used by vLLM benchmark HUD dashboard
+                    "use_compile": use_compile(args, extra_info),
                },
            },
            "model": {