feat: Replace genai-perf with aiperf (#3533)

Signed-off-by: lkomali <lkomali@nvidia.com>

feat: Replace genai-perf with aiperf (#3533)
Signed-off-by: lkomali <lkomali@nvidia.com>
9f310225 · Harshini Komali · GitHub · 1972f71f · 9f310225 · 9f310225
Unverified Commit 9f310225 authored Oct 16, 2025 by Harshini Komali Committed by GitHub Oct 16, 2025
16 changed files
--- a/benchmarks/router/README.md
+++ b/benchmarks/router/README.md
@@ -13,7 +13,7 @@ This directory contains scripts for benchmarking the Dynamo router with prefix c
 - etcd and NATS running (required for Dynamo coordination)
 - Required Python packages:
  - `dynamo` package (with vllm and frontend modules)
-  - `genai-perf` for benchmarking
+  - `aiperf` for benchmarking
  - `matplotlib` for plotting results
  - `data-generator` package (install with `pip install -e ./benchmarks` from repo root)

@@ -230,11 +230,11 @@ python real_data_benchmark.py --input-dataset trace.jsonl --prefix-root-multipli
 ```

 > [!Note]
-> At the time of writing this documentation, you may need to install the latest genai-perf from the main source branch to loadgen on the trace files:
+> At the time of writing this documentation, you may need to install the latest aiperf from the main source branch to run load generation on the trace files:
 > ```bash
-> pip install git+https://github.com/triton-inference-server/perf_analyzer.git#subdirectory=genai-perf
+> pip install git+https://github.com/ai-dynamo/aiperf.git#subdirectory=aiperf
 > ```
-> However, by the time of release, the genai-perf version included in the vLLM runtime container should be up to date enough to use as-is.
+> However, by the time of release, the aiperf version included in the vLLM runtime container should be up to date enough to use as-is.

 ## Troubleshooting


--- a/benchmarks/router/prefix_ratio_benchmark.py
+++ b/benchmarks/router/prefix_ratio_benchmark.py
@@ -27,7 +27,7 @@ console_handler.setFormatter(formatter)
 logger.addHandler(console_handler)


-def get_genai_perf_cmd(
+def get_aiperf_cmd(
    model,
    tokenizer,  # Add tokenizer parameter
    prefix_ratio,
@@ -40,12 +40,12 @@ def get_genai_perf_cmd(
    artifact_dir,
    url="http://localhost:8888",
 ):
-    """Build genai-perf command based on prefix ratio"""
+    """Build aiperf command based on prefix ratio"""
    prefix_length = int(isl * prefix_ratio)
    synthetic_input_length = int(isl * (1 - prefix_ratio))

    return [
-        "genai-perf",
+        "aiperf",
        "profile",
        "--model",
        model,
@@ -84,10 +84,7 @@ def get_genai_perf_cmd(
        str(num_prefix_prompts),
        "--artifact-dir",
        artifact_dir,
-        "--",
        "-v",
-        "--max-threads",
-        "256",
        "-H",
        "Authorization: Bearer NOT USED",
        "-H",
@@ -95,17 +92,17 @@ def get_genai_perf_cmd(
    ]


-def get_gap_result(artifact_dir: str) -> dict:
-    """Parse genai-perf results from JSON file"""
+def get_aiperf_result(artifact_dir: str) -> dict:
+    """Parse aiperf results from JSON file"""
    json_file_path = None
    for root, _, files in os.walk(artifact_dir):
-        if "profile_export_genai_perf.json" in files:
-            json_file_path = os.path.join(root, "profile_export_genai_perf.json")
+        if "profile_export_aiperf.json" in files:
+            json_file_path = os.path.join(root, "profile_export_aiperf.json")
            break

    if json_file_path is None:
        raise FileNotFoundError(
-            f"profile_export_genai_perf.json not found in {artifact_dir}"
+            f"profile_export_aiperf.json not found in {artifact_dir}"
        )

    with open(json_file_path, "r") as f:
@@ -125,8 +122,8 @@ def run_benchmark_single_url(
    artifact_dir,
    url,
 ) -> Optional[Dict]:
-    """Run genai-perf benchmark for a single URL"""
-    genai_perf_cmd = get_genai_perf_cmd(
+    """Run aiperf benchmark for a single URL"""
+    aiperf_cmd = get_aiperf_cmd(
        model,
        tokenizer,  # Pass tokenizer parameter
        prefix_ratio,
@@ -140,21 +137,21 @@ def run_benchmark_single_url(
        url,
    )

-    logger.info(f"Running command for URL {url}: {' '.join(genai_perf_cmd)}")
+    logger.info(f"Running command for URL {url}: {' '.join(aiperf_cmd)}")

    try:
-        gap_process = subprocess.run(
-            genai_perf_cmd, capture_output=True, text=True, check=True
+        aiperf_process = subprocess.run(
+            aiperf_cmd, capture_output=True, text=True, check=True
        )

-        logger.info(f"Genai-perf profiling completed successfully for URL {url}")
-        logger.info(gap_process.stdout)
+        logger.info(f"AIPerf profiling completed successfully for URL {url}")
+        logger.info(aiperf_process.stdout)

-        gap_result = get_gap_result(artifact_dir)
-        return gap_result
+        aiperf_result = get_aiperf_result(artifact_dir)
+        return aiperf_result

    except subprocess.CalledProcessError as e:
-        logger.error(f"Genai-perf failed for URL {url} with error code: {e.returncode}")
+        logger.error(f"AIPerf failed for URL {url} with error code: {e.returncode}")
        logger.error(f"stderr: {e.stderr}")
        return None

@@ -197,7 +194,7 @@ def run_benchmark(
    output_dir,
    urls,
 ) -> Optional[Dict]:
-    """Run genai-perf benchmark for a specific prefix ratio"""
+    """Run aiperf benchmark for a specific prefix ratio"""
    logger.info(
        f"Running benchmark with prefix_ratio={prefix_ratio}, seed={seed}, URLs={urls}"
    )
@@ -242,7 +239,7 @@ def run_benchmark(
        os.makedirs(artifact_dir, exist_ok=True)
        artifact_dirs.append(artifact_dir)

-        genai_perf_cmd = get_genai_perf_cmd(
+        aiperf_cmd = get_aiperf_cmd(
            model,
            tokenizer,  # Pass tokenizer parameter
            prefix_ratio,
@@ -256,10 +253,10 @@ def run_benchmark(
            url,
        )

-        logger.info(f"Launching process for URL {url}: {' '.join(genai_perf_cmd)}")
+        logger.info(f"Launching process for URL {url}: {' '.join(aiperf_cmd)}")

        process = subprocess.Popen(
-            genai_perf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+            aiperf_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        processes.append((process, url, artifact_dir))

@@ -269,18 +266,18 @@ def run_benchmark(
        stdout, stderr = process.communicate()

        if process.returncode == 0:
-            logger.info(f"Genai-perf completed successfully for URL {url}")
+            logger.info(f"AIPerf completed successfully for URL {url}")
            logger.info(stdout)

            try:
-                gap_result = get_gap_result(artifact_dir)
-                results.append(gap_result)
+                aiperf_result = get_aiperf_result(artifact_dir)
+                results.append(aiperf_result)
            except Exception as e:
                logger.error(f"Failed to get results for URL {url}: {e}")
                results.append(None)
        else:
            logger.error(
-                f"Genai-perf failed for URL {url} with error code: {process.returncode}"
+                f"AIPerf failed for URL {url} with error code: {process.returncode}"
            )
            logger.error(f"stderr: {stderr}")
            results.append(None)

--- a/benchmarks/router/real_data_benchmark.py
+++ b/benchmarks/router/real_data_benchmark.py
@@ -24,7 +24,7 @@ console_handler.setFormatter(formatter)
 logger.addHandler(console_handler)


-def get_genai_perf_cmd_for_trace(
+def get_aiperf_cmd_for_trace(
    model,
    tokenizer,
    input_dataset,
@@ -33,7 +33,7 @@ def get_genai_perf_cmd_for_trace(
    url="http://localhost:8888",
 ):
    return [
-        "genai-perf",
+        "aiperf",
        "profile",
        "--model",
        model,
@@ -47,17 +47,13 @@ def get_genai_perf_cmd_for_trace(
        "--url",
        url,
        "--input-file",
-        f"payload:{input_dataset}",
-        "--fixed-schedule",
-        "True",
+        f"{input_dataset}",
+        "--fixed-schedule-auto-offset",
        "--random-seed",
        str(seed),
        "--artifact-dir",
        artifact_dir,
-        "--",
        "-v",
-        "--max-threads",
-        "256",
        "-H",
        "Authorization: Bearer NOT USED",
        "-H",
@@ -73,8 +69,8 @@ def run_benchmark_with_trace(
    url,
    seed,
 ):
-    """Run genai-perf benchmark with a trace dataset"""
-    genai_perf_cmd = get_genai_perf_cmd_for_trace(
+    """Run aiperf benchmark with a trace dataset"""
+    aiperf_cmd = get_aiperf_cmd_for_trace(
        model,
        tokenizer,
        trace_dataset,
@@ -83,17 +79,17 @@ def run_benchmark_with_trace(
        url,
    )

-    logger.info(f"Running genai-perf with trace dataset: {trace_dataset}")
-    logger.info(f"Command: {' '.join(genai_perf_cmd)}")
+    logger.info(f"Running aiperf with trace dataset: {trace_dataset}")
+    logger.info(f"Command: {' '.join(aiperf_cmd)}")

    try:
-        # Run genai-perf and let it output directly to terminal
-        subprocess.run(genai_perf_cmd, check=True)
+        # Run aiperf and let it output directly to terminal
+        subprocess.run(aiperf_cmd, check=True)

-        logger.info("Genai-perf profiling completed successfully")
+        logger.info("AIPerf profiling completed successfully")

    except subprocess.CalledProcessError as e:
-        logger.error(f"Genai-perf failed with error code: {e.returncode}")
+        logger.error(f"AIPerf failed with error code: {e.returncode}")
        logger.error(f"stderr: {e.stderr}")
        raise

@@ -301,7 +297,7 @@ def main():
        logger.info(f"Synthetic trace data saved to: {trace_dataset_path}")

    # Run benchmark with the trace dataset
-    artifact_dir = os.path.join(args.output_dir, "genai_perf_artifacts")
+    artifact_dir = os.path.join(args.output_dir, "aiperf_artifacts")
    os.makedirs(artifact_dir, exist_ok=True)

    run_benchmark_with_trace(

--- a/benchmarks/sin_load_generator/README.md
+++ b/benchmarks/sin_load_generator/README.md
@@ -5,7 +5,7 @@ SPDX-License-Identifier: Apache-2.0

 # Sinusoidal Load Generator

-`sin_synth.py` is a simple script to generate synthetic load with sinusoidal request rate and isl/osl ratio. The output is in [mooncake-style](https://github.com/kvcache-ai/Mooncake) jsonl format, which can be directly used in [GenAI-Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf/genai_perf).
+`sin_synth.py` is a simple script to generate synthetic load with sinusoidal request rate and isl/osl ratio. The output is in [mooncake-style](https://github.com/kvcache-ai/Mooncake) jsonl format, which can be directly used in [AIPerf](https://github.com/ai-dynamo/aiperf/tree/main/aiperf).

 ## Usage


--- a/benchmarks/utils/genai.py
+++ b/benchmarks/utils/genai.py
--- a/benchmarks/utils/workflow.py
+++ b/benchmarks/utils/workflow.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Dict, List

-from benchmarks.utils.genai import run_concurrency_sweep
+from benchmarks.utils.aiperf import run_concurrency_sweep
 from deploy.utils.kubernetes import is_running_in_cluster



--- a/docs/backends/trtllm/gpt-oss.md
+++ b/docs/backends/trtllm/gpt-oss.md
@@ -404,7 +404,7 @@ curl localhost:8000/v1/chat/completions   -H "Content-Type: application/json"

 ### Performance Testing with AIPerf

-The Dynamo container includes [AIPerf](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/perf_analyzer/aiperf/README.html), NVIDIA's tool for benchmarking generative AI models. This tool helps measure throughput, latency, and other performance metrics for your deployment.
+The Dynamo container includes [AIPerf](https://github.com/ai-dynamo/aiperf/blob/main/README.md), NVIDIA's tool for benchmarking generative AI models. This tool helps measure throughput, latency, and other performance metrics for your deployment.

 **Run the following benchmark from inside the container** (after completing the deployment steps above):


--- a/docs/benchmarks/benchmarking.md
+++ b/docs/benchmarks/benchmarking.md
@@ -283,7 +283,7 @@ results/                         # Client-side: ./benchmarks/results/ or custom
 │   └── avg_time_to_first_token_vs_concurrency.png
 ├── <your-benchmark-name>/       # Results for your benchmark (uses your custom name)
 │   ├── c1/                      # Concurrency level 1
-│   │   └── profile_export_genai_perf.json
+│   │   └── profile_export_aiperf.json
 │   ├── c2/                      # Concurrency level 2
 │   ├── c5/                      # Concurrency level 5
 │   └── ...                      # Other concurrency levels (10, 50, 100, 250)
@@ -457,7 +457,7 @@ Results are stored in `/data/results` and follow the same structure as client-si
 /data/results/
 └── <benchmark-name>/                # Results for your benchmark name
    ├── c1/                          # Concurrency level 1
-    │   └── profile_export_genai_perf.json
+    │   └── profile_export_aiperf.json
    ├── c2/                          # Concurrency level 2
    └── ...                          # Other concurrency levels
 ```

--- a/docs/guides/disagg_perf_tuning.md
+++ b/docs/guides/disagg_perf_tuning.md
@@ -56,11 +56,11 @@ Typically, the number of GPUs vs the performance follows the following pattern:
 |       2 |                269 |                   135 |                        1.19x |
 |       4 |                578 |                   144 |                        1.28x |

-The best number of GPUs to use in the prefill and decode engines can be determined by running a few fixed ISL/OSL/concurrency test using [GenAI-Perf](https://github.com/triton-inference-server/perf_analyzer/tree/main/genai-perf) and compare with the SLA.
-GenAI-Perf is pre-installed in the dynamo container.
+The best number of GPUs to use in the prefill and decode engines can be determined by running a few fixed ISL/OSL/concurrency tests using [AIPerf](https://github.com/ai-dynamo/aiperf/tree/main) and comparing the results with the SLA.
+AIPerf is pre-installed in the dynamo container.

 > [!Tip]
-> If you are unfamiliar with GenAI-Perf, please see this helpful [tutorial](https://github.com/triton-inference-server/perf_analyzer/blob/main/genai-perf/docs/tutorial.md) to get you started.
+> If you are unfamiliar with AIPerf, please see this helpful [tutorial](https://github.com/ai-dynamo/aiperf/blob/main/docs/tutorial.md) to get you started.

 Besides the parallelization mapping, other common knobs to tune are maximum batch size, maximum number of tokens, and block size.
 For prefill engines, usually a small batch size and large `max_num_token` is preferred.

--- a/examples/basics/kubernetes/Distributed_Inference/README.md
+++ b/examples/basics/kubernetes/Distributed_Inference/README.md
@@ -54,4 +54,4 @@ curl localhost:8000/v1/chat/completions \
    "max_tokens": 30
  }'
  ```
-You can also benchmark the performance of the endpoint by [GenAI-Perf](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/perf_analyzer/genai-perf/README.html)
+You can also benchmark the performance of the endpoint using [AIPerf](https://github.com/ai-dynamo/aiperf/blob/main/README.md)
--- a/examples/basics/kubernetes/shared_frontend/README.md
+++ b/examples/basics/kubernetes/shared_frontend/README.md
@@ -39,4 +39,4 @@ curl localhost:8000/v1/chat/completions \
    "max_tokens": 30
  }'
  ```
-You can also benchmark the performance of the endpoint by [GenAI-Perf](https://docs.nvidia.com/deeplearning/triton-inference-server/user-guide/docs/perf_analyzer/genai-perf/README.html)
+You can also benchmark the performance of the endpoint using [AIPerf](https://github.com/ai-dynamo/aiperf/blob/main/README.md)
--- a/examples/deployments/router_standalone/README.md
+++ b/examples/deployments/router_standalone/README.md
@@ -80,7 +80,7 @@ While not implemented in this example, the router can also operate in a pure pre
 - Integrates with vLLM's OpenAI serving components for request preprocessing and response formatting

 ### `perf.sh`
-- Benchmarking script using `genai-perf` to test the router setup
+- Benchmarking script using `aiperf` to test the router setup
 - Configured for streaming chat completions with synthetic workloads
 - Tests concurrent requests to evaluate routing performance


--- a/examples/deployments/router_standalone/perf.sh
+++ b/examples/deployments/router_standalone/perf.sh
@@ -28,7 +28,7 @@ num_unique_prompts=10

 seed=42

-genai-perf profile \
+aiperf profile \
  --model ${model} \
  --tokenizer ${model} \
  --endpoint-type ${type} \
@@ -47,8 +47,6 @@ genai-perf profile \
  --request-count ${num_requests} \
  --num-dataset-entries ${num_unique_prompts} \
  --random-seed ${seed} \
-  -- \
  -v \
-  --max-threads 256 \
  -H 'Authorization: Bearer NOT USED' \
  -H 'Accept: text/event-stream'
--- a/tests/planner/README.md
+++ b/tests/planner/README.md
@@ -215,10 +215,10 @@ When running deployment with sla-planner, to reduce the image pulling time, depl
 kubectl apply -f ./perf_test_configs/image_cache_daemonset.yaml -n <namespace>
 ```

-Then, port-forward or shell into the frontend pod and run GenAI-Perf to get the goodput:
+Then, port-forward or shell into the frontend pod and run AIPerf to get the goodput:

 ```bash
-genai-perf profile \
+aiperf profile \
  --model nvidia/Llama-3.1-8B-Instruct-FP8 \
  --tokenizer nvidia/Llama-3.1-8B-Instruct-FP8 \
  --endpoint-type chat \
@@ -227,11 +227,11 @@ genai-perf profile \
  --input-file payload:/workspace/rr-5-45_i3000o300.jsonl \ # path to the generated load dataset \
  --fixed-schedule True \
  --goodput time_to_first_token:200 inter_token_latency:10 \
-  -- -v -max-threads 64 \
+  -v \
 ```

 > [!NOTE]
-> Sometimes, when sla planner scales down the number of workers, a few requests will error out and cause GenAI-Perf to stuck. We are aware of this issue and are working on fixing it.
+> Sometimes, when sla planner scales down the number of workers, a few requests will error out and cause AIPerf to get stuck. We are aware of this issue and are working on fixing it.

 #### E2E Perf Test Results


--- a/tests/planner/scaling/run_scaling_test.sh
+++ b/tests/planner/scaling/run_scaling_test.sh
@@ -64,9 +64,9 @@ check_prerequisites() {
        exit 1
    fi

-    # Check for genai-perf
-    if ! command -v genai-perf &> /dev/null; then
-        log_error "genai-perf not found. This tool is required for load generation."
+    # Check for aiperf
+    if ! command -v aiperf &> /dev/null; then
+        log_error "aiperf not found. This tool is required for load generation."
        log_error "Please install the required dependencies by following the instructions in tests/planner/README.md"
        exit 1
    fi

--- a/tests/planner/utils/load_generator.py
+++ b/tests/planner/utils/load_generator.py
@@ -4,7 +4,7 @@
 """
 Load generation script for SLA planner scaling tests.

-This script uses genai-perf to generate load at specific request rates
+This script uses aiperf to generate load at specific request rates
 to test the planner's scaling behavior.
 """

@@ -24,7 +24,7 @@ logger = logging.getLogger(__name__)


 class LoadGenerator:
-    """Generate load using genai-perf to test planner scaling."""
+    """Generate load using aiperf to test planner scaling."""

    def __init__(
        self,
@@ -40,12 +40,12 @@ class LoadGenerator:
        self.osl = osl
        self.save_results = save_results

-    def _calculate_genai_perf_params(
+    def _calculate_aiperf_params(
        self,
        req_per_sec: float,
    ) -> Dict[str, Any]:
        """
-        Calculate genai-perf parameters to approximate desired request rate.
+        Calculate aiperf parameters to approximate desired request rate.

        Args:
            req_per_sec: Desired requests per second
@@ -71,15 +71,15 @@ class LoadGenerator:
        Args:
            req_per_sec: Target requests per second
            duration_sec: Duration to generate load (seconds)
-            artifact_dir: Directory to store genai-perf artifacts
+            artifact_dir: Directory to store aiperf artifacts

        Returns:
            Dictionary with load test results
        """
        logger.info(f"Generating load: {req_per_sec} req/s for {duration_sec}s")

-        # Calculate genai-perf parameters
-        params = self._calculate_genai_perf_params(req_per_sec)
+        # Calculate aiperf parameters
+        params = self._calculate_aiperf_params(req_per_sec)
        logger.info(f"Using request_rate={params['request_rate']} req/s")

        # Create artifact directory if not provided
@@ -95,9 +95,9 @@ class LoadGenerator:
            f"Adjusted parameters: duration={duration_sec}s, request_count={request_count}"
        )

-        # Build genai-perf command based on coworker's successful approach
+        # Build aiperf command based on coworker's successful approach
        cmd = [
-            "genai-perf",
+            "aiperf",
            "profile",
            "--model",
            self.model,
@@ -124,10 +124,7 @@ class LoadGenerator:
            ),  # Generate reasonable dataset size
            "--artifact-dir",
            artifact_dir,
-            "--",
            "-v",
-            "-max-threads",
-            "64",
        ]

        logger.info(f"Running command: {' '.join(cmd)}")
@@ -135,7 +132,7 @@ class LoadGenerator:
            f"Expected duration: {duration_sec}s, timeout: {max(duration_sec * 2 + 120, int(duration_sec * 2.5))}s"
        )

-        # Run genai-perf (async)
+        # Run aiperf (async)
        start_time = time.time()
        # More generous timeout for high-load tests - allow 2x duration + 2 minutes buffer
        timeout = max(duration_sec * 2 + 120, int(duration_sec * 2.5))
@@ -152,7 +149,7 @@ class LoadGenerator:
            except asyncio.TimeoutError:
                proc.kill()
                await proc.communicate()
-                logger.error("genai-perf timed out")
+                logger.error("aiperf timed out")
                raise RuntimeError("Load generation timed out")

            end_time = time.time()
@@ -160,13 +157,9 @@ class LoadGenerator:

            # Persist logs for debugging
            try:
-                with open(
-                    os.path.join(artifact_dir, "genai_perf.stdout.log"), "wb"
-                ) as f:
+                with open(os.path.join(artifact_dir, "aiperf.stdout.log"), "wb") as f:
                    f.write(stdout or b"")
-                with open(
-                    os.path.join(artifact_dir, "genai_perf.stderr.log"), "wb"
-                ) as f:
+                with open(os.path.join(artifact_dir, "aiperf.stderr.log"), "wb") as f:
                    f.write(stderr or b"")
            except Exception:
                pass
@@ -174,31 +167,31 @@ class LoadGenerator:
            if proc.returncode == 0:
                logger.info("Load generation completed successfully")
                logger.info(f"Actual duration: {actual_duration:.2f}s")
-                results = self._parse_genai_perf_results(artifact_dir)
+                results = self._parse_aiperf_results(artifact_dir)
                results.update(
                    {
                        "requested_req_per_sec": req_per_sec,
                        "actual_duration": actual_duration,
                        "target_duration": duration_sec,
-                        "genai_perf_params": params,
+                        "aiperf_params": params,
                        "artifact_dir": artifact_dir,
                        "success": True,
                    }
                )
                return results
            else:
-                logger.error(f"genai-perf failed with return code {proc.returncode}")
-                raise RuntimeError("genai-perf failed; see logs in artifact dir")
+                logger.error(f"aiperf failed with return code {proc.returncode}")
+                raise RuntimeError("aiperf failed; see logs in artifact dir")
        except RuntimeError:
            raise
        except Exception as e:
-            logger.error(f"genai-perf execution error: {e}")
+            logger.error(f"aiperf execution error: {e}")
            raise

-    def _parse_genai_perf_results(self, artifact_dir: str) -> Dict[str, Any]:
-        """Parse genai-perf results from artifact directory."""
+    def _parse_aiperf_results(self, artifact_dir: str) -> Dict[str, Any]:
+        """Parse aiperf results from artifact directory."""
        try:
-            # Look for the profile_export_genai_perf.json file
+            # Look for the profile_export_aiperf.json file
            json_files = [f for f in os.listdir(artifact_dir) if f.endswith(".json")]
            if not json_files:
                logger.warning("No JSON results found in artifact directory")
@@ -207,7 +200,7 @@ class LoadGenerator:
            # Main results file
            results_file = None
            for json_file in json_files:
-                if "profile_export" in json_file or "genai_perf" in json_file:
+                if "profile_export" in json_file or "aiperf" in json_file:
                    results_file = os.path.join(artifact_dir, json_file)
                    break

@@ -236,7 +229,7 @@ class LoadGenerator:
                            ).get("avg", 0),
                        }
                    )
-            if not results and "profile_export_genai_perf" in data:
+            if not results and "profile_export_aiperf" in data:
                summary = data.get("summary", {})
                results.update(
                    {
@@ -250,7 +243,7 @@ class LoadGenerator:
            return results

        except Exception as e:
-            logger.warning(f"Failed to parse genai-perf results: {e}")
+            logger.warning(f"Failed to parse aiperf results: {e}")
            return {}

    async def run_scaling_test(self) -> Dict[str, Any]: