# runner.py

# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import subprocess
from pathlib import Path
from typing import List


def _build_aiperf_cmd(
    model: str,
    port: int,
    sweep_mode: str,
    sweep_value: int,
    conversation_num: int,
    warmup_count: int,
    input_file: str,
    osl: int,
    artifact_dir: Path,
) -> List[str]:
    """Assemble the argv list for one ``aiperf profile`` invocation.

    Args:
        model: Value passed to ``-m``.
        port: Port of the endpoint; the URL is ``http://localhost:<port>``.
        sweep_mode: ``"concurrency"`` selects ``--concurrency``; any other
            value selects ``--request-rate``.
        sweep_value: Value passed to the selected sweep flag.
        conversation_num: Value passed to ``--conversation-num``.
        warmup_count: Value passed to ``--warmup-request-count``.
        input_file: Value passed to ``--input-file``.
        osl: Sent as both ``max_tokens`` and ``min_tokens`` extra inputs.
        artifact_dir: Value passed to ``--artifact-dir``.

    Returns:
        The command as a list of arguments, suitable for ``subprocess.run``.
    """
    load_flag = "--concurrency" if sweep_mode == "concurrency" else "--request-rate"

    # Each entry becomes its own "--extra-inputs <key:value>" pair, in order.
    extra_inputs = (
        f"max_tokens:{osl}",
        f"min_tokens:{osl}",
        "ignore_eos:true",
        "stream:true",
    )

    argv = ["aiperf", "profile"]
    argv += ["-m", model]
    argv += ["-u", f"http://localhost:{port}"]
    argv += [load_flag, str(sweep_value)]
    argv += ["--conversation-num", str(conversation_num)]
    argv += ["--warmup-request-count", str(warmup_count)]
    argv += ["--input-file", input_file]
    argv += ["--custom-dataset-type", "single_turn"]
    for pair in extra_inputs:
        argv += ["--extra-inputs", pair]
    argv += ["--streaming"]
    argv += ["--artifact-dir", str(artifact_dir)]
    argv += ["--ui", "none"]
    argv += ["--no-server-metrics"]
    return argv


def run_aiperf_single(
    model: str,
    port: int,
    sweep_mode: str,
    sweep_value: int,
    conversation_num: int,
    warmup_count: int,
    input_file: str,
    osl: int,
    artifact_dir: Path,
) -> None:
    """Execute one aiperf profile run and report its outcome on stdout.

    The artifact directory is created (with parents) before the command is
    launched. The child's stdout and stderr are captured rather than streamed.

    Raises:
        subprocess.CalledProcessError: If aiperf exits with a non-zero status.
            The last 15 lines of stderr and stdout are printed first, and the
            full captured output is attached to the exception.
    """
    artifact_dir.mkdir(parents=True, exist_ok=True)
    argv = _build_aiperf_cmd(
        model=model,
        port=port,
        sweep_mode=sweep_mode,
        sweep_value=sweep_value,
        conversation_num=conversation_num,
        warmup_count=warmup_count,
        input_file=input_file,
        osl=osl,
        artifact_dir=artifact_dir,
    )

    print(f"  aiperf {sweep_mode}={sweep_value} -> {artifact_dir}", flush=True)
    result = subprocess.run(argv, capture_output=True, text=True)

    # Success path first; everything below handles a failed run.
    if result.returncode == 0:
        print(f"  aiperf {sweep_mode}={sweep_value} done.", flush=True)
        return

    print(f"  aiperf FAILED (exit {result.returncode})", flush=True)
    captured = (("stderr", result.stderr), ("stdout", result.stdout))
    for stream_name, text in captured:
        if not text:
            continue
        # Only the tail is echoed to keep the failure report short.
        tail = text.strip().splitlines()[-15:]
        for line in tail:
            print(f"    [{stream_name}] {line}", flush=True)

    raise subprocess.CalledProcessError(
        result.returncode, argv, output=result.stdout, stderr=result.stderr
    )


def run_sweep(
    model: str,
    port: int,
    sweep_mode: str,
    sweep_values: List[int],
    conversation_num: int,
    warmup_count: int,
    input_file: str,
    osl: int,
    output_dir: Path,
) -> None:
    """Run aiperf once per sweep value, in ascending order.

    Each run writes its artifacts to ``output_dir/<sweep_mode><value>``.
    ``output_dir`` is created (with parents) if it does not exist. An
    exception raised by a run propagates, so later values are not executed.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Arguments that are identical for every point of the sweep.
    shared = dict(
        model=model,
        port=port,
        sweep_mode=sweep_mode,
        conversation_num=conversation_num,
        warmup_count=warmup_count,
        input_file=input_file,
        osl=osl,
    )

    # Sort a copy so the caller's list is left untouched.
    ordered = list(sweep_values)
    ordered.sort()

    for point in ordered:
        run_dir = output_dir / f"{sweep_mode}{point}"
        run_aiperf_single(sweep_value=point, artifact_dir=run_dir, **shared)

    print(f"Sweep complete. Results in {output_dir}", flush=True)