# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import subprocess
from pathlib import Path
from typing import List


def _build_aiperf_cmd(
    model: str,
    port: int,
14
15
    sweep_mode: str,
    sweep_value: int,
16
    conversation_num: int,
17
18
19
20
21
    warmup_count: int,
    input_file: str,
    osl: int,
    artifact_dir: Path,
) -> List[str]:
22
23
24
25
26
    if sweep_mode == "concurrency":
        sweep_flag = "--concurrency"
    else:
        sweep_flag = "--request-rate"

27
28
29
30
31
32
33
    return [
        "aiperf",
        "profile",
        "-m",
        model,
        "-u",
        f"http://localhost:{port}",
34
35
        sweep_flag,
        str(sweep_value),
36
37
        "--conversation-num",
        str(conversation_num),
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
        "--warmup-request-count",
        str(warmup_count),
        "--input-file",
        input_file,
        "--custom-dataset-type",
        "single_turn",
        "--extra-inputs",
        f"max_tokens:{osl}",
        "--extra-inputs",
        f"min_tokens:{osl}",
        "--extra-inputs",
        "ignore_eos:true",
        "--extra-inputs",
        "stream:true",
        "--streaming",
        "--artifact-dir",
        str(artifact_dir),
        "--ui",
        "none",
        "--no-server-metrics",
    ]


def run_aiperf_single(
    model: str,
    port: int,
    sweep_mode: str,
    sweep_value: int,
    conversation_num: int,
    warmup_count: int,
    input_file: str,
    osl: int,
    artifact_dir: Path,
) -> None:
    """Run a single aiperf profile invocation.

    Creates ``artifact_dir`` (including parents) if needed, builds the
    command with ``_build_aiperf_cmd`` and runs it with captured output.

    Args:
        model: Forwarded to ``_build_aiperf_cmd``.
        port: Forwarded to ``_build_aiperf_cmd``.
        sweep_mode: Forwarded to ``_build_aiperf_cmd``; also used in the
            progress messages.
        sweep_value: Forwarded to ``_build_aiperf_cmd``; also used in the
            progress messages.
        conversation_num: Forwarded to ``_build_aiperf_cmd``.
        warmup_count: Forwarded to ``_build_aiperf_cmd``.
        input_file: Forwarded to ``_build_aiperf_cmd``.
        osl: Forwarded to ``_build_aiperf_cmd``.
        artifact_dir: Directory created here and passed on as the aiperf
            artifact directory.

    Raises:
        subprocess.CalledProcessError: If the process exits with a non-zero
            status. The exception carries the captured stdout and stderr.
    """
    artifact_dir.mkdir(parents=True, exist_ok=True)
    cmd = _build_aiperf_cmd(
        model=model,
        port=port,
        sweep_mode=sweep_mode,
        sweep_value=sweep_value,
        conversation_num=conversation_num,
        warmup_count=warmup_count,
        input_file=input_file,
        osl=osl,
        artifact_dir=artifact_dir,
    )

    print(f"  aiperf {sweep_mode}={sweep_value} -> {artifact_dir}", flush=True)
    # check=False: the return code is inspected manually below so the tail of
    # the captured output can be printed before raising.
    proc = subprocess.run(cmd, capture_output=True, text=True, check=False)

    if proc.returncode != 0:
        print(f"  aiperf FAILED (exit {proc.returncode})", flush=True)
        for stream_name, stream in [("stderr", proc.stderr), ("stdout", proc.stdout)]:
            if stream:
                # Show only the last 15 lines of each stream.
                for line in stream.strip().splitlines()[-15:]:
                    print(f"    [{stream_name}] {line}", flush=True)
        raise subprocess.CalledProcessError(
            proc.returncode, cmd, output=proc.stdout, stderr=proc.stderr
        )

    print(f"  aiperf {sweep_mode}={sweep_value} done.", flush=True)


def run_sweep(
    model: str,
    port: int,
    sweep_mode: str,
    sweep_values: List[int],
    conversation_num: int,
    warmup_count: int,
    input_file: str,
    osl: int,
    output_dir: Path,
) -> None:
    """Run aiperf across all sweep values, writing results under output_dir/{mode}{N}/.

    Creates ``output_dir`` (including parents) if needed, then calls
    ``run_aiperf_single`` once per value in ascending order. Any exception
    raised by a run propagates and stops the sweep.

    Args:
        model: Forwarded to ``run_aiperf_single``.
        port: Forwarded to ``run_aiperf_single``.
        sweep_mode: Forwarded to ``run_aiperf_single``; also the prefix of
            each per-run artifact directory name.
        sweep_values: Values to sweep over; iterated in sorted order.
        conversation_num: Forwarded to ``run_aiperf_single``.
        warmup_count: Forwarded to ``run_aiperf_single``.
        input_file: Forwarded to ``run_aiperf_single``.
        osl: Forwarded to ``run_aiperf_single``.
        output_dir: Root directory under which the per-run artifact
            directories are placed.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    for value in sorted(sweep_values):
        run_aiperf_single(
            model=model,
            port=port,
            sweep_mode=sweep_mode,
            sweep_value=value,
            conversation_num=conversation_num,
            warmup_count=warmup_count,
            input_file=input_file,
            osl=osl,
            # One artifact directory per sweep point, e.g. "concurrency8".
            artifact_dir=output_dir / f"{sweep_mode}{value}",
        )

    print(f"Sweep complete. Results in {output_dir}", flush=True)