Unverified Commit f222e555 authored by Qi Wang's avatar Qi Wang Committed by GitHub
Browse files

fix(benchmarks): use --conversation-num for grouped single_turn sweeps (DIS-1807) (#8458)


Signed-off-by: Qi Wang <qiwa@nvidia.com>
Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
parent f12c8605
......@@ -29,7 +29,9 @@ python -m benchmarks.multimodal.sweep \
model: Qwen/Qwen3-VL-30B-A3B-Instruct-FP8
concurrencies: [16, 32, 64, 128, 256]
osl: 150 # output sequence length
request_count: 1000 # requests per concurrency level
conversation_num: 10 # sessions per sweep value (optional; derived from
# input JSONL's unique session_id count if unset;
# flat JSONLs count each row as a 1-turn conversation)
warmup_count: 5
port: 8000
timeout: 900 # seconds to wait for server readiness
......@@ -64,10 +66,21 @@ python -m benchmarks.multimodal.sweep \
--config experiments/embedding_cache/vllm_serve.yaml \
--concurrencies 1,2,4 \
--osl 200 \
--request-count 50 \
--conversation-num 10 \
--skip-plots
```
## Warmup semantics
`warmup_count: N` is a **request (turn) budget**, not a session budget. For a
10×10 JSONL with `warmup_count: 2`, warmup issues 2 total requests — both go
to `user_0` (turns 0 and 1) because aiperf's continuation-turn priority keeps
feeding the in-flight session until its budget runs out. Warmup does NOT
consume 2 full sessions (20 requests). Profiling then starts at `user_1`,
runs `user_1..user_9` to completion, and wraps to a fresh `user_0` instance
for the 10th session. Keep `warmup_count` small (≤ turns-per-session) so
warmup stays within a single session's prefix.
## Output Directory Structure
Given the config above with two input files and two configs (`cache-off`,
......
......@@ -59,10 +59,14 @@ def parse_args(argv=None) -> argparse.Namespace:
help="Override output sequence length.",
)
parser.add_argument(
"--request-count",
"--conversation-num",
type=int,
default=None,
help="Override request count per sweep value.",
help=(
"Override number of conversations (sessions) per sweep value. "
"If unset, derived from the input JSONL's unique session_id count "
"(flat JSONLs count each row as a 1-turn conversation)."
),
)
parser.add_argument(
"--skip-plots",
......
......@@ -32,7 +32,7 @@ class SweepConfig:
request_rates: Optional[List[int]] = None
concurrencies: Optional[List[int]] = None
osl: int = 150
request_count: int = 1000
conversation_num: Optional[int] = None
warmup_count: int = 5
port: int = 8000
timeout: int = 600
......@@ -127,7 +127,7 @@ def load_config(
request_rates=yaml_request_rates,
concurrencies=yaml_concurrencies,
osl=raw.get("osl", 150),
request_count=raw.get("request_count", 1000),
conversation_num=raw.get("conversation_num"),
warmup_count=raw.get("warmup_count", 5),
port=raw.get("port", 8000),
timeout=raw.get("timeout", 600),
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
from __future__ import annotations
import json
from pathlib import Path
def count_session_ids(jsonl_path: str | Path) -> int:
    """Count the conversations (sessions) contained in a JSONL dataset.

    Used to derive ``conversation_num`` for the sweep when the user hasn't set
    it explicitly. Rows without ``session_id`` (missing key or explicit
    ``null``) count as distinct sessions (matches aiperf's per-row UUID
    fallback in ``SingleTurnDatasetLoader``).

    For multi_turn rows (``{"type": "multi_turn", "session_id": ..., "turns": [...]}``),
    the top-level ``session_id`` is what counts.

    Args:
        jsonl_path: Path to the JSONL file. Blank lines are ignored.

    Returns:
        Number of distinct ``session_id`` values (compared after ``str()``
        conversion) plus the number of rows that carry no ``session_id``.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    sessions: set[str] = set()
    anon_count = 0
    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # default, so datasets with non-ASCII content parse the same everywhere.
    with open(jsonl_path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            row = json.loads(line)
            sid = row.get("session_id")
            if sid is None:
                # Each anonymous row is its own one-turn conversation.
                anon_count += 1
            else:
                # Normalise to str so mixed-type ids share one set.
                sessions.add(str(sid))
    return len(sessions) + anon_count
......@@ -7,6 +7,7 @@ from pathlib import Path
from typing import List, Optional
from .config import BenchmarkConfig, SweepConfig, input_file_tag, resolve_repo_root
from .dataset_shape import count_session_ids
from .runner import run_aiperf_single
from .server import ServerManager
......@@ -18,6 +19,26 @@ def _resolve_workflow(workflow: str, repo_root: Path) -> str:
return str(repo_root / p)
def _resolve_conversation_num(config: SweepConfig, input_file: str) -> int:
    """Choose the conversation count to use for ``input_file``.

    The dataset is always scanned for its session count. When the config
    carries no explicit ``conversation_num``, that detected count is logged
    and returned. An explicit value is returned as-is, unless it is larger
    than the detected count, in which case a ``ValueError`` is raised.
    """
    available = count_session_ids(input_file)
    requested = config.conversation_num

    # No explicit override: fall back to what the dataset provides.
    if requested is None:
        print(
            f" conversation_num derived from {input_file}: {available}",
            flush=True,
        )
        return available

    # Explicit override must fit inside the dataset.
    if requested > available:
        message = (
            f"conversation_num={requested} exceeds unique "
            f"session_id count ({available}) in {input_file}. SequentialSampler "
            f"would wrap. Set conversation_num <= {available} or reshape the JSONL."
        )
        raise ValueError(message)

    return requested
def _print_banner(title: str, char: str = "=", width: int = 70) -> None:
print(f"\n{char * width}")
print(f" {title}")
......@@ -46,7 +67,8 @@ def run_sweep(
print(f" Sweep mode: {sweep_mode}")
print(f" Sweep values: {sweep_values}")
print(f" OSL: {config.osl}")
print(f" Requests: {config.request_count} per {sweep_mode}")
if config.conversation_num is not None:
print(f" Conversations: {config.conversation_num} per {sweep_mode}")
print(
f" Restart: {'every run' if config.restart_server_every_benchmark else 'per config'}"
)
......@@ -98,11 +120,13 @@ def _run_config(
_print_banner(f"Config: {bench_cfg.label}", char="#")
# Collect pending runs, skipping those with existing results.
pending_runs: List[tuple[str, str, int, Path]] = []
pending_runs: List[tuple[str, str, int, Path, int]] = []
for input_file in config.input_files:
file_tag = input_file_tag(input_file)
sweep_dir = output_base / file_tag / bench_cfg.label
conversation_num = _resolve_conversation_num(config, input_file)
for value in sorted(sweep_values):
artifact_dir = sweep_dir / f"{sweep_mode}{value}"
......@@ -113,7 +137,9 @@ def _run_config(
flush=True,
)
else:
pending_runs.append((input_file, file_tag, value, artifact_dir))
pending_runs.append(
(input_file, file_tag, value, artifact_dir, conversation_num)
)
if not pending_runs:
print(f" All runs skipped for {bench_cfg.label}", flush=True)
......@@ -128,7 +154,7 @@ def _run_config(
)
try:
for input_file, file_tag, value, artifact_dir in pending_runs:
for input_file, file_tag, value, artifact_dir, conversation_num in pending_runs:
_print_banner(
f"[{file_tag}] Config: {bench_cfg.label} " f"{sweep_mode}={value}",
char="-",
......@@ -148,7 +174,7 @@ def _run_config(
port=config.port,
sweep_mode=sweep_mode,
sweep_value=value,
request_count=config.request_count,
conversation_num=conversation_num,
warmup_count=config.warmup_count,
input_file=input_file,
osl=config.osl,
......
......@@ -13,7 +13,7 @@ def _build_aiperf_cmd(
port: int,
sweep_mode: str,
sweep_value: int,
request_count: int,
conversation_num: int,
warmup_count: int,
input_file: str,
osl: int,
......@@ -33,8 +33,8 @@ def _build_aiperf_cmd(
f"http://localhost:{port}",
sweep_flag,
str(sweep_value),
"--request-count",
str(request_count),
"--conversation-num",
str(conversation_num),
"--warmup-request-count",
str(warmup_count),
"--input-file",
......@@ -63,7 +63,7 @@ def run_aiperf_single(
port: int,
sweep_mode: str,
sweep_value: int,
request_count: int,
conversation_num: int,
warmup_count: int,
input_file: str,
osl: int,
......@@ -76,7 +76,7 @@ def run_aiperf_single(
port=port,
sweep_mode=sweep_mode,
sweep_value=sweep_value,
request_count=request_count,
conversation_num=conversation_num,
warmup_count=warmup_count,
input_file=input_file,
osl=osl,
......@@ -104,7 +104,7 @@ def run_sweep(
port: int,
sweep_mode: str,
sweep_values: List[int],
request_count: int,
conversation_num: int,
warmup_count: int,
input_file: str,
osl: int,
......@@ -119,7 +119,7 @@ def run_sweep(
port=port,
sweep_mode=sweep_mode,
sweep_value=value,
request_count=request_count,
conversation_num=conversation_num,
warmup_count=warmup_count,
input_file=input_file,
osl=osl,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment