Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
4292e3b8
Unverified
Commit
4292e3b8
authored
Feb 28, 2026
by
Cyrus Leung
Committed by
GitHub
Feb 28, 2026
Browse files
[Benchmark] Improve UX of sweep scripts (#35600)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent
24d6ea8a
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
191 additions
and
128 deletions
+191
-128
docs/benchmarking/sweeps.md
docs/benchmarking/sweeps.md
+14
-10
vllm/benchmarks/sweep/plot.py
vllm/benchmarks/sweep/plot.py
+3
-5
vllm/benchmarks/sweep/plot_pareto.py
vllm/benchmarks/sweep/plot_pareto.py
+2
-3
vllm/benchmarks/sweep/serve.py
vllm/benchmarks/sweep/serve.py
+80
-45
vllm/benchmarks/sweep/serve_workload.py
vllm/benchmarks/sweep/serve_workload.py
+22
-32
vllm/benchmarks/sweep/startup.py
vllm/benchmarks/sweep/startup.py
+70
-33
No files found.
docs/benchmarking/sweeps.md
View file @
4292e3b8
...
...
@@ -72,7 +72,7 @@ Follow these steps to run the script:
]
```
5. Determine where you want to save the results, and pass that to `--output-dir`.
5. Set `--output-dir` and optionally `--experiment-name` to control where to save the results.
Example command:
...
...
@@ -82,7 +82,8 @@ vllm bench sweep serve \
--bench-cmd
'vllm bench serve --model meta-llama/Llama-2-7b-chat-hf --backend vllm --endpoint /v1/completions --dataset-name sharegpt --dataset-path benchmarks/ShareGPT_V3_unfiltered_cleaned_split.json'
\
--serve-params
benchmarks/serve_hparams.json
\
--bench-params
benchmarks/bench_hparams.json
\
-o
benchmarks/results
--output-dir
benchmarks/results
\
--experiment-name
demo
```
By default, each parameter combination is benchmarked 3 times to make the results more reliable. You can adjust the number of runs by setting `--num-runs`.
...
...
@@ -118,7 +119,8 @@ vllm bench sweep serve_workload \
--serve-params
benchmarks/serve_hparams.json
\
--bench-params
benchmarks/bench_hparams.json
\
--num-runs
1
\
-o
benchmarks/results
--output-dir
benchmarks/results
\
--experiment-name
demo
```
The algorithm for exploring different workload levels can be summarized as follows:
...
...
@@ -186,7 +188,8 @@ vllm bench sweep startup \
--startup-cmd
'vllm bench startup --model Qwen/Qwen3-0.6B'
\
--serve-params
benchmarks/serve_hparams.json
\
--startup-params
benchmarks/startup_hparams.json
\
-o
benchmarks/results
--output-dir
benchmarks/results
\
--experiment-name
demo
```
!!! important
...
...
@@ -204,11 +207,10 @@ Control the variables to plot via `--var-x` and `--var-y`, optionally applying `
Example commands for visualizing [Workload Explorer](#workload-explorer) results:
```
bash
# Name of the directory that stores the results
TIMESTAMP
=
$1
EXPERIMENT_DIR
=
${
1
:-
"benchmarks/results/demo"
}
# Latency increases as the workload increases
vllm bench sweep plot
benchmarks/results/
$TIMESTAMP
\
vllm bench sweep plot
$EXPERIMENT_DIR
\
--var-x
max_concurrency
\
--var-y
median_ttft_ms
\
--col-by
_benchmark_name
\
...
...
@@ -216,7 +218,7 @@ vllm bench sweep plot benchmarks/results/$TIMESTAMP \
--fig-name
latency_curve
# Throughput saturates as workload increases
vllm bench sweep plot
benchmarks/results/
$TIMESTAMP
\
vllm bench sweep plot
$EXPERIMENT_DIR
\
--var-x
max_concurrency
\
--var-y
total_token_throughput
\
--col-by
_benchmark_name
\
...
...
@@ -224,7 +226,7 @@ vllm bench sweep plot benchmarks/results/$TIMESTAMP \
--fig-name
throughput_curve
# Tradeoff between latency and throughput
vllm bench sweep plot
benchmarks/results/
$TIMESTAMP
\
vllm bench sweep plot
$EXPERIMENT_DIR
\
--var-x
total_token_throughput
\
--var-y
median_ttft_ms
\
--col-by
_benchmark_name
\
...
...
@@ -249,7 +251,9 @@ Higher concurrency or batch size can raise GPU efficiency (per-GPU), but can add
Example:
```
bash
vllm bench sweep plot_pareto benchmarks/results/<timestamp>
\
EXPERIMENT_DIR
=
${
1
:-
"benchmarks/results/demo"
}
vllm bench sweep plot_pareto
$EXPERIMENT_DIR
\
--label-by
max_concurrency,tensor_parallel_size,pipeline_parallel_size
```
...
...
vllm/benchmarks/sweep/plot.py
View file @
4292e3b8
...
...
@@ -499,7 +499,7 @@ class SweepPlotArgs:
@
classmethod
def
from_cli_args
(
cls
,
args
:
argparse
.
Namespace
):
output_dir
=
Path
(
args
.
OUTPU
T_DIR
)
output_dir
=
Path
(
args
.
EXPERIMEN
T_DIR
)
if
not
output_dir
.
exists
():
raise
ValueError
(
f
"No parameter sweep results under
{
output_dir
}
"
)
...
...
@@ -531,11 +531,9 @@ class SweepPlotArgs:
@
classmethod
def
add_cli_args
(
cls
,
parser
:
argparse
.
ArgumentParser
)
->
argparse
.
ArgumentParser
:
parser
.
add_argument
(
"
OUTPU
T_DIR"
,
"
EXPERIMEN
T_DIR"
,
type
=
str
,
default
=
"results"
,
help
=
"The directory containing the results to plot, "
"i.e., the `--output-dir` argument to the parameter sweep script."
,
help
=
"The directory containing the sweep results to plot."
,
)
parser
.
add_argument
(
"--fig-dir"
,
...
...
vllm/benchmarks/sweep/plot_pareto.py
View file @
4292e3b8
...
...
@@ -325,7 +325,7 @@ class SweepPlotParetoArgs:
@
classmethod
def
from_cli_args
(
cls
,
args
:
argparse
.
Namespace
):
output_dir
=
Path
(
args
.
OUTPU
T_DIR
)
output_dir
=
Path
(
args
.
EXPERIMEN
T_DIR
)
if
not
output_dir
.
exists
():
raise
ValueError
(
f
"No parameter sweep results under
{
output_dir
}
"
)
...
...
@@ -342,9 +342,8 @@ class SweepPlotParetoArgs:
@
classmethod
def
add_cli_args
(
cls
,
parser
:
argparse
.
ArgumentParser
):
parser
.
add_argument
(
"
OUTPU
T_DIR"
,
"
EXPERIMEN
T_DIR"
,
type
=
str
,
default
=
"results"
,
help
=
"The directory containing the sweep results to plot."
,
)
parser
.
add_argument
(
...
...
vllm/benchmarks/sweep/serve.py
View file @
4292e3b8
...
...
@@ -4,6 +4,7 @@ import argparse
import
contextlib
import
json
import
shlex
from
contextlib
import
contextmanager
from
dataclasses
import
dataclass
from
datetime
import
datetime
from
pathlib
import
Path
...
...
@@ -135,7 +136,7 @@ def run_benchmark(
def
_get_comb_base_path
(
outpu
t_dir
:
Path
,
experimen
t_dir
:
Path
,
serve_comb
:
ParameterSweepItem
,
bench_comb
:
ParameterSweepItem
,
*
,
...
...
@@ -149,7 +150,7 @@ def _get_comb_base_path(
if
extra_parts
:
parts
.
extend
(
extra_parts
)
return
outpu
t_dir
/
sanitize_filename
(
"-"
.
join
(
parts
))
return
experimen
t_dir
/
sanitize_filename
(
"-"
.
join
(
parts
))
def
_get_comb_run_path
(
base_path
:
Path
,
run_number
:
int
|
None
):
...
...
@@ -162,10 +163,10 @@ def _get_comb_run_path(base_path: Path, run_number: int | None):
def
_comb_needs_server
(
serve_comb
:
ParameterSweepItem
,
bench_combs
:
ParameterSweep
,
outpu
t_dir
:
Path
,
experimen
t_dir
:
Path
,
):
for
bench_comb
in
bench_combs
:
base_path
=
_get_comb_base_path
(
outpu
t_dir
,
serve_comb
,
bench_comb
)
base_path
=
_get_comb_base_path
(
experimen
t_dir
,
serve_comb
,
bench_comb
)
if
not
_get_comb_run_path
(
base_path
,
run_number
=
None
).
exists
():
return
True
...
...
@@ -179,11 +180,11 @@ def server_ctx(
show_stdout
:
bool
,
serve_comb
:
ParameterSweepItem
,
bench_params
:
ParameterSweep
,
outpu
t_dir
:
Path
,
experimen
t_dir
:
Path
,
dry_run
:
bool
,
server_ready_timeout
:
int
=
300
,
):
if
not
_comb_needs_server
(
serve_comb
,
bench_params
,
outpu
t_dir
):
if
not
_comb_needs_server
(
serve_comb
,
bench_params
,
experimen
t_dir
):
return
contextlib
.
nullcontext
()
return
run_server
(
...
...
@@ -215,10 +216,10 @@ def run_comb(
*
,
serve_comb
:
ParameterSweepItem
,
bench_comb
:
ParameterSweepItem
,
link_vars
:
list
[
tuple
[
str
,
str
]],
base_path
:
Path
,
num_runs
:
int
,
dry_run
:
bool
,
link_vars
:
list
[
tuple
[
str
,
str
]],
):
if
not
_comb_is_valid
(
serve_comb
,
bench_comb
,
link_vars
):
return
None
...
...
@@ -257,10 +258,10 @@ def run_combs(
server_ready_timeout
:
int
,
serve_params
:
ParameterSweep
,
bench_params
:
ParameterSweep
,
output_dir
:
Path
,
link_vars
:
list
[
tuple
[
str
,
str
]],
experiment_dir
:
Path
,
num_runs
:
int
,
dry_run
:
bool
,
link_vars
:
list
[
tuple
[
str
,
str
]],
):
all_data
=
list
[
dict
[
str
,
object
]]()
for
serve_comb
in
serve_params
:
...
...
@@ -270,22 +271,22 @@ def run_combs(
show_stdout
=
show_stdout
,
serve_comb
=
serve_comb
,
bench_params
=
bench_params
,
output_dir
=
outpu
t_dir
,
experiment_dir
=
experimen
t_dir
,
dry_run
=
dry_run
,
server_ready_timeout
=
server_ready_timeout
,
)
as
server
:
for
bench_comb
in
bench_params
:
base_path
=
_get_comb_base_path
(
outpu
t_dir
,
serve_comb
,
bench_comb
)
base_path
=
_get_comb_base_path
(
experimen
t_dir
,
serve_comb
,
bench_comb
)
comb_data
=
run_comb
(
server
,
bench_cmd
,
serve_comb
=
serve_comb
,
bench_comb
=
bench_comb
,
link_vars
=
link_vars
,
base_path
=
base_path
,
num_runs
=
num_runs
,
dry_run
=
dry_run
,
link_vars
=
link_vars
,
)
if
comb_data
is
not
None
:
...
...
@@ -295,7 +296,7 @@ def run_combs(
return
None
combined_df
=
pd
.
DataFrame
.
from_records
(
all_data
)
combined_df
.
to_csv
(
outpu
t_dir
/
"summary.csv"
)
combined_df
.
to_csv
(
experimen
t_dir
/
"summary.csv"
)
return
combined_df
...
...
@@ -309,11 +310,12 @@ class SweepServeArgs:
server_ready_timeout
:
int
serve_params
:
ParameterSweep
bench_params
:
ParameterSweep
link_vars
:
list
[
tuple
[
str
,
str
]]
output_dir
:
Path
experiment_name
:
str
num_runs
:
int
dry_run
:
bool
resume
:
str
|
None
link_vars
:
list
[
tuple
[
str
,
str
]]
resume
:
bool
parser_name
:
ClassVar
[
str
]
=
"serve"
parser_help
:
ClassVar
[
str
]
=
"Run vLLM server benchmark under multiple settings."
...
...
@@ -340,6 +342,11 @@ class SweepServeArgs:
link_vars
=
cls
.
parse_link_vars
(
args
.
link_vars
)
if
args
.
experiment_name
:
experiment_name
=
args
.
experiment_name
else
:
experiment_name
=
datetime
.
now
().
strftime
(
"%Y%m%d_%H%M%S"
)
num_runs
=
args
.
num_runs
if
num_runs
<
1
:
raise
ValueError
(
"`num_runs` should be at least 1."
)
...
...
@@ -351,11 +358,12 @@ class SweepServeArgs:
show_stdout
=
args
.
show_stdout
,
serve_params
=
serve_params
,
bench_params
=
bench_params
,
link_vars
=
link_vars
,
output_dir
=
Path
(
args
.
output_dir
),
experiment_name
=
experiment_name
,
num_runs
=
num_runs
,
dry_run
=
args
.
dry_run
,
resume
=
args
.
resume
,
link_vars
=
link_vars
,
server_ready_timeout
=
args
.
server_ready_timeout
,
)
...
...
@@ -392,6 +400,7 @@ class SweepServeArgs:
default
=
300
,
help
=
"Timeout in seconds to wait for the server to become ready."
,
)
parser
.
add_argument
(
"--serve-params"
,
type
=
str
,
...
...
@@ -402,6 +411,16 @@ class SweepServeArgs:
"If both `serve_params` and `bench_params` are given, "
"this script will iterate over their Cartesian product."
,
)
parser
.
add_argument
(
"--link-vars"
,
type
=
str
,
default
=
""
,
help
=
(
"Comma-separated list of linked variables between serve and bench, "
"e.g. max_num_seqs=max_concurrency,max_model_len=random_input_len"
),
)
parser
.
add_argument
(
"--bench-params"
,
type
=
str
,
...
...
@@ -417,7 +436,15 @@ class SweepServeArgs:
"--output-dir"
,
type
=
str
,
default
=
"results"
,
help
=
"The directory to which results are written."
,
help
=
"The main directory to which results are written."
,
)
parser
.
add_argument
(
"-e"
,
"--experiment-name"
,
type
=
str
,
default
=
None
,
help
=
"The name of this experiment (defaults to current timestamp). "
"Results will be stored under `output_dir/experiment_name`."
,
)
parser
.
add_argument
(
"--num-runs"
,
...
...
@@ -433,21 +460,10 @@ class SweepServeArgs:
)
parser
.
add_argument
(
"--resume"
,
type
=
str
,
default
=
None
,
help
=
"Set this to the name of a directory under `output_dir` (which is a "
"timestamp) to resume a previous execution of this script, i.e., only run "
"parameter combinations for which there are still no output files."
,
)
parser
.
add_argument
(
"--link-vars"
,
type
=
str
,
default
=
""
,
help
=
(
"Comma-separated list of linked variables between serve and bench, "
"e.g. max_num_seqs=max_concurrency,max_model_len=random_input_len"
),
action
=
"store_true"
,
help
=
"Resume a previous execution of this script, i.e., only run "
"parameter combinations for which there are still no output files "
"under `output_dir/experiment_name`."
,
)
return
parser
...
...
@@ -462,33 +478,52 @@ class SweepServeArgs:
pairs
.
append
((
a
.
strip
(),
b
.
strip
()))
return
pairs
def
resolve_experiment_dir
(
self
)
->
Path
:
experiment_dir
=
self
.
output_dir
/
self
.
experiment_name
def
run_main
(
args
:
SweepServeArgs
):
timestamp
=
args
.
resume
or
datetime
.
now
().
strftime
(
"%Y%m%d_%H%M%S"
)
output_dir
=
args
.
output_dir
/
timestamp
if
self
.
resume
:
if
not
experiment_dir
.
exists
():
raise
ValueError
(
f
"Cannot resume from non-existent
{
experiment_dir
=
}
"
)
else
:
if
experiment_dir
.
exists
():
raise
ValueError
(
f
"Cannot overwrite existing
{
experiment_dir
=
}
"
)
if
args
.
resume
and
not
output_dir
.
exists
():
raise
ValueError
(
f
"Cannot resume from non-existent directory (
{
output_dir
}
)"
)
return
experiment_dir
@
contextmanager
def
run_ctx
(
self
,
experiment_dir
:
Path
):
if
self
.
dry_run
:
yield
print
(
f
"Experiment will be saved at:
{
experiment_dir
}
"
)
return
try
:
yield
print
(
f
"Experiment has been saved at:
{
experiment_dir
}
"
)
except
BaseException
as
exc
:
raise
RuntimeError
(
"The script was terminated early. Use `--resume` "
"to continue the script from its last checkpoint."
)
from
exc
def
run_main
(
args
:
SweepServeArgs
):
experiment_dir
=
args
.
resolve_experiment_dir
()
with
args
.
run_ctx
(
experiment_dir
):
return
run_combs
(
serve_cmd
=
args
.
serve_cmd
,
bench_cmd
=
args
.
bench_cmd
,
link_vars
=
args
.
link_vars
,
after_bench_cmd
=
args
.
after_bench_cmd
,
show_stdout
=
args
.
show_stdout
,
server_ready_timeout
=
args
.
server_ready_timeout
,
serve_params
=
args
.
serve_params
,
bench_params
=
args
.
bench_params
,
output_dir
=
outpu
t_dir
,
experiment_dir
=
experimen
t_dir
,
num_runs
=
args
.
num_runs
,
dry_run
=
args
.
dry_run
,
link_vars
=
args
.
link_vars
,
)
except
BaseException
as
exc
:
raise
RuntimeError
(
f
"The script was terminated early. Use `--resume
{
timestamp
}
` "
f
"to continue the script from its last checkpoint."
)
from
exc
def
main
(
args
:
argparse
.
Namespace
):
...
...
vllm/benchmarks/sweep/serve_workload.py
View file @
4292e3b8
...
...
@@ -3,7 +3,6 @@
import
argparse
import
math
from
dataclasses
import
asdict
,
dataclass
from
datetime
import
datetime
from
pathlib
import
Path
from
typing
import
ClassVar
,
Literal
,
get_args
...
...
@@ -59,10 +58,10 @@ def run_comb_workload(
*
,
serve_comb
:
ParameterSweepItem
,
bench_comb
:
ParameterSweepItem
,
output_dir
:
Path
,
link_vars
:
list
[
tuple
[
str
,
str
]],
experiment_dir
:
Path
,
num_runs
:
int
,
dry_run
:
bool
,
link_vars
:
list
[
tuple
[
str
,
str
]],
workload_var
:
WorkloadVariable
,
workload_value
:
int
,
)
->
list
[
dict
[
str
,
object
]]
|
None
:
...
...
@@ -73,15 +72,15 @@ def run_comb_workload(
bench_cmd
,
serve_comb
=
serve_comb
,
bench_comb
=
bench_comb_workload
,
link_vars
=
link_vars
,
base_path
=
_get_comb_base_path
(
outpu
t_dir
,
experimen
t_dir
,
serve_comb
,
bench_comb
,
extra_parts
=
(
"WL-"
,
f
"
{
workload_var
}
=
{
workload_value
}
"
),
),
num_runs
=
num_runs
,
dry_run
=
dry_run
,
link_vars
=
link_vars
,
)
...
...
@@ -91,12 +90,12 @@ def explore_comb_workloads(
*
,
serve_comb
:
ParameterSweepItem
,
bench_comb
:
ParameterSweepItem
,
link_vars
:
list
[
tuple
[
str
,
str
]],
workload_var
:
WorkloadVariable
,
workload_iters
:
int
,
outpu
t_dir
:
Path
,
experimen
t_dir
:
Path
,
num_runs
:
int
,
dry_run
:
bool
,
link_vars
:
list
[
tuple
[
str
,
str
]],
):
print
(
"[WL START]"
)
print
(
f
"Serve parameters:
{
serve_comb
.
as_text
()
or
'(None)'
}
"
)
...
...
@@ -125,10 +124,10 @@ def explore_comb_workloads(
bench_cmd
,
serve_comb
=
serve_comb
,
bench_comb
=
bench_comb
|
{
"max_concurrency"
:
1
},
output_dir
=
output_dir
,
link_vars
=
link_vars
,
experiment_dir
=
experiment_dir
,
num_runs
=
num_runs
,
dry_run
=
dry_run
,
link_vars
=
link_vars
,
workload_var
=
workload_var
,
workload_value
=
1
,
)
...
...
@@ -137,10 +136,10 @@ def explore_comb_workloads(
bench_cmd
,
serve_comb
=
serve_comb
,
bench_comb
=
bench_comb
|
{
"max_concurrency"
:
dataset_size
},
output_dir
=
output_dir
,
link_vars
=
link_vars
,
experiment_dir
=
experiment_dir
,
num_runs
=
num_runs
,
dry_run
=
dry_run
,
link_vars
=
link_vars
,
workload_var
=
workload_var
,
workload_value
=
dataset_size
,
)
...
...
@@ -177,10 +176,10 @@ def explore_comb_workloads(
bench_cmd
,
serve_comb
=
serve_comb
,
bench_comb
=
bench_comb
,
output_dir
=
output_dir
,
link_vars
=
link_vars
,
experiment_dir
=
experiment_dir
,
num_runs
=
num_runs
,
dry_run
=
dry_run
,
link_vars
=
link_vars
,
workload_var
=
workload_var
,
workload_value
=
inter_workload_value
,
)
...
...
@@ -201,12 +200,12 @@ def explore_combs_workloads(
server_ready_timeout
:
int
,
serve_params
:
ParameterSweep
,
bench_params
:
ParameterSweep
,
link_vars
:
list
[
tuple
[
str
,
str
]],
workload_var
:
WorkloadVariable
,
workload_iters
:
int
,
outpu
t_dir
:
Path
,
experimen
t_dir
:
Path
,
num_runs
:
int
,
dry_run
:
bool
,
link_vars
:
list
[
tuple
[
str
,
str
]],
):
if
any
(
bench_comb
.
has_param
(
workload_var
)
for
bench_comb
in
bench_params
):
raise
ValueError
(
...
...
@@ -223,7 +222,7 @@ def explore_combs_workloads(
server_ready_timeout
=
server_ready_timeout
,
serve_comb
=
serve_comb
,
bench_params
=
bench_params
,
output_dir
=
outpu
t_dir
,
experiment_dir
=
experimen
t_dir
,
dry_run
=
dry_run
,
)
as
server
:
for
bench_comb
in
bench_params
:
...
...
@@ -232,12 +231,12 @@ def explore_combs_workloads(
bench_cmd
,
serve_comb
=
serve_comb
,
bench_comb
=
bench_comb
,
link_vars
=
link_vars
,
workload_var
=
workload_var
,
workload_iters
=
workload_iters
,
output_dir
=
outpu
t_dir
,
experiment_dir
=
experimen
t_dir
,
num_runs
=
num_runs
,
dry_run
=
dry_run
,
link_vars
=
link_vars
,
)
if
comb_data
is
not
None
:
...
...
@@ -247,7 +246,7 @@ def explore_combs_workloads(
return
None
combined_df
=
pd
.
DataFrame
.
from_records
(
all_data
)
combined_df
.
to_csv
(
outpu
t_dir
/
"summary.csv"
)
combined_df
.
to_csv
(
experimen
t_dir
/
"summary.csv"
)
return
combined_df
...
...
@@ -298,13 +297,9 @@ class SweepServeWorkloadArgs(SweepServeArgs):
def
run_main
(
args
:
SweepServeWorkloadArgs
):
timestamp
=
args
.
resume
or
datetime
.
now
().
strftime
(
"%Y%m%d_%H%M%S"
)
output_dir
=
args
.
output_dir
/
timestamp
if
args
.
resume
and
not
output_dir
.
exists
():
raise
ValueError
(
f
"Cannot resume from non-existent directory (
{
output_dir
}
)"
)
experiment_dir
=
args
.
resolve_experiment_dir
()
try
:
with
args
.
run_ctx
(
experiment_dir
)
:
return
explore_combs_workloads
(
serve_cmd
=
args
.
serve_cmd
,
bench_cmd
=
args
.
bench_cmd
,
...
...
@@ -313,18 +308,13 @@ def run_main(args: SweepServeWorkloadArgs):
server_ready_timeout
=
args
.
server_ready_timeout
,
serve_params
=
args
.
serve_params
,
bench_params
=
args
.
bench_params
,
link_vars
=
args
.
link_vars
,
workload_var
=
args
.
workload_var
,
workload_iters
=
args
.
workload_iters
,
output_dir
=
outpu
t_dir
,
experiment_dir
=
experimen
t_dir
,
num_runs
=
args
.
num_runs
,
dry_run
=
args
.
dry_run
,
link_vars
=
args
.
link_vars
,
)
except
BaseException
as
exc
:
raise
RuntimeError
(
f
"The script was terminated early. Use `--resume
{
timestamp
}
` "
f
"to continue the script from its last checkpoint."
)
from
exc
def
main
(
args
:
argparse
.
Namespace
):
...
...
vllm/benchmarks/sweep/startup.py
View file @
4292e3b8
...
...
@@ -4,6 +4,7 @@ import argparse
import
json
import
shlex
import
subprocess
from
contextlib
import
contextmanager
from
dataclasses
import
dataclass
from
datetime
import
datetime
from
functools
import
lru_cache
...
...
@@ -111,7 +112,7 @@ def _apply_output_json(cmd: list[str], output_path: Path) -> list[str]:
def
_get_comb_base_path
(
outpu
t_dir
:
Path
,
experimen
t_dir
:
Path
,
serve_comb
:
ParameterSweepItem
,
startup_comb
:
ParameterSweepItem
,
)
->
Path
:
...
...
@@ -120,7 +121,8 @@ def _get_comb_base_path(
parts
.
extend
((
"SERVE-"
,
serve_comb
.
name
))
if
startup_comb
:
parts
.
extend
((
"STARTUP-"
,
startup_comb
.
name
))
return
output_dir
/
sanitize_filename
(
"-"
.
join
(
parts
))
return
experiment_dir
/
sanitize_filename
(
"-"
.
join
(
parts
))
def
_get_comb_run_path
(
base_path
:
Path
,
run_number
:
int
|
None
)
->
Path
:
...
...
@@ -225,7 +227,7 @@ def run_combs(
*
,
serve_params
:
ParameterSweep
,
startup_params
:
ParameterSweep
,
outpu
t_dir
:
Path
,
experimen
t_dir
:
Path
,
num_runs
:
int
,
show_stdout
:
bool
,
dry_run
:
bool
,
...
...
@@ -233,7 +235,7 @@ def run_combs(
all_data
=
list
[
dict
[
str
,
object
]]()
for
serve_comb
in
serve_params
:
for
startup_comb
in
startup_params
:
base_path
=
_get_comb_base_path
(
outpu
t_dir
,
serve_comb
,
startup_comb
)
base_path
=
_get_comb_base_path
(
experimen
t_dir
,
serve_comb
,
startup_comb
)
comb_data
=
run_comb
(
startup_cmd
,
serve_comb
=
serve_comb
,
...
...
@@ -250,7 +252,7 @@ def run_combs(
return
None
combined_df
=
pd
.
DataFrame
.
from_records
(
all_data
)
combined_df
.
to_csv
(
outpu
t_dir
/
"summary.csv"
)
combined_df
.
to_csv
(
experimen
t_dir
/
"summary.csv"
)
return
combined_df
...
...
@@ -260,11 +262,11 @@ class SweepStartupArgs:
serve_params
:
ParameterSweep
startup_params
:
ParameterSweep
output_dir
:
Path
experiment_name
:
str
num_runs
:
int
show_stdout
:
bool
dry_run
:
bool
resume
:
str
|
None
strict_params
:
bool
resume
:
bool
parser_name
:
ClassVar
[
str
]
=
"startup"
parser_help
:
ClassVar
[
str
]
=
(
...
...
@@ -286,13 +288,19 @@ class SweepStartupArgs:
startup_params
=
ParameterSweep
.
from_records
([{}])
supported
=
_get_supported_startup_keys
()
strict_params
=
args
.
strict_params
serve_params
=
_filter_params
(
serve_params
,
supported
=
supported
,
strict
=
args
.
strict_params
serve_params
,
supported
=
supported
,
strict
=
strict_params
)
startup_params
=
_filter_params
(
startup_params
,
supported
=
supported
,
strict
=
args
.
strict_params
startup_params
,
supported
=
supported
,
strict
=
strict_params
)
if
args
.
experiment_name
:
experiment_name
=
args
.
experiment_name
else
:
experiment_name
=
datetime
.
now
().
strftime
(
"%Y%m%d_%H%M%S"
)
if
args
.
num_runs
<
1
:
raise
ValueError
(
"`num_runs` should be at least 1."
)
...
...
@@ -301,11 +309,11 @@ class SweepStartupArgs:
serve_params
=
serve_params
,
startup_params
=
startup_params
,
output_dir
=
Path
(
args
.
output_dir
),
experiment_name
=
experiment_name
,
num_runs
=
args
.
num_runs
,
show_stdout
=
args
.
show_stdout
,
dry_run
=
args
.
dry_run
,
resume
=
args
.
resume
,
strict_params
=
args
.
strict_params
,
)
@
classmethod
...
...
@@ -316,6 +324,7 @@ class SweepStartupArgs:
default
=
"vllm bench startup"
,
help
=
"The command used to run the startup benchmark."
,
)
parser
.
add_argument
(
"--serve-params"
,
type
=
str
,
...
...
@@ -331,12 +340,27 @@ class SweepStartupArgs:
help
=
"Path to JSON file containing parameter combinations "
"for the `vllm bench startup` command."
,
)
parser
.
add_argument
(
"--strict-params"
,
action
=
"store_true"
,
help
=
"If set, unknown parameters in sweep files raise an error "
"instead of being ignored."
,
)
parser
.
add_argument
(
"-o"
,
"--output-dir"
,
type
=
str
,
default
=
"results"
,
help
=
"The directory to which results are written."
,
help
=
"The main directory to which results are written."
,
)
parser
.
add_argument
(
"-e"
,
"--experiment-name"
,
type
=
str
,
default
=
None
,
help
=
"The name of this experiment (defaults to current timestamp). "
"Results will be stored under `output_dir/experiment_name`."
,
)
parser
.
add_argument
(
"--num-runs"
,
...
...
@@ -357,43 +381,56 @@ class SweepStartupArgs:
)
parser
.
add_argument
(
"--resume"
,
type
=
str
,
default
=
None
,
help
=
"Set this to the name of a directory under `output_dir` (which is a "
"timestamp) to resume a previous execution of this script, i.e., only run "
"parameter combinations for which there are still no output files."
,
)
parser
.
add_argument
(
"--strict-params"
,
action
=
"store_true"
,
help
=
"If set, unknown parameters in sweep files raise an error "
"instead of being ignored."
,
help
=
"Resume a previous execution of this script, i.e., only run "
"parameter combinations for which there are still no output files "
"under `output_dir/experiment_name`."
,
)
return
parser
def
resolve_experiment_dir
(
self
)
->
Path
:
experiment_dir
=
self
.
output_dir
/
self
.
experiment_name
def
run_main
(
args
:
SweepStartupArgs
):
timestamp
=
args
.
resume
or
datetime
.
now
().
strftime
(
"%Y%m%d_%H%M%S"
)
output_dir
=
args
.
output_dir
/
timestamp
if
self
.
resume
:
if
not
experiment_dir
.
exists
():
raise
ValueError
(
f
"Cannot resume from non-existent
{
experiment_dir
=
}
"
)
else
:
if
experiment_dir
.
exists
():
raise
ValueError
(
f
"Cannot overwrite existing
{
experiment_dir
=
}
"
)
if
args
.
resume
and
not
output_dir
.
exists
():
raise
ValueError
(
f
"Cannot resume from non-existent directory (
{
output_dir
}
)"
)
return
experiment_dir
@
contextmanager
def
run_ctx
(
self
,
experiment_dir
:
Path
):
if
self
.
dry_run
:
yield
print
(
f
"Experiment will be saved at:
{
experiment_dir
}
"
)
return
try
:
yield
print
(
f
"Experiment has been saved at:
{
experiment_dir
}
"
)
except
BaseException
as
exc
:
raise
RuntimeError
(
"The script was terminated early. Use `--resume` "
"to continue the script from its last checkpoint."
)
from
exc
def
run_main
(
args
:
SweepStartupArgs
):
experiment_dir
=
args
.
resolve_experiment_dir
()
with
args
.
run_ctx
(
experiment_dir
):
return
run_combs
(
startup_cmd
=
args
.
startup_cmd
,
serve_params
=
args
.
serve_params
,
startup_params
=
args
.
startup_params
,
output_dir
=
outpu
t_dir
,
experiment_dir
=
experimen
t_dir
,
num_runs
=
args
.
num_runs
,
show_stdout
=
args
.
show_stdout
,
dry_run
=
args
.
dry_run
,
)
except
BaseException
as
exc
:
raise
RuntimeError
(
f
"The script was terminated early. Use `--resume
{
timestamp
}
` "
f
"to continue the script from its last checkpoint."
)
from
exc
def
main
(
args
:
argparse
.
Namespace
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment