Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7c0d3c51
Unverified
Commit
7c0d3c51
authored
Jan 13, 2026
by
Cyrus Leung
Committed by
GitHub
Jan 13, 2026
Browse files
[Benchmark] Share data between SLA runs (#32184)
Signed-off-by:
DarkLight1337
<
tlleungac@connect.ust.hk
>
parent
5b681074
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
108 additions
and
25 deletions
+108
-25
tests/benchmarks/sweep/test_serve_sla.py
tests/benchmarks/sweep/test_serve_sla.py
+68
-10
vllm/benchmarks/sweep/serve_sla.py
vllm/benchmarks/sweep/serve_sla.py
+40
-15
No files found.
tests/benchmarks/sweep/test_serve_sla.py
View file @
7c0d3c51
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
json
from
collections.abc
import
Callable
from
collections.abc
import
Callable
from
pathlib
import
Path
from
pathlib
import
Path
from
unittest.mock
import
patch
from
unittest.mock
import
patch
from
vllm.benchmarks.sweep.param_sweep
import
ParameterSweepItem
from
vllm.benchmarks.sweep.param_sweep
import
ParameterSweepItem
from
vllm.benchmarks.sweep.serve_sla
import
solve_sla
from
vllm.benchmarks.sweep.serve_sla
import
_get_sla_run_path
,
solve_sla
from
vllm.benchmarks.sweep.server
import
ServerProcess
from
vllm.benchmarks.sweep.server
import
ServerProcess
from
vllm.benchmarks.sweep.sla_sweep
import
(
from
vllm.benchmarks.sweep.sla_sweep
import
(
SLACriterionBase
,
SLACriterionBase
,
...
@@ -34,7 +35,14 @@ def _set_return_value(
...
@@ -34,7 +35,14 @@ def _set_return_value(
num_runs
:
int
,
num_runs
:
int
,
dry_run
:
bool
,
dry_run
:
bool
,
):
):
return
var2metric
(
bench_comb
)
iter_data
=
var2metric
(
bench_comb
)
summary_path
=
_get_sla_run_path
(
iter_path
,
run_number
=
None
)
summary_path
.
parent
.
mkdir
(
parents
=
True
,
exist_ok
=
True
)
with
summary_path
.
open
(
"w"
)
as
f
:
json
.
dump
(
iter_data
,
f
,
indent
=
4
)
return
iter_data
return
patch
(
"vllm.benchmarks.sweep.serve_sla.run_sla"
,
side_effect
=
mock_run_sla
)
return
patch
(
"vllm.benchmarks.sweep.serve_sla.run_sla"
,
side_effect
=
mock_run_sla
)
...
@@ -98,6 +106,7 @@ def _var2metric_sqrt(y_intercept: float):
...
@@ -98,6 +106,7 @@ def _var2metric_sqrt(y_intercept: float):
def
_run_solve_sla
(
def
_run_solve_sla
(
var2metric
:
Callable
[[
ParameterSweepItem
],
list
[
dict
[
str
,
float
]]],
var2metric
:
Callable
[[
ParameterSweepItem
],
list
[
dict
[
str
,
float
]]],
criterion
:
SLACriterionBase
,
criterion
:
SLACriterionBase
,
base_path
:
Path
,
min_value
:
int
=
1
,
min_value
:
int
=
1
,
max_value
:
int
=
100
,
max_value
:
int
=
100
,
):
):
...
@@ -108,7 +117,7 @@ def _run_solve_sla(
...
@@ -108,7 +117,7 @@ def _run_solve_sla(
serve_comb
=
ParameterSweepItem
(),
serve_comb
=
ParameterSweepItem
(),
bench_comb
=
ParameterSweepItem
(),
bench_comb
=
ParameterSweepItem
(),
sla_comb
=
SLASweepItem
({
"request_throughput"
:
criterion
}),
sla_comb
=
SLASweepItem
({
"request_throughput"
:
criterion
}),
base_path
=
Path
(
""
)
,
base_path
=
base_path
,
num_runs
=
1
,
num_runs
=
1
,
dry_run
=
False
,
dry_run
=
False
,
sla_variable
=
"request_rate"
,
sla_variable
=
"request_rate"
,
...
@@ -120,10 +129,11 @@ def _run_solve_sla(
...
@@ -120,10 +129,11 @@ def _run_solve_sla(
return
result
return
result
def
test_solve_linear_sla_le
():
def
test_solve_linear_sla_le
(
tmp_path
):
sla_data
,
history
=
_run_solve_sla
(
sla_data
,
history
=
_run_solve_sla
(
_var2metric_linear
(),
_var2metric_linear
(),
SLALessThanOrEqualTo
(
target
=
32
),
SLALessThanOrEqualTo
(
target
=
32
),
tmp_path
,
)
)
assert
history
.
get_max_passing
()
==
32
assert
history
.
get_max_passing
()
==
32
...
@@ -136,10 +146,11 @@ def test_solve_linear_sla_le():
...
@@ -136,10 +146,11 @@ def test_solve_linear_sla_le():
}
}
def
test_solve_linear_sla_lt
():
def
test_solve_linear_sla_lt
(
tmp_path
):
sla_data
,
history
=
_run_solve_sla
(
sla_data
,
history
=
_run_solve_sla
(
_var2metric_linear
(),
_var2metric_linear
(),
SLALessThan
(
target
=
32
),
SLALessThan
(
target
=
32
),
tmp_path
,
)
)
assert
history
.
get_max_passing
()
==
31
assert
history
.
get_max_passing
()
==
31
...
@@ -152,10 +163,11 @@ def test_solve_linear_sla_lt():
...
@@ -152,10 +163,11 @@ def test_solve_linear_sla_lt():
}
}
def
test_solve_linear_sla_oob
():
def
test_solve_linear_sla_oob
(
tmp_path
):
sla_data
,
history
=
_run_solve_sla
(
sla_data
,
history
=
_run_solve_sla
(
_var2metric_linear
(),
_var2metric_linear
(),
SLALessThanOrEqualTo
(
target
=
32
),
SLALessThanOrEqualTo
(
target
=
32
),
tmp_path
,
min_value
=
64
,
min_value
=
64
,
)
)
...
@@ -168,10 +180,11 @@ def test_solve_linear_sla_oob():
...
@@ -168,10 +180,11 @@ def test_solve_linear_sla_oob():
}
}
def
test_solve_concave_sla_le
():
def
test_solve_concave_sla_le
(
tmp_path
):
sla_data
,
history
=
_run_solve_sla
(
sla_data
,
history
=
_run_solve_sla
(
_var2metric_concave
(
elbow_point
=
32
),
_var2metric_concave
(
elbow_point
=
32
),
SLALessThanOrEqualTo
(
target
=
24
),
SLALessThanOrEqualTo
(
target
=
24
),
tmp_path
,
)
)
assert
history
.
get_max_passing
()
==
16
assert
history
.
get_max_passing
()
==
16
...
@@ -187,10 +200,11 @@ def test_solve_concave_sla_le():
...
@@ -187,10 +200,11 @@ def test_solve_concave_sla_le():
}
}
def
test_solve_convex_sla_le
():
def
test_solve_convex_sla_le
(
tmp_path
):
sla_data
,
history
=
_run_solve_sla
(
sla_data
,
history
=
_run_solve_sla
(
_var2metric_convex
(
elbow_point
=
32
),
_var2metric_convex
(
elbow_point
=
32
),
SLALessThanOrEqualTo
(
target
=
24
),
SLALessThanOrEqualTo
(
target
=
24
),
tmp_path
,
)
)
assert
history
.
get_max_passing
()
==
26
assert
history
.
get_max_passing
()
==
26
...
@@ -206,10 +220,11 @@ def test_solve_convex_sla_le():
...
@@ -206,10 +220,11 @@ def test_solve_convex_sla_le():
}
}
def
test_solve_quadratic_sla_le
():
def
test_solve_quadratic_sla_le
(
tmp_path
):
sla_data
,
history
=
_run_solve_sla
(
sla_data
,
history
=
_run_solve_sla
(
_var2metric_quadratic
(
y_intercept
=
10
),
_var2metric_quadratic
(
y_intercept
=
10
),
SLALessThanOrEqualTo
(
target
=
50
),
SLALessThanOrEqualTo
(
target
=
50
),
tmp_path
,
)
)
assert
history
.
get_max_passing
()
==
20
assert
history
.
get_max_passing
()
==
20
...
@@ -223,10 +238,11 @@ def test_solve_quadratic_sla_le():
...
@@ -223,10 +238,11 @@ def test_solve_quadratic_sla_le():
}
}
def
test_solve_sqrt_sla_le
():
def
test_solve_sqrt_sla_le
(
tmp_path
):
sla_data
,
history
=
_run_solve_sla
(
sla_data
,
history
=
_run_solve_sla
(
_var2metric_sqrt
(
y_intercept
=
10
),
_var2metric_sqrt
(
y_intercept
=
10
),
SLALessThanOrEqualTo
(
target
=
100
),
SLALessThanOrEqualTo
(
target
=
100
),
tmp_path
,
)
)
assert
history
.
get_max_passing
()
==
81
assert
history
.
get_max_passing
()
==
81
...
@@ -238,3 +254,45 @@ def test_solve_sqrt_sla_le():
...
@@ -238,3 +254,45 @@ def test_solve_sqrt_sla_le():
81
:
True
,
81
:
True
,
82
:
False
,
82
:
False
,
}
}
def test_solve_reuse_history(tmp_path):
    """Check that a later solve in the same directory picks up earlier results.

    Two solves are run back to back against the same ``tmp_path``. The history
    returned by the second solve is expected to contain the points probed by
    the first solve in addition to the points it probes itself.
    """
    # First solve: target 10, searched over [1, 20].
    _, first_history = _run_solve_sla(
        _var2metric_linear(),
        SLALessThanOrEqualTo(target=10),
        tmp_path,
        min_value=1,
        max_value=20,
    )

    assert first_history.get_max_passing() == 10

    first_passed = {val: margin <= 0 for val, margin in first_history.items()}
    assert first_passed == {
        20: False,
        1: True,
        10: True,
        11: False,
    }

    # Second solve: target 30, searched over [21, 40], same base directory.
    _, second_history = _run_solve_sla(
        _var2metric_linear(),
        SLALessThanOrEqualTo(target=30),
        tmp_path,
        min_value=21,
        max_value=40,
    )

    assert second_history.get_max_passing() == 30

    second_passed = {val: margin <= 0 for val, margin in second_history.items()}
    assert second_passed == {
        # Points carried over from the first solve; their margins are
        # re-evaluated against the new target, so all of them now pass.
        20: True,
        1: True,
        10: True,
        11: True,
        # Points probed by the second solve itself.
        40: False,
        30: True,
        31: False,
    }
vllm/benchmarks/sweep/serve_sla.py
View file @
7c0d3c51
...
@@ -65,6 +65,14 @@ def _get_sla_run_path(iter_path: Path, run_number: int | None):
...
@@ -65,6 +65,14 @@ def _get_sla_run_path(iter_path: Path, run_number: int | None):
return
iter_path
/
f
"run=
{
run_number
}
.json"
return
iter_path
/
f
"run=
{
run_number
}
.json"
def _iter_sla_val_paths(base_path: Path, sla_variable: str):
    """Yield ``(sla_value, summary_path)`` for each saved SLA iteration.

    Scans ``base_path`` for entries named ``{sla_variable}=<int>`` and yields
    the parsed integer together with the path of the ``summary.json`` inside
    it. Entries without a ``summary.json`` are ignored.

    Entries that match the glob but whose suffix is not an integer (for
    example a stray ``{sla_variable}=abc`` directory) are skipped, so that
    unrelated leftovers do not abort the scan with ``ValueError``.

    Args:
        base_path: Directory holding one sub-directory per SLA value.
        sla_variable: Name of the swept variable used as the entry prefix.
    """
    prefix = f"{sla_variable}="

    for iter_path in base_path.glob(f"{prefix}*"):
        try:
            sla_value = int(iter_path.name.removeprefix(prefix))
        except ValueError:
            # Not one of our iteration directories; ignore it.
            continue

        summary_path = iter_path / "summary.json"
        if summary_path.exists():
            yield sla_value, summary_path
def
_sla_needs_server
(
def
_sla_needs_server
(
serve_comb
:
ParameterSweepItem
,
serve_comb
:
ParameterSweepItem
,
bench_combs
:
ParameterSweep
,
bench_combs
:
ParameterSweep
,
...
@@ -153,6 +161,25 @@ class SLAHistory(dict[int, float]):
...
@@ -153,6 +161,25 @@ class SLAHistory(dict[int, float]):
)
)
def _compute_margin(
    sla_comb: SLASweepItem,
    iter_data: list[dict[str, object]],
):
    """Return the largest SLA margin across all criteria in ``sla_comb``.

    Every metric named in ``sla_comb`` is first averaged over the runs in
    ``iter_data``. Each criterion is then evaluated on those averages through
    its ``print_and_compute_margin`` method, and the maximum of the resulting
    margins is returned.

    Args:
        sla_comb: Mapping from metric name to the criterion applied to it.
        iter_data: Per-run result records; must not be empty.
    """
    assert iter_data, "Summary should not be empty"

    # Average each SLA metric over all runs before evaluating any criterion.
    num_runs = len(iter_data)
    mean_by_metric = {}
    for metric in sla_comb:
        per_run = [float(run[metric]) for run in iter_data]  # type: ignore
        mean_by_metric[metric] = sum(per_run) / num_runs

    margins = [
        criterion.print_and_compute_margin(mean_by_metric, metric)
        for metric, criterion in sla_comb.items()
    ]

    # The largest margin decides the overall result.
    return max(margins)
def
solve_sla
(
def
solve_sla
(
server
:
ServerProcess
|
None
,
server
:
ServerProcess
|
None
,
bench_cmd
:
list
[
str
],
bench_cmd
:
list
[
str
],
...
@@ -170,11 +197,18 @@ def solve_sla(
...
@@ -170,11 +197,18 @@ def solve_sla(
sla_data
=
list
[
dict
[
str
,
object
]]()
sla_data
=
list
[
dict
[
str
,
object
]]()
history
=
SLAHistory
(
min_value
=
sla_min_value
,
max_value
=
sla_max_value
)
history
=
SLAHistory
(
min_value
=
sla_min_value
,
max_value
=
sla_max_value
)
# Use results from previous runs
for
past_sla_value
,
path
in
_iter_sla_val_paths
(
base_path
,
sla_variable
):
with
path
.
open
(
"rb"
)
as
f
:
past_iter_data
=
json
.
load
(
f
)
history
[
past_sla_value
]
=
_compute_margin
(
sla_comb
,
past_iter_data
)
# NOTE: We don't use equality here to be more robust against noisy results
# NOTE: We don't use equality here to be more robust against noisy results
while
history
.
get_max_passing
()
+
1
<
history
.
get_min_failing
():
while
history
.
get_max_passing
()
+
1
<
history
.
get_min_failing
():
if
len
(
history
)
==
0
:
if
max
(
history
,
default
=
sla_min_value
)
<
sla_max_value
:
val
=
sla_max_value
val
=
sla_max_value
elif
le
n
(
history
)
==
1
:
elif
mi
n
(
history
,
default
=
sla_max_value
)
>
sla_min_value
:
val
=
sla_min_value
val
=
sla_min_value
else
:
else
:
spl
=
PchipInterpolator
(
*
history
.
get_xy
(),
extrapolate
=
False
)
spl
=
PchipInterpolator
(
*
history
.
get_xy
(),
extrapolate
=
False
)
...
@@ -205,24 +239,15 @@ def solve_sla(
...
@@ -205,24 +239,15 @@ def solve_sla(
if
iter_data
is
None
:
if
iter_data
is
None
:
return
None
return
None
sla_data
.
extend
(
iter_data
)
margin
=
_compute_margin
(
sla_comb
,
iter_data
)
iter_data_mean
=
{
k
:
sum
(
float
(
run_data
[
k
])
for
run_data
in
iter_data
)
/
len
(
iter_data
)
# type: ignore
for
k
in
sla_comb
}
sla_margins
=
[
criterion
.
print_and_compute_margin
(
iter_data_mean
,
k
)
for
k
,
criterion
in
sla_comb
.
items
()
]
history
[
val
]
=
margin
=
max
(
sla_margins
)
if
margin
<=
0
:
if
margin
<=
0
:
print
(
f
"SLA criteria are met. (
{
margin
=
:.
2
f
}
)"
)
print
(
f
"SLA criteria are met. (
{
margin
=
:.
2
f
}
)"
)
else
:
else
:
print
(
f
"SLA criteria are not met. (
{
margin
=
:.
2
f
}
)"
)
print
(
f
"SLA criteria are not met. (
{
margin
=
:.
2
f
}
)"
)
sla_data
.
extend
(
iter_data
)
history
[
val
]
=
margin
return
sla_data
,
history
return
sla_data
,
history
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment