Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
39d90449
Unverified
Commit
39d90449
authored
Apr 13, 2025
by
Yineng Zhang
Committed by
GitHub
Apr 13, 2025
Browse files
feat: update experiment_runner (#5360)
parent
39e41138
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
81 additions
and
1 deletion
+81
-1
test/srt/configs/llama_405b.yaml
test/srt/configs/llama_405b.yaml
+28
-0
test/srt/experiment_runner.py
test/srt/experiment_runner.py
+7
-1
test/srt/parse_results.py
test/srt/parse_results.py
+46
-0
No files found.
test/srt/configs/llama_405b.yaml
0 → 100644
View file @
39d90449
# Benchmark sweep for nvidia/Llama-3.1-405B-Instruct-FP8 served with 8-way
# tensor parallelism. Every task uses fixed random input length 8192 and
# output length 1024; only --max-concurrency varies, with --num-prompts
# scaled up alongside it so each run issues multiple prompts per slot.
# All client runs append to the same llama_405b_results.jsonl file.
tasks:
  - name: sglang-8192-1024-concurrency1
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 1 --num-prompts 5 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency2
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 2 --num-prompts 10 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency4
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 4 --num-prompts 20 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency8
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 8 --num-prompts 32 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency16
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 16 --num-prompts 48 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency24
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 24 --num-prompts 72 --output-file llama_405b_results.jsonl
  - name: sglang-8192-1024-concurrency32
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 32 --num-prompts 96 --output-file llama_405b_results.jsonl
test/srt/experiment_runner.py
View file @
39d90449
...
...
@@ -317,6 +317,11 @@ def format_results(results: List[TaskResult]) -> str:
return
"
\n
"
.
join
(
output
)
def get_bool_env_var(name: str, default: str = "false") -> bool:
    """Return True when the environment variable *name* holds a truthy string.

    Args:
        name: Environment variable to inspect.
        default: String used when the variable is unset (defaults to "false").

    Returns:
        True iff the (case-insensitive) value is "true" or "1".
    """
    return os.getenv(name, default).lower() in ("true", "1")
def
write_in_github_step_summary
(
results
:
List
[
TaskResult
]):
"""Write formatted results to GitHub step summary."""
if
not
os
.
environ
.
get
(
"GITHUB_STEP_SUMMARY"
):
...
...
@@ -349,7 +354,8 @@ def main():
result
=
runner
.
run_task
(
config
)
results
.
append
(
result
)
write_in_github_step_summary
(
results
)
if
get_bool_env_var
(
"SGLANG_IS_IN_CI"
):
write_in_github_step_summary
(
results
)
except
Exception
as
e
:
logger
.
error
(
f
"Error:
{
e
}
"
)
raise
...
...
test/srt/parse_results.py
0 → 100644
View file @
39d90449
import
json
import
pandas
as
pd
import
argparse
import
os
from
tabulate
import
tabulate
# Parse command-line arguments
parser
=
argparse
.
ArgumentParser
(
description
=
"Parse JSONL benchmark and summarize."
)
parser
.
add_argument
(
"input_file"
,
type
=
str
,
help
=
"Path to input JSONL file"
)
args
=
parser
.
parse_args
()
input_file
=
args
.
input_file
base_name
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
input_file
))[
0
]
output_file
=
f
"
{
base_name
}
_summary.csv"
fields
=
[
"max_concurrency"
,
"output_throughput"
,
"mean_ttft_ms"
,
"median_ttft_ms"
,
"p99_ttft_ms"
,
"mean_tpot_ms"
,
"median_tpot_ms"
,
"p99_tpot_ms"
,
]
# Read JSONL and parse
results
=
[]
with
open
(
input_file
,
"r"
)
as
f
:
for
line
in
f
:
data
=
json
.
loads
(
line
)
row
=
{
field
:
data
.
get
(
field
,
None
)
for
field
in
fields
}
max_conc
=
data
.
get
(
"max_concurrency"
)
out_tp
=
data
.
get
(
"output_throughput"
)
row
[
"per_user_throughput"
]
=
out_tp
/
max_conc
if
max_conc
else
None
results
.
append
(
row
)
# Convert to DataFrame
df
=
pd
.
DataFrame
(
results
)
# Save to CSV
df
.
to_csv
(
output_file
,
index
=
False
)
print
(
f
"
\n
Saved summary to:
{
output_file
}
\n
"
)
# Print ASCII table
print
(
tabulate
(
df
,
headers
=
"keys"
,
tablefmt
=
"grid"
,
floatfmt
=
".3f"
))
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment