Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
easystart_v0.1
Commits
f152de33
Commit
f152de33
authored
Jun 06, 2025
by
jerrrrry
Browse files
Update benchmark_throughput.py
parent
9f086524
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
10 deletions
+10
-10
3_env_check&batches_llm_inference/scripts/benchmark_throughput.py
...eck&batches_llm_inference/scripts/benchmark_throughput.py
+10
-10
No files found.
3_env_check&batches_llm_inference/scripts/benchmark_throughput.py
View file @
f152de33
...
@@ -248,18 +248,18 @@ def run_vllm(
...
@@ -248,18 +248,18 @@ def run_vllm(
))
))
end
=
time
.
perf_counter
()
end
=
time
.
perf_counter
()
total_ttfts
=
[]
total_ttfts
=
[]
total_tpo
p
s
=
[]
total_tpo
t
s
=
[]
total_output_token_throughput
=
[]
total_output_token_throughput
=
[]
total_inout_token_throughput
=
[]
total_inout_token_throughput
=
[]
for
output
in
real_output
:
for
output
in
real_output
:
ttft_
=
output
.
metrics
.
first_token_time
-
output
.
metrics
.
arrival_time
ttft_
=
output
.
metrics
.
first_token_time
-
output
.
metrics
.
arrival_time
tpo
p
_
=
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
-
ttft_
)
/
(
ELEoutput
-
1
)
tpo
t
_
=
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
-
ttft_
)
/
(
ELEoutput
-
1
)
output_token_throughput
=
(
ELEoutput
)
/
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
)
output_token_throughput
=
(
ELEoutput
)
/
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
)
inout_token_throughput
=
(
ELEoutput
+
ELEinput
)
/
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
)
inout_token_throughput
=
(
ELEoutput
+
ELEinput
)
/
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
)
total_ttfts
.
append
(
ttft_
)
total_ttfts
.
append
(
ttft_
)
total_tpo
p
s
.
append
(
tpo
p
_
)
total_tpo
t
s
.
append
(
tpo
t
_
)
total_output_token_throughput
.
append
(
output_token_throughput
)
total_output_token_throughput
.
append
(
output_token_throughput
)
total_inout_token_throughput
.
append
(
inout_token_throughput
)
total_inout_token_throughput
.
append
(
inout_token_throughput
)
...
@@ -294,9 +294,9 @@ def run_vllm(
...
@@ -294,9 +294,9 @@ def run_vllm(
info
[
"ttft_median"
]
=
np
.
around
(
np
.
median
(
total_ttfts
or
0
),
5
)
info
[
"ttft_median"
]
=
np
.
around
(
np
.
median
(
total_ttfts
or
0
),
5
)
info
[
"ttft_p99"
]
=
np
.
around
(
np
.
percentile
(
total_ttfts
or
0
,
99
),
5
)
info
[
"ttft_p99"
]
=
np
.
around
(
np
.
percentile
(
total_ttfts
or
0
,
99
),
5
)
info
[
"tpo
p
_mean"
]
=
np
.
around
(
np
.
mean
(
total_tpo
p
s
),
4
)
info
[
"tpo
t
_mean"
]
=
np
.
around
(
np
.
mean
(
total_tpo
t
s
),
4
)
info
[
"tpo
p
_median"
]
=
np
.
around
(
np
.
median
(
total_tpo
p
s
or
0
),
5
)
info
[
"tpo
t
_median"
]
=
np
.
around
(
np
.
median
(
total_tpo
t
s
or
0
),
5
)
info
[
"tpo
p
_p99"
]
=
np
.
around
(
np
.
percentile
(
total_tpo
p
s
or
0
,
99
),
5
)
info
[
"tpo
t
_p99"
]
=
np
.
around
(
np
.
percentile
(
total_tpo
t
s
or
0
,
99
),
5
)
info
[
"output_token_throughput_mean"
]
=
np
.
around
(
np
.
mean
(
total_output_token_throughput
),
2
)
info
[
"output_token_throughput_mean"
]
=
np
.
around
(
np
.
mean
(
total_output_token_throughput
),
2
)
info
[
"output_token_throughput_median"
]
=
np
.
around
(
np
.
median
(
total_output_token_throughput
or
0
),
2
)
info
[
"output_token_throughput_median"
]
=
np
.
around
(
np
.
median
(
total_output_token_throughput
or
0
),
2
)
...
@@ -319,14 +319,14 @@ def run_vllm(
...
@@ -319,14 +319,14 @@ def run_vllm(
print
(
f
"TTFT_mean:
{
info
[
'ttft_mean'
]
*
1000
:
.
2
f
}
ms"
)
# 首字延时
print
(
f
"TTFT_mean:
{
info
[
'ttft_mean'
]
*
1000
:
.
2
f
}
ms"
)
# 首字延时
print
(
f
"ttft_p99:
{
info
[
'ttft_p99'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"ttft_p99:
{
info
[
'ttft_p99'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"ttft_median:
{
info
[
'ttft_median'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"ttft_median:
{
info
[
'ttft_median'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"TPOT_mean:
{
info
[
'tpo
p
_mean'
]
*
1000
:
.
2
f
}
ms"
)
# 单字decode时间
print
(
f
"TPOT_mean:
{
info
[
'tpo
t
_mean'
]
*
1000
:
.
2
f
}
ms"
)
# 单字decode时间
print
(
f
"tpot_median:
{
info
[
'tpo
p
_median'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"tpot_median:
{
info
[
'tpo
t
_median'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"tpot_p99:
{
info
[
'tpo
p
_p99'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"tpot_p99:
{
info
[
'tpo
t
_p99'
]
*
1000
:
.
2
f
}
ms"
)
print
(
f
"output_token_throughput_mean:
{
info
[
'output_token_throughput_mean'
]:.
2
f
}
tokens/s"
)
# 单路生成吞吐
print
(
f
"output_token_throughput_mean:
{
info
[
'output_token_throughput_mean'
]:.
2
f
}
tokens/s"
)
# 单路生成吞吐
print
(
f
"output_token_throughput_median:
{
info
[
'output_token_throughput_median'
]:.
2
f
}
tokens/s"
)
print
(
f
"output_token_throughput_median:
{
info
[
'output_token_throughput_median'
]:.
2
f
}
tokens/s"
)
print
(
f
"output_token_throughput_p99:
{
info
[
'output_token_throughput_p99'
]:.
2
f
}
tokens/s"
)
print
(
f
"output_token_throughput_p99:
{
info
[
'output_token_throughput_p99'
]:.
2
f
}
tokens/s"
)
print
(
f
"inout_token_throughput_mean:
{
info
[
'inout_token_throughput_mean'
]:.
2
f
}
tokens/s"
)
# 单路总吞吐
print
(
f
"inout_token_throughput_mean:
{
info
[
'inout_token_throughput_mean'
]:.
2
f
}
tokens/s"
)
# 单路总吞吐
print
(
f
"
t
inout_token_throughput_median:
{
info
[
'inout_token_throughput_median'
]:.
2
f
}
tokens/s"
)
print
(
f
"inout_token_throughput_median:
{
info
[
'inout_token_throughput_median'
]:.
2
f
}
tokens/s"
)
print
(
f
"inout_token_throughput_p99:
{
info
[
'inout_token_throughput_p99'
]:.
2
f
}
tokens/s"
)
print
(
f
"inout_token_throughput_p99:
{
info
[
'inout_token_throughput_p99'
]:.
2
f
}
tokens/s"
)
print
(
"=============================================="
)
print
(
"=============================================="
)
print
(
"
\n
"
)
print
(
"
\n
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment