Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
vllm_test_tools
Commits
f3ce07a4
Commit
f3ce07a4
authored
Jul 02, 2025
by
jerrrrry
Browse files
Update benchmark_throughput_0.8.5.py
parent
e8491994
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
14 deletions
+14
-14
085-offline/benchmark_throughput_0.8.5.py
085-offline/benchmark_throughput_0.8.5.py
+14
-14
No files found.
085-offline/benchmark_throughput_0.8.5.py
View file @
f3ce07a4
...
...
@@ -248,18 +248,18 @@ def run_vllm(
))
end
=
time
.
perf_counter
()
total_ttfts
=
[]
total_tpo
p
s
=
[]
total_tpo
t
s
=
[]
total_output_token_throughput
=
[]
total_inout_token_throughput
=
[]
for
output
in
real_output
:
ttft_
=
output
.
metrics
.
first_token_time
-
output
.
metrics
.
arrival_time
tpo
p
_
=
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
-
ttft_
)
/
(
ELEoutput
-
1
)
tpo
t
_
=
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
-
ttft_
)
/
(
ELEoutput
-
1
)
output_token_throughput
=
(
ELEoutput
)
/
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
)
inout_token_throughput
=
(
ELEoutput
+
ELEinput
)
/
(
output
.
metrics
.
finished_time
-
output
.
metrics
.
arrival_time
)
total_ttfts
.
append
(
ttft_
)
total_tpo
p
s
.
append
(
tpo
p
_
)
total_tpo
t
s
.
append
(
tpo
t
_
)
total_output_token_throughput
.
append
(
output_token_throughput
)
total_inout_token_throughput
.
append
(
inout_token_throughput
)
...
...
@@ -294,9 +294,9 @@ def run_vllm(
info
[
"ttft_median"
]
=
np
.
around
(
np
.
median
(
total_ttfts
or
0
),
5
)
info
[
"ttft_p99"
]
=
np
.
around
(
np
.
percentile
(
total_ttfts
or
0
,
99
),
5
)
info
[
"tpo
p
_mean"
]
=
np
.
around
(
np
.
mean
(
total_tpo
p
s
),
4
)
info
[
"tpo
p
_median"
]
=
np
.
around
(
np
.
median
(
total_tpo
p
s
or
0
),
5
)
info
[
"tpo
p
_p99"
]
=
np
.
around
(
np
.
percentile
(
total_tpo
p
s
or
0
,
99
),
5
)
info
[
"tpo
t
_mean"
]
=
np
.
around
(
np
.
mean
(
total_tpo
t
s
),
4
)
info
[
"tpo
t
_median"
]
=
np
.
around
(
np
.
median
(
total_tpo
t
s
or
0
),
5
)
info
[
"tpo
t
_p99"
]
=
np
.
around
(
np
.
percentile
(
total_tpo
t
s
or
0
,
99
),
5
)
info
[
"output_token_throughput_mean"
]
=
np
.
around
(
np
.
mean
(
total_output_token_throughput
),
2
)
info
[
"output_token_throughput_median"
]
=
np
.
around
(
np
.
median
(
total_output_token_throughput
or
0
),
2
)
...
...
@@ -316,17 +316,17 @@ def run_vllm(
print
(
"=============================================="
)
print
(
f
"total_out_tokens:
{
total_output_tokens
:
.
2
f
}
tokens"
)
print
(
f
"elapsed_time:
{
info
[
'elapsed_time'
]:
.
2
f
}
s"
)
# 总耗时
print
(
f
"TTFT_mean:
{
info
[
'ttft_mean'
]:
.
5
f
}
s"
)
# 首字延时
print
(
f
"ttft_p99:
{
info
[
'ttft_p99'
]:
.
5
f
}
s"
)
print
(
f
"ttft_median:
{
info
[
'ttft_median'
]:
.
5
f
}
s"
)
print
(
f
"TPO
P
_mean:
{
info
[
'tpo
p
_mean'
]:
.
5
f
}
s"
)
# 单字decode时间
print
(
f
"tpo
p
_median:
{
info
[
'tpo
p
_median'
]:
.
5
f
}
s"
)
print
(
f
"tpo
p
_p99:
{
info
[
'tpo
p
_p99'
]:
.
5
f
}
s"
)
print
(
f
"TTFT_mean:
{
info
[
'ttft_mean'
]
*
1000
:
.
2
f
}
m
s"
)
# 首字延时
print
(
f
"ttft_p99:
{
info
[
'ttft_p99'
]
*
1000
:
.
2
f
}
m
s"
)
print
(
f
"ttft_median:
{
info
[
'ttft_median'
]
*
1000
:
.
2
f
}
m
s"
)
print
(
f
"TPO
T
_mean:
{
info
[
'tpo
t
_mean'
]
*
1000
:
.
2
f
}
m
s"
)
# 单字decode时间
print
(
f
"tpo
t
_median:
{
info
[
'tpo
t
_median'
]
*
1000
:
.
2
f
}
m
s"
)
print
(
f
"tpo
t
_p99:
{
info
[
'tpo
t
_p99'
]
*
1000
:
.
2
f
}
m
s"
)
print
(
f
"output_token_throughput_mean:
{
info
[
'output_token_throughput_mean'
]:.
2
f
}
tokens/s"
)
# 单路生成吞吐
print
(
f
"output_token_throughput_median:
{
info
[
'output_token_throughput_median'
]:.
2
f
}
tokens/s"
)
print
(
f
"output_token_throughput_p99:
{
info
[
'output_token_throughput_p99'
]:.
2
f
}
tokens/s"
)
print
(
f
"inout_token_throughput_mean:
{
info
[
'inout_token_throughput_mean'
]:.
2
f
}
tokens/s"
)
# 单路总吞吐
print
(
f
"
t
inout_token_throughput_median:
{
info
[
'inout_token_throughput_median'
]:.
2
f
}
tokens/s"
)
print
(
f
"inout_token_throughput_median:
{
info
[
'inout_token_throughput_median'
]:.
2
f
}
tokens/s"
)
print
(
f
"inout_token_throughput_p99:
{
info
[
'inout_token_throughput_p99'
]:.
2
f
}
tokens/s"
)
print
(
"=============================================="
)
print
(
"
\n
"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment