change / sglang · Commit 6e2da515 (unverified)

Replace time.time() with time.perf_counter() for benchmarking. (#6178)

Authored May 11, 2025 by Lifu Huang; committed via GitHub on May 11, 2025.
Signed-off-by: Lifu Huang <lifu.hlf@gmail.com>
Parent: e9a47f4c

Changes: 61 files in the commit; this page shows 20 changed files with 42 additions and 42 deletions (+42 -42).
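Background on the change: time.time() reads the wall clock, which the operating system may step forward or backward (for example during NTP synchronization), so the difference of two readings is not guaranteed to equal the elapsed time. time.perf_counter() is monotonic and uses the highest-resolution clock available; its absolute value is meaningless, but deltas between two calls reliably measure elapsed time, which is exactly the benchmarking pattern used throughout these files. A minimal sketch contrasting the two clocks (run_benchmark is a hypothetical stand-in for the benchmarked call, not code from this commit):

```python
import time

def run_benchmark():
    """Hypothetical stand-in for the real workload (e.g. a run_batch call)."""
    return sum(i * i for i in range(1_000_000))

# Wall clock: readings can jump if the system clock is adjusted, so the
# delta below may be distorted (and can even come out negative).
wall_tic = time.time()
run_benchmark()
wall_latency = time.time() - wall_tic

# Monotonic, high-resolution clock: deltas reliably measure elapsed real
# time, including time spent sleeping or waiting on I/O.
tic = time.perf_counter()
run_benchmark()
latency = time.perf_counter() - tic

print(f"time.time() delta: {wall_latency:.3f}s, perf_counter delta: {latency:.3f}s")
```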
Changed files shown on this page:

benchmark/llm_judge/bench_sglang.py                     +2 -2
benchmark/long_json_decode/bench_other.py               +2 -2
benchmark/long_json_decode/bench_sglang.py              +2 -2
benchmark/mmlu/bench_other.py                           +3 -3
benchmark/mmlu/bench_sglang.py                          +2 -2
benchmark/mmmu/bench_sglang.py                          +2 -2
benchmark/mtbench/bench_other.py                        +2 -2
benchmark/mtbench/bench_sglang.py                       +2 -2
benchmark/mtbench/bench_sglang_eagle.py                 +2 -2
benchmark/multi_chain_reasoning/bench_other.py          +3 -3
benchmark/multi_chain_reasoning/bench_sglang.py         +2 -2
benchmark/multi_document_qa/bench_other.py              +2 -2
benchmark/multi_document_qa/bench_sglang.py             +2 -2
benchmark/multi_turn_chat/bench_other.py                +2 -2
benchmark/multi_turn_chat/bench_sglang.py               +2 -2
benchmark/multi_turn_chat/long_prompt_multi_turn.py     +2 -2
benchmark/react/bench_other.py                          +2 -2
benchmark/react/bench_sglang.py                         +2 -2
benchmark/reasoning_benchmark/bench_sglang.py           +2 -2
benchmark/tip_suggestion/bench_other.py                 +2 -2
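Each diff below applies the same two-line substitution: the tic (or start) assignment and the matching latency computation. A pattern this uniform could in principle live in one helper instead of being edited file by file; the following context manager is a hypothetical sketch under that assumption, not code from the commit:

```python
import time
from contextlib import contextmanager

@contextmanager
def timed(results: dict, key: str = "latency"):
    """Record elapsed seconds for the enclosed block using perf_counter."""
    tic = time.perf_counter()
    try:
        yield
    finally:
        results[key] = time.perf_counter() - tic

# Usage mirroring the benchmarks below (the sum stands in for run_batch):
results = {}
with timed(results):
    sum(i * i for i in range(1_000_000))
print(f"Latency: {results['latency']:.3f}")
```

Using try/finally ensures the elapsed time is recorded even if the benchmarked block raises. The per-file diffs follow.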
benchmark/llm_judge/bench_sglang.py

@@ -59,7 +59,7 @@ def main(args):
    backend = select_sglang_backend(args)

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    states = multi_dimension_judge.run_batch(
        arguments,
        temperature=0,
@@ -67,7 +67,7 @@ def main(args):
        num_threads=args.parallel,
        progress_bar=True,
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    print(f"Latency: {latency:.3f}")
benchmark/long_json_decode/bench_other.py

@@ -45,7 +45,7 @@ def main(args):
    def get_one_answer(i):
        states[i] = json_decode(generate=call_generate, **arguments[i])

-    tic = time.time()
+    tic = time.perf_counter()
    if args.parallel == 1:
        for i in tqdm(range(len(arguments))):
            get_one_answer(i)
@@ -58,7 +58,7 @@ def main(args):
            )
        )

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    print(f"Latency: {latency:.3f}")
benchmark/long_json_decode/bench_sglang.py

@@ -46,11 +46,11 @@ def main(args):
    sgl.set_default_backend(backend)

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    states = json_decode.run_batch(
        arguments, temperature=0, num_threads=args.parallel, progress_bar=True
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    print(f"Latency: {latency:.3f}")
benchmark/mmlu/bench_other.py

@@ -76,7 +76,7 @@ def evaluate(args, subject, dev_df, test_df, call_generate):
        pred = call_generate(prompts[i], temperature=0, max_tokens=max_tokens)
        preds[i] = pred.strip()[0]

-    tic = time.time()
+    tic = time.perf_counter()
    if args.parallel == 1:
        for i in range(len(prompts)):
            get_one_answer(i)
@@ -94,9 +94,9 @@ def evaluate(args, subject, dev_df, test_df, call_generate):
            for j in range(len(rets)):
                preds[i + j] = rets[j].strip()[0]

-        tic = time.time()
+        tic = time.perf_counter()
        asyncio.run(batched_call(batch_size=args.parallel))

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    cors = [pred == label for pred, label in zip(preds, labels)]
benchmark/mmlu/bench_sglang.py

@@ -116,7 +116,7 @@ def main(args):
    backend = select_sglang_backend(args)

    # Run
-    tic = time.time()
+    tic = time.perf_counter()
    states = few_shot_mmlu.run_batch(
        arguments,
        temperature=0,
@@ -128,7 +128,7 @@ def main(args):
    preds = [
        s["answer"].strip()[0] if len(s["answer"].strip()) > 0 else ""
        for s in states
    ]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    cors = [pred == label for pred, label in zip(preds, labels)]
benchmark/mmmu/bench_sglang.py

@@ -119,7 +119,7 @@ async def eval_mmmu(args) -> None:
        api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1"
    )
    semaphore = asyncio.Semaphore(args.concurrency)
-    start = time.time()
+    start = time.perf_counter()
    base_url = f"http://127.0.0.1:{args.port}"

    if args.profile:
@@ -147,7 +147,7 @@ async def eval_mmmu(args) -> None:
        if profile_output.success:
            print("Profiler stopped")

-    print(f"Benchmark time: {time.time() - start}")
+    print(f"Benchmark time: {time.perf_counter() - start}")
    args.output_path = f"./val_sglang.json"
    save_json(args.output_path, out_samples)
    eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
benchmark/mtbench/bench_other.py

@@ -66,7 +66,7 @@ def main(args):
        answers[i] = cur_answers

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    if args.parallel == 1:
        for i in tqdm(range(len(questions))):
            get_answer(i)
@@ -79,7 +79,7 @@ def main(args):
            )
        )

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    print(f"#questions: {len(questions)}, Latency: {latency:.2f}")
benchmark/mtbench/bench_sglang.py

@@ -57,7 +57,7 @@ def main(args):
    sgl.set_default_backend(backend)

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    rets = answer_mt_bench.run_batch(
        arguments,
        temperature=0,
@@ -66,7 +66,7 @@ def main(args):
        progress_bar=True,
    )
    answers = [[s["answer_1"], s["answer_2"]] for s in rets]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    print(f"#questions: {len(questions)}, Latency: {latency:.2f}")
benchmark/mtbench/bench_sglang_eagle.py

@@ -68,7 +68,7 @@ def main(args):
    sgl.set_default_backend(backend)

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    rets = answer_mt_bench.run_batch(
        arguments,
        temperature=0,
@@ -78,7 +78,7 @@ def main(args):
    )
    answers = [[s["answer_1"], s["answer_2"]] for s in rets]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    num_output_tokens = sum(
        s.get_meta_info("answer_1")["completion_tokens"]
        + s.get_meta_info("answer_2")["completion_tokens"]
benchmark/multi_chain_reasoning/bench_other.py

@@ -113,7 +113,7 @@ def main(args):
        answer = multi_chain_gsm8k(questions[i], args.num_chains, call_generate)
        states[i] = answer

-    tic = time.time()
+    tic = time.perf_counter()
    if args.parallel == 1:
        for i in tqdm(range(len(questions))):
            get_one_answer(i)
@@ -134,7 +134,7 @@ def main(args):
            )
            states[i] = answer

-        tic = time.time()
+        tic = time.perf_counter()
        loop = asyncio.get_event_loop()
        batches = [
            list(range(i, min(i + args.parallel, len(questions))))
@@ -144,7 +144,7 @@ def main(args):
            tasks = [get_one_answer_asyncio(k) for k in bt]
            loop.run_until_complete(asyncio.gather(*tasks))

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    preds = []
    for i in range(len(states)):
benchmark/multi_chain_reasoning/bench_sglang.py

@@ -90,7 +90,7 @@ def main(args):
    backend = select_sglang_backend(args)

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    states = multi_chain_gsm8k.run_batch(
        arguments,
        temperature=0,
@@ -98,7 +98,7 @@ def main(args):
        num_threads=args.parallel,
        progress_bar=True,
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    preds = []
    for i in range(len(states)):
benchmark/multi_document_qa/bench_other.py

@@ -61,7 +61,7 @@ def main(args):
    def get_one_answer(i):
        states[i] = multi_document_qa(generate=call_generate, **arguments[i])

-    tic = time.time()
+    tic = time.perf_counter()
    if args.parallel == 1:
        for i in tqdm(range(len(labels))):
            get_one_answer(i)
@@ -74,7 +74,7 @@ def main(args):
            )
        )

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    print(states)
benchmark/multi_document_qa/bench_sglang.py

@@ -49,11 +49,11 @@ def main(args):
    sgl.set_default_backend(backend)

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    states = multi_document_qa.run_batch(
        arguments, temperature=0, num_threads=args.parallel, progress_bar=True
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    print([s["answer"] for s in states])
benchmark/multi_turn_chat/bench_other.py

@@ -35,7 +35,7 @@ def main(args):
    def get_one_answer(i):
        states[i] = multi_turns(generate=call_generate, **multi_qas[i])

-    tic = time.time()
+    tic = time.perf_counter()
    if args.parallel == 1:
        for i in tqdm(range(len(multi_qas))):
            get_one_answer(i)
@@ -50,7 +50,7 @@ def main(args):
        for _ in rets:
            pass

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    print(f"Latency: {latency:.3f}")
benchmark/multi_turn_chat/bench_sglang.py

@@ -27,7 +27,7 @@ def main(args):
    backend = select_sglang_backend(args)

-    tic = time.time()
+    tic = time.perf_counter()
    states = multi_turns.run_batch(
        multi_qas,
        temperature=0,
@@ -35,7 +35,7 @@ def main(args):
        num_threads=args.parallel,
        progress_bar=True,
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    print(f"Latency: {latency:.3f}")
benchmark/multi_turn_chat/long_prompt_multi_turn.py

@@ -84,7 +84,7 @@ def main(args):
    backend = select_sglang_backend(args)

-    tic = time.time()
+    tic = time.perf_counter()
    states = multi_turns.run_batch(
        multi_qas,
        temperature=0,
@@ -92,7 +92,7 @@ def main(args):
        num_threads="auto",
        progress_bar=True,
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    print(f"Latency: {latency:.3f}")
benchmark/react/bench_other.py

@@ -146,7 +146,7 @@ def main(args):
            states.append(answer)

-    tic = time.time()
+    tic = time.perf_counter()
    if args.backend != "lmql":
        if args.parallel == 1:
@@ -173,7 +173,7 @@ def main(args):
            tasks = [run_single_agent_async(arg) for arg in bt]
            loop.run_until_complete(asyncio.gather(*tasks))

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    print(f"Latency: {latency:.3f}")
benchmark/react/bench_sglang.py

@@ -115,14 +115,14 @@ def main(args):
    sgl.set_default_backend(backend)

    states = []
-    tic = time.time()
+    tic = time.perf_counter()
    states = webthink.run_batch(
        arguments,
        temperature=0,
        num_threads=args.parallel,
        progress_bar=True,
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    print(f"Latency: {latency:.3f}")
benchmark/reasoning_benchmark/bench_sglang.py

@@ -51,7 +51,7 @@ def main(args):
    )

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    states = reasoning_gen.run_batch(
        questions,
        num_threads=args.parallel,
@@ -60,7 +60,7 @@ def main(args):
        max_new_tokens=32768,
        top_p=0.95,
    )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Extract results and record outcomes in a list.
    outcomes = []
benchmark/tip_suggestion/bench_other.py

@@ -68,7 +68,7 @@ def main(args):
    call_generate = partial(get_call_generate(args), temperature=0)

    # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
    if args.backend != "lmql":

        def get_one_answer(i):
@@ -102,7 +102,7 @@ def main(args):
            loop.run_until_complete(
                asyncio.gather(*[get_one_answer_async(i) for i in batch])
            )

-    latency = time.time() - tic
+    latency = time.perf_counter() - tic

    # Compute accuracy
    print(f"Latency: {latency:.3f}")
(End of page 1 of 4. The remaining 41 changed files are listed on the following pages.)