sglang · Commits · 6e2da515

Unverified commit 6e2da515, authored May 11, 2025 by Lifu Huang; committed by GitHub on May 11, 2025.

Replace time.time() to time.perf_counter() for benchmarking. (#6178)

Signed-off-by: Lifu Huang <lifu.hlf@gmail.com>

Parent: e9a47f4c

Showing 20 changed files (of 61 changed in total) with 42 additions and 42 deletions:
- benchmark/llm_judge/bench_sglang.py (+2 -2)
- benchmark/long_json_decode/bench_other.py (+2 -2)
- benchmark/long_json_decode/bench_sglang.py (+2 -2)
- benchmark/mmlu/bench_other.py (+3 -3)
- benchmark/mmlu/bench_sglang.py (+2 -2)
- benchmark/mmmu/bench_sglang.py (+2 -2)
- benchmark/mtbench/bench_other.py (+2 -2)
- benchmark/mtbench/bench_sglang.py (+2 -2)
- benchmark/mtbench/bench_sglang_eagle.py (+2 -2)
- benchmark/multi_chain_reasoning/bench_other.py (+3 -3)
- benchmark/multi_chain_reasoning/bench_sglang.py (+2 -2)
- benchmark/multi_document_qa/bench_other.py (+2 -2)
- benchmark/multi_document_qa/bench_sglang.py (+2 -2)
- benchmark/multi_turn_chat/bench_other.py (+2 -2)
- benchmark/multi_turn_chat/bench_sglang.py (+2 -2)
- benchmark/multi_turn_chat/long_prompt_multi_turn.py (+2 -2)
- benchmark/react/bench_other.py (+2 -2)
- benchmark/react/bench_sglang.py (+2 -2)
- benchmark/reasoning_benchmark/bench_sglang.py (+2 -2)
- benchmark/tip_suggestion/bench_other.py (+2 -2)
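Background on the change, for readers of this diff (this note is not part of the commit): `time.time()` reads the system wall clock, which NTP slews or manual adjustments can move mid-run, so an interval computed from it can be distorted or even negative. `time.perf_counter()` is monotonic and uses the highest-resolution timer available, and Python's documentation recommends it for measuring elapsed time; its absolute value is meaningless, only differences between calls are. A minimal sketch of the before/after pattern, with a hypothetical `do_work()` standing in for the benchmarked request batch:

```python
import time


def do_work() -> None:
    """Hypothetical stand-in for the benchmarked request batch."""
    time.sleep(0.1)


# Before: wall-clock timing; a clock adjustment mid-run skews the result.
tic = time.time()
do_work()
print(f"Latency (wall clock): {time.time() - tic:.3f}")

# After: monotonic, high-resolution timing; immune to clock adjustments.
tic = time.perf_counter()
do_work()
print(f"Latency: {time.perf_counter() - tic:.3f}")
```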
benchmark/llm_judge/bench_sglang.py

```diff
@@ -59,7 +59,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_dimension_judge.run_batch(
         arguments,
         temperature=0,
@@ -67,7 +67,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
```
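The same `tic` / `latency` pattern recurs in every file that follows. As a purely hypothetical refactor (not something this commit or sglang provides), the measurement could be centralized in a small context manager so call sites cannot accidentally mix the wall clock and the monotonic clock:

```python
import time
from contextlib import contextmanager


@contextmanager
def timed(label: str = "Latency"):
    """Time a block with the monotonic perf_counter clock and print the result."""
    tic = time.perf_counter()
    try:
        yield
    finally:
        print(f"{label}: {time.perf_counter() - tic:.3f}")


# Usage sketch:
with timed():
    time.sleep(0.1)  # stand-in for run_batch(...)
```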
benchmark/long_json_decode/bench_other.py

```diff
@@ -45,7 +45,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = json_decode(generate=call_generate, **arguments[i])
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(arguments))):
             get_one_answer(i)
@@ -58,7 +58,7 @@ def main(args):
             )
         )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
```
benchmark/long_json_decode/bench_sglang.py

```diff
@@ -46,11 +46,11 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = json_decode.run_batch(
         arguments, temperature=0, num_threads=args.parallel, progress_bar=True
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
```
benchmark/mmlu/bench_other.py

```diff
@@ -76,7 +76,7 @@ def evaluate(args, subject, dev_df, test_df, call_generate):
         pred = call_generate(prompts[i], temperature=0, max_tokens=max_tokens)
         preds[i] = pred.strip()[0]
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in range(len(prompts)):
             get_one_answer(i)
@@ -94,9 +94,9 @@ def evaluate(args, subject, dev_df, test_df, call_generate):
                 for j in range(len(rets)):
                     preds[i + j] = rets[j].strip()[0]
 
-        tic = time.time()
+        tic = time.perf_counter()
         asyncio.run(batched_call(batch_size=args.parallel))
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     cors = [pred == label for pred, label in zip(preds, labels)]
```
benchmark/mmlu/bench_sglang.py

```diff
@@ -116,7 +116,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run
-    tic = time.time()
+    tic = time.perf_counter()
     states = few_shot_mmlu.run_batch(
         arguments,
         temperature=0,
@@ -128,7 +128,7 @@ def main(args):
     preds = [
         s["answer"].strip()[0] if len(s["answer"].strip()) > 0 else "" for s in states
     ]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     cors = [pred == label for pred, label in zip(preds, labels)]
```
benchmark/mmmu/bench_sglang.py

```diff
@@ -119,7 +119,7 @@ async def eval_mmmu(args) -> None:
         api_key="sk", base_url=f"http://127.0.0.1:{args.port}/v1"
     )
     semaphore = asyncio.Semaphore(args.concurrency)
-    start = time.time()
+    start = time.perf_counter()
 
     base_url = f"http://127.0.0.1:{args.port}"
     if args.profile:
@@ -147,7 +147,7 @@ async def eval_mmmu(args) -> None:
         if profile_output.success:
             print("Profiler stopped")
 
-    print(f"Benchmark time: {time.time() - start}")
+    print(f"Benchmark time: {time.perf_counter() - start}")
     args.output_path = f"./val_sglang.json"
     save_json(args.output_path, out_samples)
     eval_result(model_answer_path=args.output_path, answer_dict=answer_dict)
```
benchmark/mtbench/bench_other.py

```diff
@@ -66,7 +66,7 @@ def main(args):
         answers[i] = cur_answers
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(questions))):
             get_answer(i)
@@ -79,7 +79,7 @@ def main(args):
             )
         )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"#questions: {len(questions)}, Latency: {latency:.2f}")
```
benchmark/mtbench/bench_sglang.py

```diff
@@ -57,7 +57,7 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     rets = answer_mt_bench.run_batch(
         arguments,
         temperature=0,
@@ -66,7 +66,7 @@ def main(args):
         progress_bar=True,
     )
     answers = [[s["answer_1"], s["answer_2"]] for s in rets]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"#questions: {len(questions)}, Latency: {latency:.2f}")
```
benchmark/mtbench/bench_sglang_eagle.py

```diff
@@ -68,7 +68,7 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     rets = answer_mt_bench.run_batch(
         arguments,
         temperature=0,
@@ -78,7 +78,7 @@ def main(args):
     )
     answers = [[s["answer_1"], s["answer_2"]] for s in rets]
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     num_output_tokens = sum(
         s.get_meta_info("answer_1")["completion_tokens"]
         + s.get_meta_info("answer_2")["completion_tokens"]
```
benchmark/multi_chain_reasoning/bench_other.py

```diff
@@ -113,7 +113,7 @@ def main(args):
         answer = multi_chain_gsm8k(questions[i], args.num_chains, call_generate)
         states[i] = answer
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(questions))):
             get_one_answer(i)
@@ -134,7 +134,7 @@ def main(args):
            )
            states[i] = answer
 
-        tic = time.time()
+        tic = time.perf_counter()
        loop = asyncio.get_event_loop()
        batches = [
            list(range(i, min(i + args.parallel, len(questions))))
@@ -144,7 +144,7 @@ def main(args):
            tasks = [get_one_answer_asyncio(k) for k in bt]
            loop.run_until_complete(asyncio.gather(*tasks))
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     preds = []
     for i in range(len(states)):
```
benchmark/multi_chain_reasoning/bench_sglang.py

```diff
@@ -90,7 +90,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_chain_gsm8k.run_batch(
         arguments,
         temperature=0,
@@ -98,7 +98,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     preds = []
     for i in range(len(states)):
```
benchmark/multi_document_qa/bench_other.py

```diff
@@ -61,7 +61,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = multi_document_qa(generate=call_generate, **arguments[i])
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(labels))):
             get_one_answer(i)
@@ -74,7 +74,7 @@ def main(args):
             )
         )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(states)
```
benchmark/multi_document_qa/bench_sglang.py

```diff
@@ -49,11 +49,11 @@ def main(args):
     sgl.set_default_backend(backend)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_document_qa.run_batch(
         arguments, temperature=0, num_threads=args.parallel, progress_bar=True
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print([s["answer"] for s in states])
```
benchmark/multi_turn_chat/bench_other.py

```diff
@@ -35,7 +35,7 @@ def main(args):
     def get_one_answer(i):
         states[i] = multi_turns(generate=call_generate, **multi_qas[i])
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.parallel == 1:
         for i in tqdm(range(len(multi_qas))):
             get_one_answer(i)
@@ -50,7 +50,7 @@ def main(args):
             for _ in rets:
                 pass
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
```
benchmark/multi_turn_chat/bench_sglang.py

```diff
@@ -27,7 +27,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_turns.run_batch(
         multi_qas,
         temperature=0,
@@ -35,7 +35,7 @@ def main(args):
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
```
benchmark/multi_turn_chat/long_prompt_multi_turn.py

```diff
@@ -84,7 +84,7 @@ def main(args):
     backend = select_sglang_backend(args)
 
-    tic = time.time()
+    tic = time.perf_counter()
     states = multi_turns.run_batch(
         multi_qas,
         temperature=0,
@@ -92,7 +92,7 @@ def main(args):
         num_threads="auto",
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
```
benchmark/react/bench_other.py

```diff
@@ -146,7 +146,7 @@ def main(args):
         states.append(answer)
 
-    tic = time.time()
+    tic = time.perf_counter()
     if args.backend != "lmql":
         if args.parallel == 1:
@@ -173,7 +173,7 @@ def main(args):
            tasks = [run_single_agent_async(arg) for arg in bt]
            loop.run_until_complete(asyncio.gather(*tasks))
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     print(f"Latency: {latency:.3f}")
```
benchmark/react/bench_sglang.py

```diff
@@ -115,14 +115,14 @@ def main(args):
     sgl.set_default_backend(backend)
 
     states = []
-    tic = time.time()
+    tic = time.perf_counter()
     states = webthink.run_batch(
         arguments,
         temperature=0,
         num_threads=args.parallel,
         progress_bar=True,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
```
benchmark/reasoning_benchmark/bench_sglang.py

```diff
@@ -51,7 +51,7 @@ def main(args):
     )
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     states = reasoning_gen.run_batch(
         questions,
         num_threads=args.parallel,
@@ -60,7 +60,7 @@ def main(args):
         max_new_tokens=32768,
         top_p=0.95,
     )
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Extract results and record outcomes in a list.
     outcomes = []
```
benchmark/tip_suggestion/bench_other.py

```diff
@@ -68,7 +68,7 @@ def main(args):
     call_generate = partial(get_call_generate(args), temperature=0)
 
     # Run requests
-    tic = time.time()
+    tic = time.perf_counter()
     if args.backend != "lmql":
 
         def get_one_answer(i):
@@ -102,7 +102,7 @@ def main(args):
            loop.run_until_complete(
                asyncio.gather(*[get_one_answer_async(i) for i in batch])
            )
 
-    latency = time.time() - tic
+    latency = time.perf_counter() - tic
 
     # Compute accuracy
     print(f"Latency: {latency:.3f}")
```