Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
change
sglang
Commits
2b71531a
Unverified
Commit
2b71531a
authored
Oct 29, 2025
by
Kangyan-Zhou
Committed by
GitHub
Oct 29, 2025
Browse files
Allow benchmarking tool to handle empty response (#12174)
Co-authored-by:
Claude
<
noreply@anthropic.com
>
parent
25c50498
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
9 additions
and
3 deletions
+9
-3
python/sglang/test/simple_eval_common.py
python/sglang/test/simple_eval_common.py
+5
-3
python/sglang/test/simple_eval_humaneval.py
python/sglang/test/simple_eval_humaneval.py
+1
-0
python/sglang/test/simple_eval_math.py
python/sglang/test/simple_eval_math.py
+1
-0
python/sglang/test/simple_eval_mmlu.py
python/sglang/test/simple_eval_mmlu.py
+1
-0
python/sglang/test/simple_eval_mmmu_vlm.py
python/sglang/test/simple_eval_mmmu_vlm.py
+1
-0
No files found.
python/sglang/test/simple_eval_common.py
View file @
2b71531a
...
...
@@ -148,7 +148,7 @@ class ChatCompletionSampler(SamplerBase):
reasoning_effort
=
self
.
reasoning_effort
,
extra_body
=
self
.
extra_body
,
)
return
response
.
choices
[
0
].
message
.
content
return
response
.
choices
[
0
].
message
.
content
or
""
# NOTE: BadRequestError is triggered once for MMMU, please uncomment if you are rerunning MMMU
except
openai
.
BadRequestError
as
e
:
print
(
"Bad Request Error"
,
e
)
...
...
@@ -161,7 +161,9 @@ class ChatCompletionSampler(SamplerBase):
)
time
.
sleep
(
exception_backoff
)
trial
+=
1
# unknown error shall throw exception
# If all retries are exhausted, return empty string instead of None
print
(
f
"All retry attempts exhausted for request. Returning empty response."
)
return
""
QUERY_TEMPLATE_MULTICHOICE
=
"""
...
...
@@ -261,7 +263,7 @@ def format_multichoice_question(row):
def
check_equality
(
sampler
:
SamplerBase
,
expr1
:
str
,
expr2
:
str
):
prompt
=
EQUALITY_TEMPLATE
%
{
"expression1"
:
expr1
,
"expression2"
:
expr2
}
response
=
sampler
([
dict
(
content
=
prompt
,
role
=
"user"
)])
return
response
.
lower
().
strip
()
==
"yes"
return
(
response
or
""
)
.
lower
().
strip
()
==
"yes"
def
_compute_stat
(
values
:
list
,
stat
:
str
):
...
...
python/sglang/test/simple_eval_humaneval.py
View file @
2b71531a
...
...
@@ -80,6 +80,7 @@ class HumanEval(Eval):
instruction
=
"Read the following function signature and docstring, and fully implement the function described. Your response should only contain the code for this function.
\n
"
def
find_code
(
completion
):
completion
=
completion
or
""
pattern
=
re
.
compile
(
r
"```python\n(.*?)```"
,
re
.
DOTALL
)
matches
=
pattern
.
findall
(
completion
)
extracted_answer
=
matches
[
0
]
if
len
(
matches
)
>=
1
else
completion
...
...
python/sglang/test/simple_eval_math.py
View file @
2b71531a
...
...
@@ -54,6 +54,7 @@ class MathEval(Eval):
sampler
.
_pack_message
(
content
=
QUERY_TEMPLATE
.
format
(
**
row
),
role
=
"user"
)
]
response_text
=
sampler
(
prompt_messages
)
response_text
=
response_text
or
""
match
=
re
.
search
(
ANSWER_PATTERN
,
response_text
)
extracted_answer
=
match
.
group
(
1
)
if
match
else
None
score
=
float
(
...
...
python/sglang/test/simple_eval_mmlu.py
View file @
2b71531a
...
...
@@ -101,6 +101,7 @@ class MMLUEval(Eval):
)
]
response_text
=
sampler
(
prompt_messages
)
response_text
=
response_text
or
""
match
=
re
.
search
(
ANSWER_PATTERN_MULTICHOICE
,
response_text
)
extracted_answer
=
match
.
group
(
1
)
if
match
else
None
score
=
1.0
if
extracted_answer
==
row
[
"Answer"
]
else
0.0
...
...
python/sglang/test/simple_eval_mmmu_vlm.py
View file @
2b71531a
...
...
@@ -204,6 +204,7 @@ class MMMUVLMEval(Eval):
# Sample
response_text
=
sampler
(
prompt_messages
)
response_text
=
response_text
or
""
# Parse and score
gold
=
sample
[
"answer"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment