Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
9d37bcff
Commit
9d37bcff
authored
Oct 11, 2025
by
Baber
Browse files
fix mbpp
parent
3e8135ce
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
45 additions
and
10 deletions
+45
-10
lm_eval/tasks/mbpp/mbpp_instruct.yaml
lm_eval/tasks/mbpp/mbpp_instruct.yaml
+21
-7
lm_eval/tasks/mbpp/utils.py
lm_eval/tasks/mbpp/utils.py
+24
-3
No files found.
lm_eval/tasks/mbpp/mbpp_instruct.yaml
View file @
9d37bcff
...
@@ -4,25 +4,39 @@ dataset_name: full
...
@@ -4,25 +4,39 @@ dataset_name: full
unsafe_code
:
true
unsafe_code
:
true
output_type
:
generate_until
output_type
:
generate_until
test_split
:
test
test_split
:
test
doc_to_text
:
"
You
are
an
expert
Python
programmer,
and
here
is
your
task:
\n
{{text}}
\n
Your
code
should
pass
these
tests:
\n
{{test_list[0]}}
\n
{{test_list[1]}}
\n
{{test_list[2]}}"
doc_to_text
:
!function
utils.doc_to_text
doc_to_target
:
"
{
%
if
is_fewshot
is
defined
%}{{code}}
\n
```{%
else
%}{{test_list[0]}}
\n
{
{test_list[1]}}
\n
{{test_list[2]}}
{%
endif
%}
"
doc_to_target
:
"
{{test_list[1]}}
\n
{{test_list[2]}}"
gen_prefix
:
"
\n
```python
\n
"
gen_prefix
:
"
\n
```python
\n
"
target_delimiter
:
"
"
target_delimiter
:
"
"
metric_list
:
metric_list
:
-
metric
:
!function
utils.pass_at_
k
-
metric
:
!function
utils.pass_at_
10
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
k
:
[
1
]
k
:
[
1
0
]
filter_list
:
filter_list
:
-
name
:
"
extract_code"
-
name
:
"
extract_code"
filter
:
filter
:
-
function
:
"
custom"
-
function
:
"
custom"
filter_fn
:
!function
utils.build_predictions
filter_fn
:
!function
utils.build_predictions
repeats
:
20
generation_kwargs
:
generation_kwargs
:
max_gen_toks
:
256
max_gen_toks
:
256
until
:
[
]
until
:
[
do_sample
:
false
"
\n
class"
,
num_fewshot
:
3
"
\n
assert"
,
'
\n"""'
,
"
\n
print"
,
"
\n
if"
,
"
\n
```"
,
"
\n
#"
,
"
\n
<|/"
,
"
<|eot_id|>"
,
]
do_sample
:
true
temperature
:
0.8
top_p
:
0.95
num_fewshot
:
0
fewshot_config
:
fewshot_config
:
sampler
:
first_n
sampler
:
first_n
samples
:
!function
utils.list_fewshot_samples
samples
:
!function
utils.list_fewshot_samples
...
...
lm_eval/tasks/mbpp/utils.py
View file @
9d37bcff
...
@@ -13,17 +13,26 @@ except Exception as e:
...
@@ -13,17 +13,26 @@ except Exception as e:
raise
e
raise
e
def
pass_at_k
(
references
:
list
[
str
],
predictions
:
list
[
list
[
str
]],
k
:
list
[
int
]
=
None
):
def
pass_at_10
(
references
:
list
[
str
],
predictions
:
list
[
list
[
str
]],
k
:
list
[
int
]
=
None
):
global
compute_
global
compute_
assert
k
is
not
None
assert
k
is
not
None
if
isinstance
(
k
,
int
):
if
isinstance
(
k
,
int
):
k
=
[
k
]
k
=
[
k
]
if
isinstance
(
references
,
str
):
references
=
[
references
]
if
isinstance
(
predictions
[
0
],
str
):
predictions
=
[[
p
]
for
p
in
predictions
]
print
(
f
"
{
references
=
}
"
)
print
(
f
"
{
predictions
=
}
"
)
print
(
f
"
{
k
=
}
"
)
res
=
compute_
.
compute
(
res
=
compute_
.
compute
(
references
=
references
,
references
=
references
,
predictions
=
predictions
,
predictions
=
predictions
,
k
=
k
,
k
=
k
,
)
)
return
res
[
0
]
return
res
[
0
]
[
f
"pass@
{
str
(
k
[
0
])
}
"
]
def
extract_python_block
(
text
:
str
)
->
str
:
def
extract_python_block
(
text
:
str
)
->
str
:
...
@@ -51,8 +60,20 @@ def extract_code_blocks(text: str) -> str:
...
@@ -51,8 +60,20 @@ def extract_code_blocks(text: str) -> str:
return
ignore_annotations
+
matches
[
0
]
return
ignore_annotations
+
matches
[
0
]
def
doc_to_text
(
doc
:
dict
)
->
str
:
text
=
(
doc
[
"text"
]
+
"
\n
"
+
doc
[
"code"
].
split
(
":"
)[
0
]
+
":"
+
"
\n
"
+
"Here is the completed function:
\n\n
```python
\n
"
)
return
text
def
build_predictions
(
resps
:
list
[
list
[
str
]],
docs
:
list
[
dict
])
->
list
[
list
[
str
]]:
def
build_predictions
(
resps
:
list
[
list
[
str
]],
docs
:
list
[
dict
])
->
list
[
list
[
str
]]:
return
[[
extract_
code
_block
s
(
r
)
for
r
in
resp
]
for
resp
in
resps
]
return
[[
extract_
python
_block
(
r
)
for
r
in
resp
]
for
resp
in
resps
]
def
list_fewshot_samples
():
def
list_fewshot_samples
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment