Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
2b40017b
Commit
2b40017b
authored
Jan 15, 2024
by
haileyschoelkopf
Browse files
Merge branch 'main' into add-chat-templating
parents
bbcdffb8
ff739414
Changes
180
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
77 additions
and
16 deletions
+77
-16
lm_eval/tasks/code_x_glue/code-text/java.yaml
lm_eval/tasks/code_x_glue/code-text/java.yaml
+1
-1
lm_eval/tasks/code_x_glue/code-text/javascript.yaml
lm_eval/tasks/code_x_glue/code-text/javascript.yaml
+1
-1
lm_eval/tasks/code_x_glue/code-text/php.yaml
lm_eval/tasks/code_x_glue/code-text/php.yaml
+1
-1
lm_eval/tasks/code_x_glue/code-text/python.yaml
lm_eval/tasks/code_x_glue/code-text/python.yaml
+1
-1
lm_eval/tasks/code_x_glue/code-text/ruby.yaml
lm_eval/tasks/code_x_glue/code-text/ruby.yaml
+1
-1
lm_eval/tasks/coqa/default.yaml
lm_eval/tasks/coqa/default.yaml
+1
-1
lm_eval/tasks/drop/default.yaml
lm_eval/tasks/drop/default.yaml
+1
-1
lm_eval/tasks/fld/fld_default.yaml
lm_eval/tasks/fld/fld_default.yaml
+1
-1
lm_eval/tasks/gsm8k/gsm8k-cot-self-consistency.yaml
lm_eval/tasks/gsm8k/gsm8k-cot-self-consistency.yaml
+1
-1
lm_eval/tasks/gsm8k/gsm8k-cot.yaml
lm_eval/tasks/gsm8k/gsm8k-cot.yaml
+1
-1
lm_eval/tasks/gsm8k/gsm8k.yaml
lm_eval/tasks/gsm8k/gsm8k.yaml
+1
-1
lm_eval/tasks/ifeval/ifeval.yaml
lm_eval/tasks/ifeval/ifeval.yaml
+1
-1
lm_eval/tasks/medmcqa/medmcqa.yaml
lm_eval/tasks/medmcqa/medmcqa.yaml
+18
-0
lm_eval/tasks/medmcqa/utils_medmcqa.py
lm_eval/tasks/medmcqa/utils_medmcqa.py
+19
-0
lm_eval/tasks/medqa/medqa.yaml
lm_eval/tasks/medqa/medqa.yaml
+16
-0
lm_eval/tasks/medqa/preprocess_medqa.py
lm_eval/tasks/medqa/preprocess_medqa.py
+8
-0
lm_eval/tasks/mgsm/direct/direct_yaml
lm_eval/tasks/mgsm/direct/direct_yaml
+1
-1
lm_eval/tasks/mgsm/en_cot/cot_yaml
lm_eval/tasks/mgsm/en_cot/cot_yaml
+1
-1
lm_eval/tasks/mgsm/native_cot/cot_yaml
lm_eval/tasks/mgsm/native_cot/cot_yaml
+1
-1
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
+1
-1
No files found.
lm_eval/tasks/code_x_glue/code-text/java.yaml
View file @
2b40017b
...
...
@@ -18,4 +18,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
True
metadata
:
version
:
0
.0
version
:
1
.0
lm_eval/tasks/code_x_glue/code-text/javascript.yaml
View file @
2b40017b
...
...
@@ -18,4 +18,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
True
metadata
:
version
:
0
.0
version
:
1
.0
lm_eval/tasks/code_x_glue/code-text/php.yaml
View file @
2b40017b
...
...
@@ -18,4 +18,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
True
metadata
:
version
:
0
.0
version
:
1
.0
lm_eval/tasks/code_x_glue/code-text/python.yaml
View file @
2b40017b
...
...
@@ -18,4 +18,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
True
metadata
:
version
:
0
.0
version
:
1
.0
lm_eval/tasks/code_x_glue/code-text/ruby.yaml
View file @
2b40017b
...
...
@@ -18,4 +18,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
True
metadata
:
version
:
2
.0
version
:
3
.0
lm_eval/tasks/coqa/default.yaml
View file @
2b40017b
...
...
@@ -19,4 +19,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
version
:
2
.0
version
:
3
.0
lm_eval/tasks/drop/default.yaml
View file @
2b40017b
...
...
@@ -21,4 +21,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
version
:
2
.0
version
:
3
.0
lm_eval/tasks/fld/fld_default.yaml
View file @
2b40017b
...
...
@@ -18,4 +18,4 @@ filter_list:
-
function
:
remove_whitespace
-
function
:
take_first
metadata
:
version
:
1
.0
version
:
2
.0
lm_eval/tasks/gsm8k/gsm8k-cot-self-consistency.yaml
View file @
2b40017b
...
...
@@ -31,4 +31,4 @@ filter_list:
-
function
:
"
majority_vote"
-
function
:
"
take_first"
metadata
:
version
:
0
.0
version
:
1
.0
lm_eval/tasks/gsm8k/gsm8k-cot.yaml
View file @
2b40017b
...
...
@@ -41,4 +41,4 @@ filter_list:
regex_pattern
:
"
The
answer
is
(
\\
-?[0-9
\\
.
\\
,]+)."
-
function
:
"
take_first"
metadata
:
version
:
0
.0
version
:
1
.0
lm_eval/tasks/gsm8k/gsm8k.yaml
View file @
2b40017b
...
...
@@ -34,4 +34,4 @@ filter_list:
regex_pattern
:
"
####
(
\\
-?[0-9
\\
.
\\
,]+)"
-
function
:
"
take_first"
metadata
:
version
:
1
.0
version
:
2
.0
lm_eval/tasks/ifeval/ifeval.yaml
View file @
2b40017b
...
...
@@ -26,4 +26,4 @@ metric_list:
aggregation
:
!function
utils.agg_inst_level_acc
higher_is_better
:
true
metadata
:
version
:
1
.0
version
:
2
.0
lm_eval/tasks/medmcqa/medmcqa.yaml
0 → 100644
View file @
2b40017b
task
:
medmcqa
dataset_path
:
medmcqa
output_type
:
multiple_choice
training_split
:
train
validation_split
:
validation
test_split
:
validation
doc_to_text
:
!function
utils_medmcqa.doc_to_text
doc_to_target
:
cop
doc_to_choice
:
[
'
A'
,
'
B'
,
'
C'
,
'
D'
]
should_decontaminate
:
true
doc_to_decontamination_query
:
"
{{question}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
-
metric
:
acc_norm
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/medmcqa/utils_medmcqa.py
0 → 100644
View file @
2b40017b
# Copied from Master
def doc_to_text(doc) -> str:
    """Render a MedMCQA record as a lettered multiple-choice prompt.

    The prompt has the layout::

        Question: <question>
        Choices:
        A. <choice1>
        B. <choice2>
        C. <choice3>
        D. <choice4>
        Answer:

    Args:
        doc: Mapping providing the keys ``"question"``, ``"opa"``,
            ``"opb"``, ``"opc"`` and ``"opd"``.

    Returns:
        The prompt text, ending in ``"Answer:"`` with no trailing newline.

    Raises:
        KeyError: If one of the keys listed above is missing from ``doc``.
    """
    option_choices = {
        "A": doc["opa"],
        "B": doc["opb"],
        "C": doc["opc"],
        "D": doc["opd"],
    }
    # The letters are already upper-case literals, so they are used as-is;
    # one join replaces repeated string concatenation inside a loop.
    choice_lines = "".join(
        f"{letter}. {text}\n" for letter, text in option_choices.items()
    )
    return "Question: " + doc["question"] + "\nChoices:\n" + choice_lines + "Answer:"
lm_eval/tasks/medqa/medqa.yaml
0 → 100644
View file @
2b40017b
task
:
medqa_4options
dataset_path
:
GBaker/MedQA-USMLE-4-options-hf
output_type
:
multiple_choice
training_split
:
train
validation_split
:
validation
test_split
:
test
doc_to_text
:
!function
preprocess_medqa.doc_to_text
doc_to_target
:
!function
preprocess_medqa.doc_to_target
doc_to_choice
:
[
'
A'
,
'
B'
,
'
C'
,
'
D'
]
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
-
metric
:
acc_norm
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/medqa/preprocess_medqa.py
0 → 100644
View file @
2b40017b
def doc_to_text(doc) -> str:
    """Render a MedQA record as a lettered multiple-choice prompt.

    Args:
        doc: Mapping providing the keys ``"sent1"`` (the question text) and
            ``"ending0"`` .. ``"ending3"`` (the four answer options, shown
            as A-D in that order).

    Returns:
        ``"Question: <sent1>"`` on the first line, one ``"<letter>. <option>"``
        line per option, then ``"Answer:"`` with no trailing newline.

    Raises:
        KeyError: If one of the keys listed above is missing from ``doc``.
    """
    option_choices = {
        "A": doc["ending0"],
        "B": doc["ending1"],
        "C": doc["ending2"],
        "D": doc["ending3"],
    }
    # NOTE(review): the single spaces after "." and after "Question:" were
    # reconstructed from a whitespace-mangled listing - confirm against the
    # original file before relying on exact prompt bytes.
    answers = "".join(f"{k}. {v}\n" for k, v in option_choices.items())
    return f"Question: {doc['sent1']}\n{answers}Answer:"
def doc_to_target(doc) -> int:
    """Return the gold answer stored on a MedQA record.

    Args:
        doc: Mapping providing a ``"label"`` entry.

    Returns:
        The value found under ``"label"``, passed through unchanged.
        Presumably an index into the A-D choice list - verify against the
        dataset schema.

    Raises:
        KeyError: If ``doc`` has no ``"label"`` key.
    """
    return doc["label"]
lm_eval/tasks/mgsm/direct/direct_yaml
View file @
2b40017b
...
...
@@ -26,4 +26,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
version:
0
.0
version:
1
.0
lm_eval/tasks/mgsm/en_cot/cot_yaml
View file @
2b40017b
...
...
@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
metadata:
version:
0
.0
version:
1
.0
lm_eval/tasks/mgsm/native_cot/cot_yaml
View file @
2b40017b
...
...
@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
metadata:
version:
1
.0
version:
2
.0
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml
View file @
2b40017b
...
...
@@ -21,4 +21,4 @@ metric_list:
higher_is_better
:
true
num_fewshot
:
0
metadata
:
version
:
0
.0
version
:
1
.0
Prev
1
…
4
5
6
7
8
9
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment