gaoqiong / lm-evaluation-harness · Commits

Commit 0d1ef037
Authored Jan 17, 2024 by lintangsutawika

solved merge conflict

Parents: aa44be3f, ada4a31d
Changes: 424

Showing 20 changed files with 75 additions and 18 deletions (+75 -18)
lm_eval/tasks/lambada/lambada_standard.yaml                +1  -1
lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml      +1  -1
lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml    +1  -1
lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml      +1  -1
lm_eval/tasks/logiqa/logiqa.yaml                           +1  -1
lm_eval/tasks/logiqa2/logieval.yaml                        +1  -1
lm_eval/tasks/logiqa2/logiqa2.yaml                         +1  -1
lm_eval/tasks/mathqa/mathqa.yaml                           +1  -1
lm_eval/tasks/mc_taco/default.yaml                         +1  -1
lm_eval/tasks/medmcqa/medmcqa.yaml                         +18 -0
lm_eval/tasks/medmcqa/utils_medmcqa.py                     +19 -0
lm_eval/tasks/medqa/medqa.yaml                             +16 -0
lm_eval/tasks/medqa/preprocess_medqa.py                    +8  -0
lm_eval/tasks/mgsm/direct/direct_yaml                      +1  -1
lm_eval/tasks/mgsm/en_cot/cot_yaml                         +1  -1
lm_eval/tasks/mgsm/native_cot/cot_yaml                     +1  -1
lm_eval/tasks/mgsm/utils.py                                +0  -1
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml       +1  -1
lm_eval/tasks/mmlu/_generate_configs.py                    +0  -3
lm_eval/tasks/mmlu/default/_default_template_yaml          +1  -1
lm_eval/tasks/lambada/lambada_standard.yaml

@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 1.0
+  version: 1.0
lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml

@@ -17,4 +17,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 1.0
+  version: 1.0
lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml

@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 1.0
+  version: 1.0
lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml

@@ -17,4 +17,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 1.0
+  version: 1.0
lm_eval/tasks/logiqa/logiqa.yaml

@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 1.0
+  version: 1.0
lm_eval/tasks/logiqa2/logieval.yaml

@@ -24,4 +24,4 @@ filter_list:
         regex_pattern: "^\\s*([A-D])"
       - function: "take_first"
 metadata:
-  version: 0.0
+  version: 0.0
lm_eval/tasks/logiqa2/logiqa2.yaml

@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 0.0
+  version: 0.0
lm_eval/tasks/mathqa/mathqa.yaml

@@ -19,4 +19,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 1.0
+  version: 1.0
lm_eval/tasks/mc_taco/default.yaml

@@ -12,4 +12,4 @@ metric_list:
   - metric: acc
   - metric: f1
 metadata:
-  version: 1.0
+  version: 1.0
lm_eval/tasks/medmcqa/medmcqa.yaml (new file, 0 → 100644)

task: medmcqa
dataset_path: medmcqa
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: validation
doc_to_text: !function utils_medmcqa.doc_to_text
doc_to_target: cop
doc_to_choice: ['A', 'B', 'C', 'D']
should_decontaminate: true
doc_to_decontamination_query: "{{question}}"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
lm_eval/tasks/medmcqa/utils_medmcqa.py (new file, 0 → 100644)

# Copied from Master
def doc_to_text(doc) -> str:
    """
    Question: <question>
    Choices:
    A. <choice1>
    B. <choice2>
    C. <choice3>
    D. <choice4>
    Answer:
    """
    choices = [doc["opa"], doc["opb"], doc["opc"], doc["opd"]]
    option_choices = {'A': choices[0], 'B': choices[1], 'C': choices[2], 'D': choices[3]}

    prompt = "Question: " + doc["question"] + "\nChoices:\n"
    for choice, option in option_choices.items():
        prompt += f"{choice.upper()}. {option}\n"
    prompt += "Answer:"
    return prompt
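For reference, a minimal sketch of the prompt this helper renders, using a made-up record. The field values below are hypothetical (real ones come from the medmcqa dataset), and the import assumes the script is run from lm_eval/tasks/medmcqa/:

# Illustration only: fabricated MedMCQA-style record.
from utils_medmcqa import doc_to_text

sample_doc = {
    "question": "Which vitamin deficiency causes scurvy?",
    "opa": "Vitamin A",
    "opb": "Vitamin B12",
    "opc": "Vitamin C",
    "opd": "Vitamin D",
}

print(doc_to_text(sample_doc))
# Question: Which vitamin deficiency causes scurvy?
# Choices:
# A. Vitamin A
# B. Vitamin B12
# C. Vitamin C
# D. Vitamin D
# Answer: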
lm_eval/tasks/medqa/medqa.yaml (new file, 0 → 100644)

task: medqa_4options
dataset_path: GBaker/MedQA-USMLE-4-options-hf
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
doc_to_text: !function preprocess_medqa.doc_to_text
doc_to_target: !function preprocess_medqa.doc_to_target
doc_to_choice: ['A', 'B', 'C', 'D']
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
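As a rough illustration, tasks registered through YAML files like these are typically run through the harness's Python entry point. The sketch below assumes the v0.4-era lm_eval API (lm_eval.simple_evaluate) and a placeholder model; argument names may differ across versions:

# Sketch under assumptions: lm_eval.simple_evaluate as in v0.4-style releases,
# the "hf" backend, and gpt2 as a stand-in checkpoint.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",
    model_args="pretrained=gpt2",
    tasks=["medmcqa", "medqa_4options"],  # task names from the new YAMLs
)
print(results["results"])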
lm_eval/tasks/medqa/preprocess_medqa.py (new file, 0 → 100644)

def doc_to_text(doc) -> str:
    option_choices = {'A': doc["ending0"], 'B': doc["ending1"], 'C': doc["ending2"], 'D': doc["ending3"]}
    answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items())
    return f"Question: {doc['sent1']}\n{answers}Answer:"


def doc_to_target(doc) -> int:
    return doc["label"]
lm_eval/tasks/mgsm/direct/direct_yaml

@@ -26,4 +26,4 @@ metric_list:
     ignore_case: true
     ignore_punctuation: true
 metadata:
-  version: 0.0
+  version: 1.0
lm_eval/tasks/mgsm/en_cot/cot_yaml

@@ -28,4 +28,4 @@ filter_list:
         regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
       - function: "take_first"
 metadata:
-  version: 0.0
+  version: 1.0
lm_eval/tasks/mgsm/native_cot/cot_yaml

@@ -28,4 +28,4 @@ filter_list:
         regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
       - function: "take_first"
 metadata:
-  version: 1.0
+  version: 2.0
lm_eval/tasks/mgsm/utils.py

@@ -94,7 +94,6 @@ LANGUAGES = {
 def add_regex_pattern(regex_pattern):
     if regex_pattern is None:
         return {}
     return {
lm_eval/tasks/minerva_math/minerva_math_algebra.yaml

@@ -21,4 +21,4 @@ metric_list:
     higher_is_better: true
 num_fewshot: 0
 metadata:
-  version: 0.0
+  version: 1.0
lm_eval/tasks/mmlu/_generate_configs.py

@@ -7,7 +7,6 @@ import argparse
 from tqdm import tqdm

 from lm_eval import utils
 from lm_eval.logger import eval_logger

 SUBJECTS = {

@@ -82,7 +81,6 @@ def parse_args():
 if __name__ == "__main__":
     args = parse_args()

     # get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.

@@ -98,7 +96,6 @@ if __name__ == "__main__":
     ALL_CATEGORIES = []
     for subject, category in tqdm(SUBJECTS.items()):
         if category not in ALL_CATEGORIES:
             ALL_CATEGORIES.append(category)
lm_eval/tasks/mmlu/default/_default_template_yaml

@@ -12,4 +12,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  version: 0.0
+  version: 0.0