Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
e5161a6d
Commit
e5161a6d
authored
Jul 14, 2023
by
lintangsutawika
Browse files
changes
parent
57f08e40
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
33 additions
and
53 deletions
+33
-53
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+24
-15
lm_eval/tasks/benchmarks/pythia.yaml
lm_eval/tasks/benchmarks/pythia.yaml
+3
-3
lm_eval/tasks/winogrande/default.yaml
lm_eval/tasks/winogrande/default.yaml
+6
-7
lm_eval/tasks/winogrande/preprocess.py
lm_eval/tasks/winogrande/preprocess.py
+0
-15
lm_eval/tasks/winogrande/winogrande.yaml
lm_eval/tasks/winogrande/winogrande.yaml
+0
-13
No files found.
lm_eval/tasks/__init__.py
View file @
e5161a6d
...
@@ -61,6 +61,7 @@ def include_benchmarks(task_dir, benchmark_dir="benchmarks"):
...
@@ -61,6 +61,7 @@ def include_benchmarks(task_dir, benchmark_dir="benchmarks"):
if
(
subdirs
==
[]
or
subdirs
==
[
"__pycache__"
])
and
(
len
(
file_list
)
>
0
):
if
(
subdirs
==
[]
or
subdirs
==
[
"__pycache__"
])
and
(
len
(
file_list
)
>
0
):
for
f
in
file_list
:
for
f
in
file_list
:
if
f
.
endswith
(
".yaml"
):
if
f
.
endswith
(
".yaml"
):
try
:
benchmark_path
=
os
.
path
.
join
(
root
,
f
)
benchmark_path
=
os
.
path
.
join
(
root
,
f
)
with
open
(
benchmark_path
,
"rb"
)
as
file
:
with
open
(
benchmark_path
,
"rb"
)
as
file
:
...
@@ -76,6 +77,14 @@ def include_benchmarks(task_dir, benchmark_dir="benchmarks"):
...
@@ -76,6 +77,14 @@ def include_benchmarks(task_dir, benchmark_dir="benchmarks"):
GROUP_REGISTRY
[
group
].
append
(
task
)
GROUP_REGISTRY
[
group
].
append
(
task
)
else
:
else
:
GROUP_REGISTRY
[
group
]
=
[
task
]
GROUP_REGISTRY
[
group
]
=
[
task
]
ALL_TASKS
.
add
(
group
)
except
Exception
as
error
:
eval_logger
.
warning
(
"Failed to load benchmark in
\n
"
f
"
{
benchmark_path
}
\n
"
" Benchmark will not be added to registry
\n
"
f
" Error:
{
error
}
"
)
task_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
+
"/"
task_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
+
"/"
...
...
lm_eval/tasks/benchmarks/pythia.yaml
View file @
e5161a6d
...
@@ -8,6 +8,6 @@ task:
...
@@ -8,6 +8,6 @@ task:
-
winogrande
-
winogrande
-
arc_challenge
-
arc_challenge
-
arc_easy
-
arc_easy
-
logiqa
#
- logiqa
-
blimp_*
#
- blimp_*
-
hendrycksTest*
#
- hendrycksTest*
lm_eval/tasks/winogrande/default.yaml
View file @
e5161a6d
group
:
-
super-glue-lm-eval-v1
task
:
winogrande
task
:
winogrande
dataset_path
:
winogrande
dataset_path
:
winogrande
dataset_name
:
winogrande_xl
dataset_name
:
winogrande_xl
output_type
:
multiple_choice
output_type
:
multiple_choice
should_decontaminate
:
true
doc_to_decontamination_query
:
"
{{sentence}}"
training_split
:
train
training_split
:
train
validation_split
:
validation
validation_split
:
validation
doc_to_text
:
!function
preprocess_winogrande.doc_to_text
doc_to_target
:
!function
preprocess_winogrande.doc_to_target
doc_to_choice
:
!function
preprocess_winogrande.doc_to_choice
should_decontaminate
:
true
doc_to_decontamination_query
:
sentence
metric_list
:
metric_list
:
-
metric
:
exact_match
-
metric
:
acc
aggregation
:
mean
aggregation
:
mean
higher_is_better
:
true
higher_is_better
:
true
ignore_case
:
true
ignore_punctuation
:
true
lm_eval/tasks/winogrande/preprocess.py
deleted
100644 → 0
View file @
57f08e40
import
re
from
lm_eval.utils
import
general_detokenize
def
partial_context
(
doc
,
option
):
# Substitute the pronoun in the sentence with the specified option
# and ignore everything after.
pronoun_loc
=
doc
[
"sentence"
].
index
(
"_"
)
return
doc
[
"sentence"
][:
pronoun_loc
]
+
option
def
partial_target
(
doc
):
# The target is everything after the document specified pronoun.
pronoun_loc
=
doc
[
"sentence"
].
index
(
"_"
)
+
1
return
" "
+
doc
[
"sentence"
][
pronoun_loc
:].
strip
()
lm_eval/tasks/winogrande/winogrande.yaml
deleted
100644 → 0
View file @
57f08e40
task
:
winogrande
dataset_path
:
winogrande
dataset_name
:
winogrande_xl
output_type
:
multiple_choice
training_split
:
train
validation_split
:
validation
doc_to_text
:
!function
preprocess_winogrande.doc_to_text
doc_to_target
:
!function
preprocess_winogrande.doc_to_target
doc_to_choice
:
!function
preprocess_winogrande.doc_to_choice
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment