Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
98c5411c
Commit
98c5411c
authored
Jul 18, 2023
by
haileyschoelkopf
Browse files
remove template_aliases from existing tasks
parent
2bd45fd9
Changes
23
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
34 additions
and
144 deletions
+34
-144
lm_eval/api/task.py
lm_eval/api/task.py
+24
-14
lm_eval/tasks/arithmetic/arithmetic_1dc.yaml
lm_eval/tasks/arithmetic/arithmetic_1dc.yaml
+0
-1
lm_eval/tasks/arithmetic/arithmetic_2da.yaml
lm_eval/tasks/arithmetic/arithmetic_2da.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_2dm.yaml
lm_eval/tasks/arithmetic/arithmetic_2dm.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_2ds.yaml
lm_eval/tasks/arithmetic/arithmetic_2ds.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_3da.yaml
lm_eval/tasks/arithmetic/arithmetic_3da.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_3ds.yaml
lm_eval/tasks/arithmetic/arithmetic_3ds.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_4da.yaml
lm_eval/tasks/arithmetic/arithmetic_4da.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_4ds.yaml
lm_eval/tasks/arithmetic/arithmetic_4ds.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_5da.yaml
lm_eval/tasks/arithmetic/arithmetic_5da.yaml
+1
-13
lm_eval/tasks/arithmetic/arithmetic_5ds.yaml
lm_eval/tasks/arithmetic/arithmetic_5ds.yaml
+1
-13
lm_eval/tasks/hendrycks_ethics/commonsense.yaml
lm_eval/tasks/hendrycks_ethics/commonsense.yaml
+1
-1
lm_eval/tasks/hendrycks_ethics/deontology.yaml
lm_eval/tasks/hendrycks_ethics/deontology.yaml
+0
-1
lm_eval/tasks/hendrycks_ethics/justice.yaml
lm_eval/tasks/hendrycks_ethics/justice.yaml
+0
-1
lm_eval/tasks/hendrycks_ethics/utilitarianism.yaml
lm_eval/tasks/hendrycks_ethics/utilitarianism.yaml
+0
-4
lm_eval/tasks/lambada/lambada_openai.yaml
lm_eval/tasks/lambada/lambada_openai.yaml
+0
-1
lm_eval/tasks/lambada/lambada_standard.yaml
lm_eval/tasks/lambada/lambada_standard.yaml
+0
-1
lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml
lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml
+0
-1
lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml
lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml
+0
-1
lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml
lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml
+0
-1
No files found.
lm_eval/api/task.py
View file @
98c5411c
...
@@ -65,7 +65,7 @@ class TaskConfig(dict):
...
@@ -65,7 +65,7 @@ class TaskConfig(dict):
fewshot_split
:
str
=
None
# TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
fewshot_split
:
str
=
None
# TODO: assert that this not None if num_fewshot > 0. (?) assert if this is same split as one evaling (?)
# formatting / prompting options.
# formatting / prompting options.
# see docs/advanced_task_guide.md for more info
# see docs/advanced_task_guide.md for more info
template_aliases
:
Union
[
str
,
list
]
=
None
process_docs
:
Callable
=
None
doc_to_text
:
Union
[
Callable
,
str
]
=
None
doc_to_text
:
Union
[
Callable
,
str
]
=
None
doc_to_target
:
Union
[
Callable
,
str
]
=
None
doc_to_target
:
Union
[
Callable
,
str
]
=
None
doc_to_choice
:
Union
[
Callable
,
str
,
dict
,
list
]
=
None
doc_to_choice
:
Union
[
Callable
,
str
,
dict
,
list
]
=
None
...
@@ -91,15 +91,15 @@ class TaskConfig(dict):
...
@@ -91,15 +91,15 @@ class TaskConfig(dict):
# allow user-specified aliases so that users can
# allow user-specified aliases so that users can
# force prompt-compatibility for some prompt regardless of
# force prompt-compatibility for some prompt regardless of
# field names in prompt
# field names in prompt
if
self
.
template_aliases
:
#
if self.template_aliases:
if
type
(
self
.
doc_to_text
)
==
str
:
#
if type(self.doc_to_text) == str:
self
.
doc_to_text
=
self
.
template_aliases
+
self
.
doc_to_text
#
self.doc_to_text = self.template_aliases + self.doc_to_text
if
type
(
self
.
doc_to_target
)
==
str
:
#
if type(self.doc_to_target) == str:
self
.
doc_to_target
=
self
.
template_aliases
+
self
.
doc_to_target
#
self.doc_to_target = self.template_aliases + self.doc_to_target
if
type
(
self
.
gold_alias
)
==
str
:
#
if type(self.gold_alias) == str:
self
.
gold_alias
=
self
.
template_aliases
+
self
.
gold_alias
#
self.gold_alias = self.template_aliases + self.gold_alias
if
self
.
generation_kwargs
is
not
None
:
if
self
.
generation_kwargs
is
not
None
:
if
self
.
output_type
!=
"greedy_until"
:
if
self
.
output_type
!=
"greedy_until"
:
...
@@ -619,9 +619,9 @@ class ConfigurableTask(Task):
...
@@ -619,9 +619,9 @@ class ConfigurableTask(Task):
list
(
self
.
fewshot_docs
()),
self
,
rnd
=
random
.
Random
(
1234
)
list
(
self
.
fewshot_docs
()),
self
,
rnd
=
random
.
Random
(
1234
)
)
)
if
self
.
_config
.
template_aliases
is
not
None
:
#
if self._config.template_aliases is not None:
for
key
,
alias
in
self
.
_config
.
template_aliases
:
#
for key, alias in self._config.template_aliases:
self
.
dataset
.
rename_column
(
key
,
alias
)
#
self.dataset.rename_column(key, alias)
if
self
.
has_test_docs
():
if
self
.
has_test_docs
():
docs
=
self
.
test_docs
()
docs
=
self
.
test_docs
()
...
@@ -680,15 +680,25 @@ class ConfigurableTask(Task):
...
@@ -680,15 +680,25 @@ class ConfigurableTask(Task):
return
False
return
False
def
training_docs
(
self
):
def
training_docs
(
self
):
if
self
.
_config
.
training_split
is
not
None
:
if
self
.
has_training_docs
():
if
self
.
_config
.
process_docs
:
return
self
.
_config
.
process_docs
(
self
.
dataset
[
self
.
_config
.
training_split
]
)
return
self
.
dataset
[
self
.
_config
.
training_split
]
return
self
.
dataset
[
self
.
_config
.
training_split
]
def
validation_docs
(
self
):
def
validation_docs
(
self
):
if
self
.
_config
.
validation_split
is
not
None
:
if
self
.
has_validation_docs
():
if
self
.
_config
.
process_docs
:
return
self
.
_config
.
process_docs
(
self
.
dataset
[
self
.
_config
.
validation_split
]
)
return
self
.
dataset
[
self
.
_config
.
validation_split
]
return
self
.
dataset
[
self
.
_config
.
validation_split
]
def
test_docs
(
self
):
def
test_docs
(
self
):
if
self
.
_config
.
test_split
is
not
None
:
if
self
.
has_test_docs
():
if
self
.
_config
.
process_docs
:
return
self
.
_config
.
process_docs
(
self
.
dataset
[
self
.
_config
.
test_split
])
return
self
.
dataset
[
self
.
_config
.
test_split
]
return
self
.
dataset
[
self
.
_config
.
test_split
]
def
fewshot_docs
(
self
):
def
fewshot_docs
(
self
):
...
...
lm_eval/tasks/arithmetic/arithmetic_1dc.yaml
View file @
98c5411c
...
@@ -6,7 +6,6 @@ dataset_name: arithmetic_1dc
...
@@ -6,7 +6,6 @@ dataset_name: arithmetic_1dc
output_type
:
loglikelihood
output_type
:
loglikelihood
validation_split
:
validation
validation_split
:
validation
test_split
:
null
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
metric_list
:
...
...
lm_eval/tasks/arithmetic/arithmetic_2da.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_2da
task
:
arithmetic_2da
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_2da
dataset_name
:
arithmetic_2da
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_2dm.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_2dm
task
:
arithmetic_2dm
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_2dm
dataset_name
:
arithmetic_2dm
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_2ds.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_2ds
task
:
arithmetic_2ds
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_2ds
dataset_name
:
arithmetic_2ds
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_3da.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_3da
task
:
arithmetic_3da
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_3da
dataset_name
:
arithmetic_3da
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_3ds.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_3ds
task
:
arithmetic_3ds
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_3ds
dataset_name
:
arithmetic_3ds
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_4da.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_4da
task
:
arithmetic_4da
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_4da
dataset_name
:
arithmetic_4da
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_4ds.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_4ds
task
:
arithmetic_4ds
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_4ds
dataset_name
:
arithmetic_4ds
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_5da.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_5da
task
:
arithmetic_5da
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_5da
dataset_name
:
arithmetic_5da
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/arithmetic/arithmetic_5ds.yaml
View file @
98c5411c
group
:
include
:
arithmetic_1dc.yaml
-
arithmetic
task
:
arithmetic_5ds
task
:
arithmetic_5ds
dataset_path
:
EleutherAI/arithmetic
dataset_name
:
arithmetic_5ds
dataset_name
:
arithmetic_5ds
output_type
:
loglikelihood
validation_split
:
validation
test_split
:
null
template_aliases
:
"
"
doc_to_text
:
"
{{context}}"
doc_to_target
:
"
{{completion}}"
metric_list
:
-
metric
:
acc
aggregation
:
mean
higher_is_better
:
true
lm_eval/tasks/hendrycks_ethics/commonsense.yaml
View file @
98c5411c
group
:
group
:
-
hendrycks_ethics
-
hendrycks_ethics
task
:
ethics_cm
task
:
ethics_cm
dataset_path
:
hails
/hendrycks_ethics
dataset_path
:
EleutherAI
/hendrycks_ethics
dataset_name
:
commonsense
dataset_name
:
commonsense
output_type
:
multiple_choice
output_type
:
multiple_choice
training_split
:
train
training_split
:
train
...
...
lm_eval/tasks/hendrycks_ethics/deontology.yaml
View file @
98c5411c
include
:
commonsense.yaml
include
:
commonsense.yaml
task
:
ethics_deontology
task
:
ethics_deontology
dataset_path
:
hails/hendrycks_ethics
dataset_name
:
deontology
dataset_name
:
deontology
doc_to_text
:
"
Question:
Would
most
people
believe
this
reasonable
or
unreasonable
to
say?
\"
{{scenario}}
{{excuse.rstrip()}}
\"\n
Answer:"
doc_to_text
:
"
Question:
Would
most
people
believe
this
reasonable
or
unreasonable
to
say?
\"
{{scenario}}
{{excuse.rstrip()}}
\"\n
Answer:"
doc_to_target
:
label
doc_to_target
:
label
...
...
lm_eval/tasks/hendrycks_ethics/justice.yaml
View file @
98c5411c
...
@@ -3,6 +3,5 @@ group:
...
@@ -3,6 +3,5 @@ group:
-
hendrycks_ethics
-
hendrycks_ethics
task
:
ethics_justice
task
:
ethics_justice
dataset_name
:
justice
dataset_name
:
justice
output_type
:
multiple_choice
doc_to_text
:
"
Question:
Would
most
people
believe
this
reasonable
or
unreasonable
to
say?
\"
{{scenario}}
\"\n
Answer:"
doc_to_text
:
"
Question:
Would
most
people
believe
this
reasonable
or
unreasonable
to
say?
\"
{{scenario}}
\"\n
Answer:"
# TODO: impl. exact match for this and deontology
# TODO: impl. exact match for this and deontology
lm_eval/tasks/hendrycks_ethics/utilitarianism.yaml
View file @
98c5411c
...
@@ -2,11 +2,7 @@ include: commonsense.yaml
...
@@ -2,11 +2,7 @@ include: commonsense.yaml
group
:
group
:
-
hendrycks_ethics
-
hendrycks_ethics
task
:
ethics_utilitarianism
task
:
ethics_utilitarianism
dataset_path
:
hails/hendrycks_ethics
dataset_name
:
utilitarianism
dataset_name
:
utilitarianism
output_type
:
multiple_choice
training_split
:
train
test_split
:
test
doc_to_text
:
!function
utils.doc_to_text
doc_to_text
:
!function
utils.doc_to_text
doc_to_target
:
!function
utils.doc_to_target
doc_to_target
:
!function
utils.doc_to_target
doc_to_choice
:
[
'
no'
,
'
yes'
]
doc_to_choice
:
[
'
no'
,
'
yes'
]
...
...
lm_eval/tasks/lambada/lambada_openai.yaml
View file @
98c5411c
...
@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
...
@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
dataset_name
:
default
dataset_name
:
default
output_type
:
loglikelihood
output_type
:
loglikelihood
test_split
:
test
test_split
:
test
template_aliases
:
"
"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
should_decontaminate
:
true
should_decontaminate
:
true
...
...
lm_eval/tasks/lambada/lambada_standard.yaml
View file @
98c5411c
...
@@ -8,7 +8,6 @@ dataset_name: null
...
@@ -8,7 +8,6 @@ dataset_name: null
output_type
:
loglikelihood
output_type
:
loglikelihood
validation_split
:
validation
validation_split
:
validation
test_split
:
test
test_split
:
test
template_aliases
:
"
"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
should_decontaminate
:
true
should_decontaminate
:
true
...
...
lm_eval/tasks/lambada_cloze/lambada_openai_cloze.yaml
View file @
98c5411c
...
@@ -6,7 +6,6 @@ dataset_path: EleutherAI/lambada_openai
...
@@ -6,7 +6,6 @@ dataset_path: EleutherAI/lambada_openai
dataset_name
:
default
dataset_name
:
default
output_type
:
loglikelihood
output_type
:
loglikelihood
test_split
:
test
test_split
:
test
template_aliases
:
"
"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}
____.
->"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}
____.
->"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
should_decontaminate
:
true
should_decontaminate
:
true
...
...
lm_eval/tasks/lambada_cloze/lambada_standard_cloze.yaml
View file @
98c5411c
...
@@ -7,7 +7,6 @@ dataset_name: null
...
@@ -7,7 +7,6 @@ dataset_name: null
output_type
:
loglikelihood
output_type
:
loglikelihood
validation_split
:
validation
validation_split
:
validation
test_split
:
test
test_split
:
test
template_aliases
:
"
"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}
____.
->"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}
____.
->"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
should_decontaminate
:
true
should_decontaminate
:
true
...
...
lm_eval/tasks/lambada_multilingual/lambada_mt_en.yaml
View file @
98c5411c
...
@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
...
@@ -7,7 +7,6 @@ dataset_path: EleutherAI/lambada_openai
dataset_name
:
en
dataset_name
:
en
output_type
:
loglikelihood
output_type
:
loglikelihood
test_split
:
test
test_split
:
test
template_aliases
:
"
"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}"
doc_to_text
:
"
{{text.split('
')[:-1]|join('
')}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
doc_to_target
:
"
{{'
'+text.split('
')[-1]}}"
should_decontaminate
:
true
should_decontaminate
:
true
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment