gaoqiong / lm-evaluation-harness · Commits

Commit 37ac5f46, authored Nov 03, 2023 by haileyschoelkopf

remove gold_alias from codebase

Parent: c7b3f538
Showing 5 changed files with 5 additions and 35 deletions.
lm_eval/api/task.py (+0 −21)
lm_eval/tasks/gsm8k/gsm8k-cot.yaml (+3 −2)
lm_eval/tasks/gsm8k/gsm8k.yaml (+2 −2)
lm_eval/tasks/hendrycks_ethics/utilitarianism_original_yaml (+0 −1)
lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py (+0 −9)
lm_eval/api/task.py (view file @ 37ac5f46)
@@ -69,7 +69,6 @@ class TaskConfig(dict):
     doc_to_text: Union[Callable, str] = None
     doc_to_target: Union[Callable, str] = None
     doc_to_choice: Union[Callable, str, dict, list] = None
-    gold_alias: Union[Callable, str] = None
     process_results: Union[Callable, str] = None
     use_prompt: str = None
     description: str = ""
...
@@ -893,26 +892,6 @@ class ConfigurableTask(Task):
         else:
             raise TypeError
 
-    def gold_alias(self, doc):
-        # returns a version of the gold target answer to a document,
-        # which should be passed into metric for scoring as the ground truth.
-        # in multiple_choice tasks, this should be castable to an int corresponding to the index
-        # within the answer choices, while doc_to_target is the string version of {{answer_choices[gold]}}.
-        if self.config.gold_alias is not None:
-            doc_to_target = self.config.gold_alias
-        else:
-            return self.doc_to_target(doc)
-
-        if type(doc_to_target) == str:
-            return utils.apply_template(doc_to_target, doc)
-        elif callable(doc_to_target):
-            return doc_to_target(doc)
-        elif hasattr(doc_to_target, "apply"):
-            return doc_to_target.apply(doc)[1]
-        else:
-            raise TypeError
-
     def construct_requests(
         self, doc: dict, ctx: str, **kwargs
     ) -> Union[List[Instance], Instance]:
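The removed method resolved a configured gold_alias the same way doc_to_target is resolved: a plain string is treated as a Jinja template, a callable is invoked on the document, and a promptsource-style object is applied with its target half taken. With it gone, doc_to_target alone supplies the scoring reference. A minimal standalone sketch of that dispatch, using jinja2 directly and a hypothetical render_target name rather than the harness's own utils.apply_template:

from typing import Any, Callable, Union

from jinja2 import Template


def render_target(spec: Union[str, Callable], doc: dict) -> Any:
    # string spec: a Jinja template rendered against the document's fields
    if isinstance(spec, str):
        return Template(spec).render(**doc)
    # callable spec: user-supplied Python, called on the raw document
    if callable(spec):
        return spec(doc)
    # promptsource-style prompt object: .apply(doc) -> (input, target)
    if hasattr(spec, "apply"):
        return spec.apply(doc)[1]
    raise TypeError(f"unsupported target spec: {type(spec)}")


# e.g. with the gsm8k-cot template below (sample doc made up):
# render_target("{{answer.split('### ')[-1].rstrip()}}", {"answer": "... #### 72"})
# -> "72"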
lm_eval/tasks/gsm8k/gsm8k-cot.yaml (view file @ 37ac5f46)
@@ -14,17 +14,18 @@ Q: There were nine computers in the server room. Five more computers were instal
 Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?\n\nA: Michael started with 58 golf balls. After losing 23 on tuesday, he had 58 - 23 = 35. After losing 2 more, he had 35 - 2 = 33 golf balls. The answer is 33.\n\n\
 Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?\n\nA: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. So she has 23 - 15 dollars left. 23 - 15 is 8. The answer is 8.\n\n\
 Q: {{question}}\n\nA:"
-doc_to_target: "{{answer}}" #" {{answer.split('### ')[-1].rstrip()}}"
-gold_alias: "{{answer.split('### ')[-1].rstrip()}}" # this post-processes the reference that we'll score against
+doc_to_target: "{{answer.split('### ')[-1].rstrip()}}"
 metric_list:
   - metric: exact_match
     aggregation: mean
     higher_is_better: true
     ignore_case: true
+    ignore_whitespace: true
     ignore_punctuation: false
     regexes_to_ignore:
       - ","
       - "\\$"
+      - ".*### "
 generation_kwargs:
   until:
     - "Q:"
...
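This change folds the old gold_alias post-processing into doc_to_target itself, so the reference scored against is the final number after the "### " delimiter rather than the full worked solution, and the new ".*### " entry in regexes_to_ignore strips any leading text up to that delimiter from the strings being compared. An illustrative check (not harness code) of what the Jinja expression extracts; the sample answer string is made up, following GSM8K's "#### <answer>" convention:

# a fabricated GSM8K-style answer: worked solution, then "#### <final answer>"
answer = (
    "Olivia spent 5 x 3 = <<5*3=15>>15 dollars.\n"
    "So she has 23 - 15 = <<23-15=8>>8 dollars left.\n"
    "#### 8"
)

# Jinja evaluates {{answer.split('### ')[-1].rstrip()}} with Python semantics:
target = answer.split("### ")[-1].rstrip()
print(target)  # -> "8"  ("#### 8" contains "### " starting at its second "#")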
lm_eval/tasks/gsm8k/gsm8k.yaml (view file @ 37ac5f46)
 group:
   - math_word_problems
-task: gsm8k_yaml
+task: gsm8k
 dataset_path: gsm8k
 dataset_name: main
 output_type: generate_until
...
@@ -9,12 +9,12 @@ fewshot_split: train
 test_split: test
 doc_to_text: "Question: {{question}}\nAnswer:"
 doc_to_target: "{{answer}}" #" {{answer.split('### ')[-1].rstrip()}}"
-gold_alias: "{{answer.split('### ')[-1].rstrip()}}" # this post-processes the reference that we'll score against
 metric_list:
   - metric: exact_match
     aggregation: mean
     higher_is_better: true
     ignore_case: true
+    ignore_whitespace: true
     ignore_punctuation: false
     regexes_to_ignore:
       - ","
...
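Unlike the cot variant, the plain gsm8k task keeps the full annotated solution as doc_to_target and leans on the metric's normalization flags. A rough re-creation of what flags like ignore_case, ignore_whitespace, and regexes_to_ignore ask exact_match to do before comparing; the function name and the order of operations are illustrative, not the harness's actual implementation:

import re
import string


def normalized_exact_match(
    pred: str,
    ref: str,
    ignore_case: bool = True,
    ignore_whitespace: bool = True,
    ignore_punctuation: bool = False,
    regexes_to_ignore: tuple = (",",),
) -> bool:
    def norm(s: str) -> str:
        # delete ignorable substrings (e.g. thousands separators) first
        for pattern in regexes_to_ignore:
            s = re.sub(pattern, "", s)
        if ignore_case:
            s = s.lower()
        if ignore_punctuation:
            s = s.translate(str.maketrans("", "", string.punctuation))
        if ignore_whitespace:
            s = re.sub(r"\s+", "", s)
        return s

    return norm(pred) == norm(ref)


# e.g. normalized_exact_match(" 1,234", "1234") -> True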
lm_eval/tasks/hendrycks_ethics/utilitarianism_original_yaml (view file @ 37ac5f46)
@@ -9,7 +9,6 @@
 # template_aliases: #"{% set answer_choices = range(1, 11)|list %}"
 # doc_to_text: 'Activity: "{{activity}}"\nRating:'
 # doc_to_target: "{{answer_choices[label]}}"
-# gold_alias: "{{label}}" # this will be cast to an int.
 # metric_list:
 # - metric: acc
 # TODO: we want this to be implemented as a winograd_schema task type, actually
lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py (view file @ 37ac5f46)
@@ -3,12 +3,3 @@ def doc_to_text(doc) -> str:
     return "Abstract: {}\nQuestion: {}\nAnswer:".format(
         ctxs, doc["QUESTION"], doc["final_decision"]
     )
-
-
-def doc_to_target(doc) -> str:
-    return " {}".format(doc["final_decision"])
-
-
-def gold_alias(doc):
-    dict_to_label = {"yes": 0, "no": 1, "maybe": 2}
-    return dict_to_label[doc["final_decision"]]
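The deleted pubmedqa helper shows why gold_alias existed at all: accuracy-style metrics for multiple_choice tasks want the index of the gold choice, while doc_to_target yields its string form. A small sketch of that lookup; the choice list comes from the deleted code, and the function name is illustrative:

# PubMedQA's three labels, in the order the deleted gold_alias assigned them
PUBMEDQA_CHOICES = ["yes", "no", "maybe"]


def label_index(doc: dict) -> int:
    # equivalent to the removed mapping {"yes": 0, "no": 1, "maybe": 2}
    return PUBMEDQA_CHOICES.index(doc["final_decision"])


assert label_index({"final_decision": "maybe"}) == 2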