gaoqiong / lm-evaluation-harness

Commit 6738b241, authored Jan 31, 2021 by thefazzer

Merge branch 'master' into fazz/refactor-task-coqa

Parents: 47384df7, 6598967b

Showing 3 changed files with 27 additions and 19 deletions:
lm_eval/base.py            +3  -6
lm_eval/tasks/__init__.py  +3  -3
lm_eval/tasks/anli.py      +21 -10

lm_eval/base.py

@@ -82,22 +82,19 @@ class Dataset(abc.ABC):
         """Whether the task has a test set"""
         pass
 
-    @abc.abstractmethod
     def training_docs(self):
         """
         :return: Iterable[obj]
             A iterable of any object, that doc_to_text can handle
         """
-        pass
+        return []
 
-    @abc.abstractmethod
     def validation_docs(self):
-        pass
+        return []
 
-    @abc.abstractmethod
     def test_docs(self):
-        pass
+        return []
 
     def fewshot_examples(self, k):
         if self._traindocs is None:
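The upshot of this hunk is that training_docs, validation_docs, and test_docs are no longer hard abstract requirements and default to an empty list, so calling code can iterate over any split without guarding against None. A minimal sketch of that calling pattern, assuming only a task object derived from this base class (the all_docs helper is illustrative, not part of the harness):

    def all_docs(task):
        # Hypothetical helper: gather every document a task exposes.
        # With the new defaults, a split the task does not implement
        # contributes an empty list instead of None, so no guards are needed.
        docs = []
        for split in (task.training_docs, task.validation_docs, task.test_docs):
            docs.extend(split())
        return docs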
lm_eval/tasks/__init__.py

@@ -59,9 +59,9 @@ TASK_REGISTRY = {
     # "webqs": webqs.WebQs, # not implemented yet
     # "wsc273": wsc273.WinogradSchemaChallenge273, # not implemented yet
     # "winogrande": winogrande.Winogrande, # not implemented yet
-    # "anli_r1": anli.ANLIRound1, # not implemented yet
-    # "anli_r2": anli.ANLIRound2, # not implemented yet
-    # "anli_r3": anli.ANLIRound3, # not implemented yet
+    "anli_r1": anli.ANLIRound1,
+    "anli_r2": anli.ANLIRound2,
+    "anli_r3": anli.ANLIRound3,
     # arithmetic
     "arithmetic_2da": arithmetic.Arithmetic2DPlus,
     "arithmetic_2ds": arithmetic.Arithmetic2DMinus,
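Since TASK_REGISTRY is a plain dict mapping task names to task classes, un-commenting these entries is all it takes to make the ANLI rounds resolvable by name. A hedged sketch of how a caller might look them up (the load_tasks helper is illustrative, not the harness's own API):

    from lm_eval import tasks

    def load_tasks(names):
        # Illustrative: each registry value is a task class; instantiating it
        # yields a ready-to-use task object (which may download its dataset).
        return {name: tasks.TASK_REGISTRY[name]() for name in names}

    anli_tasks = load_tasks(["anli_r1", "anli_r2", "anli_r3"])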
lm_eval/tasks/anli.py

+import numpy as np
+from lm_eval.base import rf, mean
 from . common import HFTask
 
 
 class ANLIBase(HFTask):
 
@@ -33,7 +35,6 @@ class ANLIBase(HFTask):
         return ""
 
     def doc_to_text(self, doc):
-        print(doc)
         # OA does this a bit weirdly: they prepend "anli 1: anli 1: " to the beginning
         # of the prompt (yes, repeating it!). also, " True, False, or Neither?" is directly
         # appended onto the question, with no "Answer:" or even a newline. Do we *really*
@@ -41,6 +42,9 @@ class ANLIBase(HFTask):
         return doc['premise'] + '\nQuestion: ' + doc['hypothesis'] + '\nTrue, False, or Neither?'
 
     def doc_to_target(self, doc):
+        # True = entailment
+        # False = contradiction
+        # Neither = neutral
         return " " + ["True", "Neither", "False"][doc['label']]
 
     def construct_requests(self, doc, ctx):
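To make the prompt format concrete, here is an invented ANLI-style document run through the two methods above (the premise and hypothesis text are made up; the field names and label convention follow the diff):

    doc = {
        "premise": "A soccer game with multiple males playing.",   # illustrative
        "hypothesis": "Some men are playing a sport.",              # illustrative
        "label": 0,  # 0 = entailment, 1 = neutral, 2 = contradiction
    }

    # doc_to_text(doc) would render:
    #   A soccer game with multiple males playing.
    #   Question: Some men are playing a sport.
    #   True, False, or Neither?
    #
    # doc_to_target(doc) indexes ["True", "Neither", "False"] with the label
    # and prepends a space, so for this document it would return " True".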
@@ -54,8 +58,10 @@ class ANLIBase(HFTask):
         language description, as well as the few shot examples, and the question
         part of the document for `doc`.
         """
-        # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
+        ll_true, _ = rf.loglikelihood(ctx, " True")
+        ll_neither, _ = rf.loglikelihood(ctx, " Neither")
+        ll_false, _ = rf.loglikelihood(ctx, " False")
+        return ll_true, ll_neither, ll_false
 
     def process_results(self, doc, results):
         """Take a single document and the LM results and evaluates, returning a
@@ -67,8 +73,11 @@ class ANLIBase(HFTask):
         :param results:
             The results of the requests created in construct_requests.
         """
-        # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
+        gold = doc["label"]
+        pred = np.argmax(results)
+        return {
+            "acc": pred == gold
+        }
 
     def aggregation(self):
         """
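Together, construct_requests and process_results implement standard multiple-choice scoring: the context is scored against each candidate continuation and the highest loglikelihood wins. A small sketch of that final step in isolation (the loglikelihood values are invented):

    import numpy as np

    # Loglikelihoods an LM might assign to " True", " Neither", " False"
    # for a single ANLI prompt (invented numbers, in the order used by
    # the tuple returned from construct_requests).
    results = (-1.2, -2.7, -3.1)
    gold = 0  # entailment, i.e. " True"

    pred = np.argmax(results)    # index of the best-scoring continuation
    acc = bool(pred == gold)     # what process_results reports as "acc"
    print(acc)                   # True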
@@ -76,8 +85,9 @@ class ANLIBase(HFTask):
         A dictionary where keys are the names of submetrics and values are
             functions that aggregate a list of metrics
         """
-        # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
+        return {
+            "acc": mean
+        }
 
     def higher_is_better(self):
         """
@@ -85,8 +95,9 @@ class ANLIBase(HFTask):
         A dictionary where keys are the names of submetrics and values are
             whether a higher value of the submetric is better
         """
-        # TODO: implement evaluation.
-        raise NotImplementedError('Evaluation not implemented')
+        return {
+            "acc": True
+        }
 
 class ANLIRound1(ANLIBase):
     SPLIT = 1
@@ -95,4 +106,4 @@ class ANLIRound2(ANLIBase):
     SPLIT = 2
 
 class ANLIRound3(ANLIBase):
-    SPLIT = 3
\ No newline at end of file
+    SPLIT = 3