Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
0601d0bb
Commit
0601d0bb
authored
Jan 30, 2021
by
Anthony DiPofi
Browse files
add evaluation for TriviaQA dataset based on loglikelihood
parent
0f536808
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
16 additions
and
41 deletions
+16
-41
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+2
-0
lm_eval/tasks/lambada.py
lm_eval/tasks/lambada.py
+0
-1
lm_eval/tasks/triviaqa.py
lm_eval/tasks/triviaqa.py
+14
-39
lm_eval/utils_stream.py
lm_eval/utils_stream.py
+0
-1
No files found.
lm_eval/tasks/__init__.py
View file @
0601d0bb
...
...
@@ -15,6 +15,7 @@ from . import sat
from
.
import
arithmetic
from
.
import
lambada
from
.
import
piqa
from
.
import
triviaqa
TASK_REGISTRY
=
{
# GLUE
...
...
@@ -42,6 +43,7 @@ TASK_REGISTRY = {
"lambada"
:
lambada
.
LAMBADA
,
"piqa"
:
piqa
.
PiQA
,
"triviaqa"
:
triviaqa
.
TriviaQA
,
# "arc_easy": arc.ARCEasy, # not implemented yet
# "arc_challenge": arc.ARCChallenge, # not implemented yet
# "quac": quac.QuAC, # not implemented yet
...
...
lm_eval/tasks/lambada.py
View file @
0601d0bb
...
...
@@ -2,7 +2,6 @@ from lm_eval.base import Dataset, rf, mean
from
lm_eval.utils
import
sh
import
json
import
requests
import
ftfy
import
math
from
best_download
import
download_file
...
...
lm_eval/tasks/triviaqa.py
View file @
0601d0bb
import
os
import
json
import
random
from
lm_eval.base
import
Dataset
from
lm_eval.base
import
Dataset
,
mean
,
rf
from
..utils
import
sh
class
TriviaQA
(
Dataset
):
...
...
@@ -37,52 +37,27 @@ class TriviaQA(Dataset):
return
""
def
doc_to_text
(
self
,
doc
):
return
''
.
join
([
'Q:
'
,
doc
[
'Question'
],
'
\n\n
'
,
'A:
'
])
return
''
.
join
([
'Q:'
,
doc
[
'Question'
],
'
\n\n
'
,
'A:'
])
def
doc_to_target
(
self
,
doc
):
return
doc
[
'Answer'
][
'Aliases'
][
0
]
def
construct_requests
(
self
,
doc
,
ctx
):
""" Uses RequestFactory to construct Requests and returns an iterable of
Requests which will be sent to the LM.
ll
,
is_prediction
=
rf
.
loglikelihood
(
ctx
,
doc
[
'Answer'
][
'Value'
])
return
is_prediction
:param doc:
The document as returned from training_docs, validation_docs, or test_docs.
:param ctx: str
The context string, generated by fewshot_context. This includes the natural
language description, as well as the few shot examples, and the question
part of the document for `doc`.
"""
# TODO: implement evaluation.
raise
NotImplementedError
(
'Evaluation not implemented'
)
def
process_results
(
self
,
doc
,
results
):
"""Take a single document and the LM results and evaluates, returning a
dict where keys are the names of submetrics and values are the values of
the metric for that one document
:param doc:
The document as returned from training_docs, validation_docs, or test_docs.
:param results:
The results of the requests created in construct_requests.
"""
# TODO: implement evaluation.
raise
NotImplementedError
(
'Evaluation not implemented'
)
is_prediction
=
results
return
{
"acc"
:
float
(
is_prediction
[
1
])
}
def
aggregation
(
self
):
"""
:returns: {str: [float] -> float}
A dictionary where keys are the names of submetrics and values are
functions that aggregate a list of metrics
"""
# TODO: implement evaluation.
raise
NotImplementedError
(
'Evaluation not implemented'
)
return
{
"acc"
:
mean
,
}
def
higher_is_better
(
self
):
"""
:returns: {str: bool}
A dictionary where keys are the names of submetrics and values are
whether a higher value of the submetric is better
"""
# TODO: implement evaluation.
raise
NotImplementedError
(
'Evaluation not implemented'
)
\ No newline at end of file
return
{
"acc"
:
True
}
\ No newline at end of file
lm_eval/utils_stream.py
View file @
0601d0bb
import
os
from
functools
import
reduce
import
operator
import
lm_dataformat
as
lmd
from
tqdm
import
tqdm
import
json
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment