Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
12880f1c
Commit
12880f1c
authored
Jan 31, 2021
by
thefazzer
Browse files
Initial skeleton refactoring
parent
66558b35
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
16 deletions
+26
-16
lm_eval/tasks/coqa.py
lm_eval/tasks/coqa.py
+26
-16
No files found.
lm_eval/tasks/coqa.py
View file @
12880f1c
...
@@ -4,19 +4,23 @@ import json
...
@@ -4,19 +4,23 @@ import json
import
random
import
random
from
lm_eval.base
import
Dataset
from
lm_eval.base
import
Dataset
from
..utils
import
sh
from
..utils
import
sh
import
itertools
class
CoQA
(
Dataset
):
class
CoQA
(
Dataset
):
def
__init__
(
self
):
def
__init__
(
self
):
self
.
download
()
self
.
download
()
def
download
(
self
):
def
download
(
self
):
#TODO: don't download if files already there
sh
(
"""
sh
(
"""
mkdir -p data/coqa
mkdir -p data/coqa
wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json -O data/coqa/coqa-train-v1.0.json
wget
--no-clobber
http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json -O data/coqa/coqa-train-v1.0.json
wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json -O data/coqa/coqa-dev-v1.0.json
wget
--no-clobber
http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json -O data/coqa/coqa-dev-v1.0.json
"""
)
"""
)
@
classmethod
def
get_answers
(
cls
,
doc
,
turn_id
):
answers
=
zip
(
doc
[
"answers"
],
zip
(
doc
[
"additional_answers"
]))
return
answers
[
turn_id
-
1
]
def
has_training_docs
(
self
):
def
has_training_docs
(
self
):
return
True
return
True
...
@@ -36,16 +40,15 @@ class CoQA(Dataset):
...
@@ -36,16 +40,15 @@ class CoQA(Dataset):
pass
pass
def
fewshot_description
(
self
):
def
fewshot_description
(
self
):
# TODO: figure out description
return
"Given a passage and a conversation so far, answer the next question in the conversation."
return
""
def
doc_to_text
(
self
,
doc
):
def
doc_to_text
(
self
,
doc
):
# TODO: implement.
qa_pairs
=
[(
q
,
a
)
in
zip_longest
(
doc
[
"questions"
],
doc
[
"answers"
][:
-
1
])]
# truncate target answer
r
aise
NotImplementedError
(
'doc_to_text not implemented'
)
r
eturn
"{}
\n\n
{}"
.
format
(
doc
[
"story"
],
f
"Q:
{
q
}
"
+
'
\n\n
'
+
f
"A:
{
a
}
"
)
def
doc_to_target
(
self
,
doc
):
def
doc_to_target
(
self
,
doc
):
# TODO:
implement.
# TODO:
all distinct answers taking into account whitespace?
r
aise
NotImplementedError
(
'doc_to_target not implemented'
)
r
eturn
get_answers
(
doc
,
len
(
doc
[
"questions"
])
)
def
construct_requests
(
self
,
doc
,
ctx
):
def
construct_requests
(
self
,
doc
,
ctx
):
""" Uses RequestFactory to construct Requests and returns an iterable of
""" Uses RequestFactory to construct Requests and returns an iterable of
...
@@ -58,8 +61,11 @@ class CoQA(Dataset):
...
@@ -58,8 +61,11 @@ class CoQA(Dataset):
language description, as well as the few shot examples, and the question
language description, as well as the few shot examples, and the question
part of the document for `doc`.
part of the document for `doc`.
"""
"""
# TODO: implement evaluation.
ll_alternative_answers
=
[
raise
NotImplementedError
(
'Evaluation not implemented'
)
rf
.
loglikelihood
(
ctx
,
" "
+
answer
)
for
answer
in
get_answers
(
doc
,
len
(
doc
[
"questions"
]))
]
return
ll_alternative_answers
def
process_results
(
self
,
doc
,
results
):
def
process_results
(
self
,
doc
,
results
):
"""Take a single document and the LM results and evaluates, returning a
"""Take a single document and the LM results and evaluates, returning a
...
@@ -71,8 +77,12 @@ class CoQA(Dataset):
...
@@ -71,8 +77,12 @@ class CoQA(Dataset):
:param results:
:param results:
The results of the requests created in construct_requests.
The results of the requests created in construct_requests.
"""
"""
# TODO: implement evaluation.
golds
=
get_answers
(
doc
,
len
(
doc
[
"questions"
]))
raise
NotImplementedError
(
'Evaluation not implemented'
)
pred
=
np
.
argmax
(
results
)
return
{
"acc"
:
pred
in
golds
,
# "f1": (golds, pred), # TODO: Fix
}
def
aggregation
(
self
):
def
aggregation
(
self
):
"""
"""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment