gaoqiong / lm-evaluation-harness · Commits

Commit ec4d3615
Authored Oct 04, 2020 by Anish Thite

add drop data + doc2text

Parent: 5888a695
Showing 2 changed files with 84 additions and 1 deletion (+84 −1)
download_all.sh        +9 −1
lm_eval/tasks/drop.py  +75 −0
download_all.sh

@@ -3,4 +3,12 @@
 #coqa
 mkdir -p data/coqa
 wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json -O data/coqa/coqa-train-v1.0.json
-wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json -O data/coqa/coqa-dev-v1.0.json
\ No newline at end of file
+wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-dev-v1.0.json -O data/coqa/coqa-dev-v1.0.json
+
+#drop
+mkdir -p data/drop
+wget https://s3-us-west-2.amazonaws.com/allennlp/datasets/drop/drop_dataset.zip -O data/drop.zip
+unzip data/drop.zip -d data/drop
+rm data/drop.zip
+mv data/drop/drop_dataset/* data/drop
+rm -rf data/drop/drop_dataset
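For orientation, here is a small Python sketch of what the commands above leave behind. The field names mirror those read by lm_eval/tasks/drop.py in this commit; the exact key set of the answer dict is an assumption based on the standard DROP release layout (one top-level JSON object mapping passage IDs to records).

import json

# Inspect the data laid out by download_all.sh (assumed layout).
with open('data/drop/drop_dataset_train.json') as f:
    docs = json.load(f)

doc = next(iter(docs.values()))
print(doc['passage'][:80])       # passage text
pair = doc['qa_pairs'][0]        # each record holds a list of QA pairs
print(pair['question'])
print(sorted(pair['answer']))    # answer keys: 'date', 'number', 'spans', ...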
lm_eval/tasks/drop.py (new file, mode 100644)
import numpy as np
import json
from scipy.stats import pearsonr, spearmanr
from sklearn.metrics import f1_score, matthews_corrcoef
from tqdm import auto as tqdm_lib
from .common import NLP_TASK, simple_accuracy_metric, yesno
from pathlib import Path
from ..base import Dataset

# Note: several imports above (numpy, scipy, sklearn, tqdm, and the .common
# helpers) are not used in this file; they appear to be carried over from the
# shared task template.
class DROP(Dataset):
    DATAFOLDER = Path(__file__).parent / "../../data/drop"

    def has_training_docs(self):
        """Whether the task has a training set"""
        return True

    def has_validation_docs(self):
        """Whether the task has a validation set"""
        return True

    def has_test_docs(self):
        """Whether the task has a test set"""
        return False

    def training_docs(self):
        docs = json.load(open(self.DATAFOLDER / 'drop_dataset_train.json'))
        return [docs[k] for k in docs.keys()]

    def validation_docs(self):
        docs = json.load(open(self.DATAFOLDER / 'drop_dataset_dev.json'))
        return [docs[k] for k in docs.keys()]

    def test_docs(self):
        pass
    def doc_to_text(self, doc, include_target=True):
        doctext = "Passage: {}\n\n".format(doc["passage"])
        qa_texts = []
        for pair in doc["qa_pairs"]:
            text = ''.join(['Q: ', pair['question'], '\nA: '])
            if include_target:
                def get_answer(ans_dict):
                    # A DROP answer is exactly one of: a number, a list of
                    # text spans, or a date.
                    if ans_dict['number'] != '':
                        return ans_dict['number']
                    if ans_dict['spans']:
                        # one or more spans, joined into a single string
                        return ', '.join(ans_dict['spans'])
                    return ' '.join([ans_dict['date']['day'],
                                     ans_dict['date']['month'],
                                     ans_dict['date']['year']]).strip()
                text = ''.join([text, get_answer(pair['answer'])])
            qa_texts.append(text)
        return ''.join([doctext, '\n\n'.join(qa_texts)])
    def evaluate(self, docs, lm, provide_description, num_fewshot):
        """Takes an iterable of docs and evaluates them, returning a dict with the
        following format:
        {
            "major": float,
            "minor": dict,
            "higher_is_better": bool,
        }
        * `major` should be a single, representative number, for programmatic comparison
        * `minor` should be a dictionary containing all relevant sub-metrics
        * `higher_is_better` determines whether a higher metric is better
        """
        pass

    def fewshot_description(self):
        return "Read the passage and answer the questions "