Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
5d601e14
Commit
5d601e14
authored
Oct 24, 2020
by
Anish Thite
Browse files
update coqa to be consistent with gpt3 paper
parent
01608cf4
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
27 deletions
+12
-27
lm_eval/tasks/coqa.py
lm_eval/tasks/coqa.py
+12
-27
No files found.
lm_eval/tasks/coqa.py
View file @
5d601e14
...
@@ -5,8 +5,10 @@ from ..utils import sh
...
@@ -5,8 +5,10 @@ from ..utils import sh
class
CoQA
(
Dataset
):
class
CoQA
(
Dataset
):
def
__init__
(
self
):
self
.
download
()
def
download
(
self
):
def
download
(
self
):
#TODO: don't download if files already there
sh
(
"""
sh
(
"""
mkdir -p data/coqa
mkdir -p data/coqa
wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json -O data/coqa/coqa-train-v1.0.json
wget http://downloads.cs.stanford.edu/nlp/data/coqa/coqa-train-v1.0.json -O data/coqa/coqa-train-v1.0.json
...
@@ -23,42 +25,25 @@ class CoQA(Dataset):
...
@@ -23,42 +25,25 @@ class CoQA(Dataset):
return
False
return
False
def
training_docs
(
self
):
def
training_docs
(
self
):
myjson
=
json
.
load
(
open
(
'data/coqa/coqa-train-v1.0.json'
))[
'data'
]
return
json
.
load
(
open
(
'data/coqa/coqa-train-v1.0.json'
))[
'data'
]
return
self
.
load_doc
(
myjson
)
def
validation_docs
(
self
):
def
validation_docs
(
self
):
pass
pass
def
test_docs
(
self
):
def
test_docs
(
self
):
myjson
=
json
.
load
(
open
(
'data/coqa/coqa-dev-v1.0.json'
))[
'data'
]
return
json
.
load
(
open
(
'data/coqa/coqa-dev-v1.0.json'
))[
'data'
]
return
self
.
load_doc
(
myjson
)
def
fewshot_examples
(
self
,
k
):
traindocs
=
list
(
self
.
training_docs
())
random
.
seed
(
123
)
random
.
shuffle
(
traindocs
)
return
traindocs
[:
k
]
def
fewshot_description
(
self
):
def
fewshot_description
(
self
):
pass
pass
def
load_doc
(
self
,
myjson
):
docs
=
[]
for
item
in
myjson
:
new_instance
=
[
item
[
'story'
]]
qa_pairs
=
zip
(
item
[
'questions'
],
item
[
'answers'
])
for
pair
in
qa_pairs
:
new_instance
.
append
(
'
\n
'
)
new_instance
.
append
(
''
.
join
([
'Q: '
,
pair
[
0
][
'input_text'
]]))
new_instance
.
append
(
''
.
join
([
'A: '
,
pair
[
1
][
'input_text'
]]))
docs
.
append
(
new_instance
)
return
docs
def
doc_to_text
(
self
,
doc
,
include_target
=
True
):
def
doc_to_text
(
self
,
doc
,
include_target
=
True
):
text
=
'
\n
<|endoftext|>
\n
'
.
join
([
'
\n
'
.
join
(
instance
)
for
instance
in
doc
])
text
=
[
doc
[
'story'
]]
text
=
text
+
'
\n
<|endoftext|>'
for
pair
in
zip
(
doc
[
'questions'
],
doc
[
'answers'
]):
return
text
text
.
append
(
'
\n\n
'
)
text
.
append
(
''
.
join
([
'Q: '
,
pair
[
0
][
'input_text'
],
'
\n\n
'
]))
text
.
append
(
''
.
join
([
'A: '
,
pair
[
1
][
'input_text'
]]))
return
''
.
join
(
text
)
def
evaluate
(
self
,
docs
,
lm
):
def
evaluate
(
self
,
docs
,
lm
):
pass
pass
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment