Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
e971baac
Commit
e971baac
authored
Apr 09, 2021
by
Leo Gao
Browse files
Fix triviaqa memory consumption problem
(or rather, move it to the data download phase)
parent
2b8956b8
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
11 additions
and
5 deletions
+11
-5
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+1
-1
lm_eval/tasks/triviaqa.py
lm_eval/tasks/triviaqa.py
+10
-4
No files found.
lm_eval/tasks/__init__.py
View file @
e971baac
...
...
@@ -101,7 +101,7 @@ TASK_REGISTRY = {
"qa4mre_2012"
:
qa4mre
.
QA4MRE_2012
,
"qa4mre_2013"
:
qa4mre
.
QA4MRE_2013
,
#
"triviaqa": triviaqa.TriviaQA,
"triviaqa"
:
triviaqa
.
TriviaQA
,
"arc_easy"
:
arc
.
ARCEasy
,
"arc_challenge"
:
arc
.
ARCChallenge
,
# "quac": quac.QuAC, # not implemented yet
...
...
lm_eval/tasks/triviaqa.py
View file @
e971baac
...
...
@@ -14,6 +14,12 @@ class TriviaQA(Task):
tar -xf data/triviaqa/trivia_qa-unfiltered.tar.gz
mv triviaqa-unfiltered/ data/triviaqa/
"""
)
# convert to streamable jsonl
for
subset
in
[
'train'
,
'dev'
]:
with
open
(
f
'data/triviaqa/triviaqa-unfiltered/unfiltered-web-
{
subset
}
.jsonl'
,
'w'
)
as
fh
:
for
d
in
json
.
load
(
open
(
f
'data/triviaqa/triviaqa-unfiltered/unfiltered-web-
{
subset
}
.json'
))[
'Data'
]:
fh
.
write
(
json
.
dumps
(
d
)
+
"
\n
"
)
def
has_training_docs
(
self
):
return
True
...
...
@@ -25,20 +31,20 @@ class TriviaQA(Task):
return
False
def
training_docs
(
self
):
return
json
.
load
(
open
(
'data/triviaqa/triviaqa-unfiltered/unfiltered-web-train.json'
))
[
'Data'
]
return
map
(
json
.
load
s
,
open
(
'data/triviaqa/triviaqa-unfiltered/unfiltered-web-train.json
l
'
))
def
validation_docs
(
self
):
return
json
.
load
(
open
(
'data/triviaqa/triviaqa-unfiltered/unfiltered-web-
dev
.json'
))
[
'Data'
]
return
map
(
json
.
load
s
,
open
(
'data/triviaqa/triviaqa-unfiltered/unfiltered-web-
val
.json
l
'
))
def
test_docs
(
self
):
r
eturn
json
.
load
(
open
(
'data/triviaqa/triviaqa-unfiltered/unfiltered-web-test.json'
))[
'Data'
]
r
aise
NotImplementedError
()
def
fewshot_description
(
self
):
# TODO: figure out fewshot description
return
""
def
doc_to_text
(
self
,
doc
):
return
''
.
join
([
'Q:'
,
doc
[
'Question'
],
'
\n\n
'
,
'A:'
])
return
f
"Question:
{
doc
[
'Question'
]
}
\n
Answer:"
def
doc_to_target
(
self
,
doc
):
return
" "
+
doc
[
'Answer'
][
'Value'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment