Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
299e9505
"vscode:/vscode.git/clone" did not exist on "e172f095ba4af2c98d7744ce4ffcf4cd3a8e123c"
Commit
299e9505
authored
Dec 12, 2022
by
jon-tow
Browse files
Replace stale `triviaqa` dataset link
parent
62ca1840
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
46 additions
and
6 deletions
+46
-6
lm_eval/datasets/triviaqa/README.md
lm_eval/datasets/triviaqa/README.md
+40
-0
lm_eval/datasets/triviaqa/triviaqa.py
lm_eval/datasets/triviaqa/triviaqa.py
+6
-6
No files found.
lm_eval/datasets/triviaqa/README.md
0 → 100644
View file @
299e9505
---
dataset_info
:
features
:
-
name
:
question_id
dtype
:
string
-
name
:
question_source
dtype
:
string
-
name
:
question
dtype
:
string
-
name
:
answer
struct
:
-
name
:
aliases
sequence
:
string
-
name
:
value
dtype
:
string
-
name
:
search_results
sequence
:
-
name
:
description
dtype
:
string
-
name
:
filename
dtype
:
string
-
name
:
rank
dtype
:
int32
-
name
:
title
dtype
:
string
-
name
:
url
dtype
:
string
-
name
:
search_context
dtype
:
string
config_name
:
triviaqa
splits
:
-
name
:
train
num_bytes
:
1270894387
num_examples
:
87622
-
name
:
validation
num_bytes
:
163755044
num_examples
:
11313
download_size
:
632549060
dataset_size
:
1434649431
---
lm_eval/datasets/triviaqa/triviaqa.py
View file @
299e9505
...
@@ -46,13 +46,13 @@ _HOMEPAGE = "https://nlp.cs.washington.edu/triviaqa/"
...
@@ -46,13 +46,13 @@ _HOMEPAGE = "https://nlp.cs.washington.edu/triviaqa/"
_LICENSE
=
"Apache License 2.0"
_LICENSE
=
"Apache License 2.0"
_URLS
=
"http://
eaidata.bmk.sh
/data/triviaqa-unfiltered.tar.gz"
_URLS
=
"http
s
://
nlp.cs.washington.edu/triviaqa
/data/triviaqa-unfiltered.tar.gz"
class
Triviaqa
(
datasets
.
GeneratorBasedBuilder
):
class
Triviaqa
(
datasets
.
GeneratorBasedBuilder
):
"""TriviaQA is a reading comprehension dataset containing over 650K question-answer-evidence triples"""
"""TriviaQA is a reading comprehension dataset containing over 650K question-answer-evidence triples"""
VERSION
=
datasets
.
Version
(
"0.0.
1
"
)
VERSION
=
datasets
.
Version
(
"0.0.
2
"
)
BUILDER_CONFIGS
=
[
BUILDER_CONFIGS
=
[
datasets
.
BuilderConfig
(
datasets
.
BuilderConfig
(
...
@@ -100,14 +100,14 @@ class Triviaqa(datasets.GeneratorBasedBuilder):
...
@@ -100,14 +100,14 @@ class Triviaqa(datasets.GeneratorBasedBuilder):
name
=
datasets
.
Split
.
TRAIN
,
name
=
datasets
.
Split
.
TRAIN
,
# These kwargs will be passed to _generate_examples
# These kwargs will be passed to _generate_examples
gen_kwargs
=
{
gen_kwargs
=
{
"filepath"
:
os
.
path
.
join
(
data_dir
,
"unfiltered-web-train.json
l
"
),
"filepath"
:
os
.
path
.
join
(
data_dir
,
"triviaqa-unfiltered"
,
"unfiltered-web-train.json"
),
},
},
),
),
datasets
.
SplitGenerator
(
datasets
.
SplitGenerator
(
name
=
datasets
.
Split
.
VALIDATION
,
name
=
datasets
.
Split
.
VALIDATION
,
# These kwargs will be passed to _generate_examples
# These kwargs will be passed to _generate_examples
gen_kwargs
=
{
gen_kwargs
=
{
"filepath"
:
os
.
path
.
join
(
data_dir
,
"unfiltered-web-dev.json
l
"
),
"filepath"
:
os
.
path
.
join
(
data_dir
,
"triviaqa-unfiltered"
,
"unfiltered-web-dev.json"
),
},
},
),
),
]
]
...
@@ -115,8 +115,8 @@ class Triviaqa(datasets.GeneratorBasedBuilder):
...
@@ -115,8 +115,8 @@ class Triviaqa(datasets.GeneratorBasedBuilder):
# method parameters are unpacked from `gen_kwargs` as given in `_split_generators`
# method parameters are unpacked from `gen_kwargs` as given in `_split_generators`
def
_generate_examples
(
self
,
filepath
):
def
_generate_examples
(
self
,
filepath
):
with
open
(
filepath
,
encoding
=
"utf-8"
)
as
f
:
with
open
(
filepath
,
encoding
=
"utf-8"
)
as
f
:
for
key
,
row
in
enumerate
(
f
):
json_data
=
json
.
load
(
f
)[
'Data'
]
data
=
json
.
loads
(
row
)
for
key
,
data
in
enumerate
(
json_data
):
search_results
=
[]
search_results
=
[]
for
search_result
in
data
[
"SearchResults"
]:
for
search_result
in
data
[
"SearchResults"
]:
search_results
.
append
(
search_results
.
append
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment