Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
a187cd44
Commit
a187cd44
authored
Mar 08, 2021
by
Jonathan Tow
Browse files
Implement `LogiQA` data download and evaluation
parent
36485d7a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
72 additions
and
0 deletions
+72
-0
README.md
README.md
+1
-0
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+2
-0
lm_eval/tasks/logiqa.py
lm_eval/tasks/logiqa.py
+69
-0
No files found.
README.md
View file @
a187cd44
...
@@ -41,6 +41,7 @@ The goal of this project is to build a set of tools for evaluating LMs on typica
...
@@ -41,6 +41,7 @@ The goal of this project is to build a set of tools for evaluating LMs on typica
|qa4mre_2013 | | |✓ |acc |
|qa4mre_2013 | | |✓ |acc |
|arc_easy |✓ |✓ |✓ |acc |
|arc_easy |✓ |✓ |✓ |acc |
|arc_challenge |✓ |✓ |✓ |acc |
|arc_challenge |✓ |✓ |✓ |acc |
|logiqa |✓ |✓ |✓ |acc |
|hellaswag |✓ |✓ | |acc |
|hellaswag |✓ |✓ | |acc |
|openbookqa |✓ |✓ |✓ |acc |
|openbookqa |✓ |✓ |✓ |acc |
|race |✓ |✓ |✓ |acc |
|race |✓ |✓ |✓ |acc |
...
...
lm_eval/tasks/__init__.py
View file @
a187cd44
...
@@ -32,6 +32,7 @@ from . import mathqa
...
@@ -32,6 +32,7 @@ from . import mathqa
from
.
import
ethics
from
.
import
ethics
from
.
import
drop
from
.
import
drop
from
.
import
unscramble
from
.
import
unscramble
from
.
import
logiqa
########################################
########################################
# Translation tasks
# Translation tasks
...
@@ -102,6 +103,7 @@ TASK_REGISTRY = {
...
@@ -102,6 +103,7 @@ TASK_REGISTRY = {
"arc_easy"
:
arc
.
ARCEasy
,
"arc_easy"
:
arc
.
ARCEasy
,
"arc_challenge"
:
arc
.
ARCChallenge
,
"arc_challenge"
:
arc
.
ARCChallenge
,
# "quac": quac.QuAC, # not implemented yet
# "quac": quac.QuAC, # not implemented yet
"logiqa"
:
logiqa
.
LogiQA
,
"hellaswag"
:
hellaswag
.
HellaSwag
,
# not implemented yet
"hellaswag"
:
hellaswag
.
HellaSwag
,
# not implemented yet
"openbookqa"
:
openbookqa
.
OpenBookQA
,
"openbookqa"
:
openbookqa
.
OpenBookQA
,
# "sat": sat.SATAnalogies, # not implemented yet
# "sat": sat.SATAnalogies, # not implemented yet
...
...
lm_eval/tasks/logiqa.py
0 → 100644
View file @
a187cd44
from
lm_eval.base
import
MultipleChoiceTask
from
best_download
import
download_file
from
pathlib
import
Path
class LogiQA(MultipleChoiceTask):
    """LogiQA multiple-choice task.

    Downloads the ``Train``/``Eval``/``Test`` text files into
    ``DATASET_PATH`` and parses each record into the ``query`` /
    ``choices`` / ``gold`` dict produced by ``_convert_standard``.
    """

    # Directory (relative to the working directory) holding the split files.
    DATASET_PATH = Path("data/logiqa")

    def download(self):
        """Fetch the three split files unless the dataset directory exists.

        Each file is passed to ``download_file`` together with the checksum
        listed for it below.
        """
        if self.DATASET_PATH.exists():
            return
        # parents=True: the enclosing "data" directory may not exist yet, in
        # which case a plain mkdir() would raise FileNotFoundError.
        self.DATASET_PATH.mkdir(parents=True, exist_ok=True)
        base_url = "https://raw.githubusercontent.com/lgw863/LogiQA-dataset/master"
        splits = [
            {
                "name": "Train",
                "checksum": "7d5bb1f58278e33b395744cd2ad8d7600faa0b3c4d615c659a44ec1181d759fa",
            },
            {
                "name": "Eval",
                "checksum": "4c49e6753b7262c001506b9151135abf722247035ab075dad93acdea5789c01f",
            },
            {
                "name": "Test",
                "checksum": "359acb78c37802208f7fde9e2f6574b8526527c63d6a336f90a53f1932cb4701",
            },
        ]
        for split in splits:
            local_path = self.DATASET_PATH / f"{split['name']}.txt"
            download_file(
                f"{base_url}/{split['name']}.txt",
                str(local_path),
                split["checksum"],
            )

    def has_training_docs(self):
        """Return True: a training split (``Train.txt``) is provided."""
        return True

    def has_validation_docs(self):
        """Return True: a validation split (``Eval.txt``) is provided."""
        return True

    def has_test_docs(self):
        """Return True: a test split (``Test.txt``) is provided."""
        return True

    def _convert_standard(self, doc):
        """Turn a parsed record into the dict yielded by the ``*_docs`` methods.

        Args:
            doc: dict with keys ``passage``, ``question``, ``options`` and
                ``answerKey`` (one of ``"a"``..``"d"``).

        Returns:
            dict with the prompt text (``query``), the option list
            (``choices``) and the zero-based index of the correct option
            (``gold``).

        Raises:
            ValueError: if ``answerKey`` is not one of ``"a"``..``"d"``.
        """
        return {
            "query": "Passage: " + doc["passage"] + "\nQuestion: " + doc["question"] + "\nAnswer:",
            "choices": doc["options"],
            "gold": ["a", "b", "c", "d"].index(doc["answerKey"]),
        }

    def _load_docs(self, filename):
        """Lazily parse a split file and yield one converted record at a time.

        Records are separated by a blank line. Within a record, line 0 is the
        answer key, line 1 the passage, line 2 the question, and every
        remaining line is an option whose first two characters are dropped
        (presumably an "A."-style label -- confirm against the raw files).

        Args:
            filename: path of the split file to read.

        Yields:
            The dict built by ``_convert_standard`` for each record.
        """
        def normalize(text):
            # Insert a space after every period, then trim the ends
            # (presumably the raw text has no space after sentence-ending
            # periods -- confirm against the raw files).
            return text.replace(".", ". ").strip()

        # Explicit UTF-8 so decoding does not depend on the platform's
        # default locale encoding.
        with open(filename, 'r', encoding="utf-8") as f:
            docs = f.read().strip().split("\n\n")
            for rawdoc in docs:
                rawdoc = rawdoc.split("\n")
                doc = {
                    "answerKey": rawdoc[0].strip(),
                    "passage": normalize(rawdoc[1]),
                    "question": normalize(rawdoc[2]),
                    "options": [normalize(option[2:]) for option in rawdoc[3:]],
                }
                yield self._convert_standard(doc)

    def training_docs(self):
        """Return a generator over the records in ``Train.txt``."""
        return self._load_docs(self.DATASET_PATH / "Train.txt")

    def validation_docs(self):
        """Return a generator over the records in ``Eval.txt``."""
        return self._load_docs(self.DATASET_PATH / "Eval.txt")

    def test_docs(self):
        """Return a generator over the records in ``Test.txt``."""
        return self._load_docs(self.DATASET_PATH / "Test.txt")

    def fewshot_description(self):
        """Return the few-shot task description (currently an empty string)."""
        # TODO: figure out actual description
        return ""

    def doc_to_text(self, doc):
        """Return the prompt text stored under ``doc["query"]``."""
        return doc["query"]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment