Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
f6afabd5
Commit
f6afabd5
authored
Jul 16, 2023
by
guijinSON
Browse files
add csatqa
parent
11e650db
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
49 additions
and
72 deletions
+49
-72
.DS_Store
.DS_Store
+0
-0
lm_eval/tasks/.DS_Store
lm_eval/tasks/.DS_Store
+0
-0
lm_eval/tasks/csatqa.py
lm_eval/tasks/csatqa.py
+49
-72
No files found.
.DS_Store
View file @
f6afabd5
No preview for this file type
lm_eval/tasks/.DS_Store
0 → 100644
View file @
f6afabd5
File added
lm_eval/tasks/csatqa.py
View file @
f6afabd5
from
lm_eval.base
import
MultipleChoiceTask
import
os
import
datasets
class
CSATQA
(
MultipleChoiceTask
):
import
json
VERSION
=
0
DATASET_PATH
=
"EleutherAI/csatqa"
_CITATION
=
"""
\
def
has_training_docs
(
self
):
"""
return
False
_DESCRIPTION
=
"""
\
def
has_validation_docs
(
self
):
CSAT-QA
return
False
"""
def
has_test_docs
(
self
):
_HOMEPAGE
=
"https://huggingface.co/HAERAE-HUB"
return
True
_LICENSE
=
"Proprietary"
def
test_docs
(
self
):
return
map
(
self
.
_process_doc
,
self
.
dataset
[
"test"
])
split_names
=
[
"WR"
,
"GR"
,
"RCS"
,
"RCSS"
,
"RCH"
,
"LI"
]
def
_process_doc
(
self
,
doc
):
class
CSATQAConfig
(
datasets
.
BuilderConfig
):
choices
=
[
doc
[
"option#1"
],
doc
[
"option#2"
],
doc
[
"option#3"
],
doc
[
"option#4"
],
doc
[
"option#5"
]]
def
__init__
(
self
,
**
kwargs
):
out_doc
=
{
super
().
__init__
(
version
=
datasets
.
Version
(
"1.0.0"
),
**
kwargs
)
"question"
:
doc
[
"question"
],
"choices"
:
choices
,
"gold"
:
int
(
doc
[
'gold'
]),
class
CSATQA
(
datasets
.
GeneratorBasedBuilder
):
}
BUILDER_CONFIGS
=
[
return
out_doc
CSATQAConfig
(
name
=
name
,
def
doc_to_text
(
self
,
doc
):
)
return
doc
[
"question"
]
for
name
in
split_names
]
class
WR
(
CSATQA
):
def
_info
(
self
):
DATASET_NAME
=
"WR"
features
=
datasets
.
Features
(
{
class
GR
(
CSATQA
):
"question"
:
datasets
.
Value
(
"string"
),
DATASET_NAME
=
"GR"
"option#1"
:
datasets
.
Value
(
"string"
),
"option#2"
:
datasets
.
Value
(
"string"
),
class
RCS
(
CSATQA
):
"option#3"
:
datasets
.
Value
(
"string"
),
DATASET_NAME
=
"RCS"
"option#4"
:
datasets
.
Value
(
"string"
),
"option#5"
:
datasets
.
Value
(
"string"
),
class
RCSS
(
CSATQA
):
"gold"
:
datasets
.
Value
(
"int8"
),
DATASET_NAME
=
"RCSS"
}
)
class
RCH
(
CSATQA
):
return
datasets
.
DatasetInfo
(
DATASET_NAME
=
"RCH"
description
=
_DESCRIPTION
,
features
=
features
,
class
LI
(
CSATQA
):
homepage
=
_HOMEPAGE
,
DATASET_NAME
=
"LI"
license
=
_LICENSE
,
citation
=
_CITATION
,
)
def
_split_generators
(
self
,
dl_manager
):
data_dir
=
"HAERAE-HUB/CSAT-QA"
return
[
datasets
.
SplitGenerator
(
name
=
datasets
.
Split
.
TEST
,
gen_kwargs
=
{
"filepath"
:
os
.
path
.
join
(
data_dir
,
"data"
,
"data.jsonl"
),
},
),
]
def
_generate_examples
(
self
,
filepath
):
with
open
(
filepath
,
encoding
=
"utf-8"
)
as
f
:
for
key
,
row
in
enumerate
(
f
):
data
=
json
.
loads
(
row
)
if
data
[
"split"
]
==
self
.
config
.
name
:
data
[
"gold"
]
=
int
(
data
[
"gold"
])
-
1
data
.
pop
(
"split"
)
yield
key
,
data
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment