Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
bf79ff1c
Unverified
Commit
bf79ff1c
authored
Aug 11, 2023
by
Tong Gao
Committed by
GitHub
Aug 11, 2023
Browse files
[Feature] Add LEval datasets
Co-authored-by: kennymckormick <dhd@pku.edu.cn>
parent
8d9cee06
Changes
76
Show whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
454 additions
and
0 deletions
+454
-0
opencompass/datasets/LEval_gsm100.py
opencompass/datasets/LEval_gsm100.py
+58
-0
opencompass/datasets/LEval_legal_contract_qa.py
opencompass/datasets/LEval_legal_contract_qa.py
+27
-0
opencompass/datasets/LEval_meeting_summ.py
opencompass/datasets/LEval_meeting_summ.py
+27
-0
opencompass/datasets/LEval_multidoc_qa.py
opencompass/datasets/LEval_multidoc_qa.py
+27
-0
opencompass/datasets/LEval_narrattive_qa.py
opencompass/datasets/LEval_narrattive_qa.py
+27
-0
opencompass/datasets/LEval_natural_question.py
opencompass/datasets/LEval_natural_question.py
+27
-0
opencompass/datasets/LEval_news_summ.py
opencompass/datasets/LEval_news_summ.py
+27
-0
opencompass/datasets/LEval_paper_assistant.py
opencompass/datasets/LEval_paper_assistant.py
+27
-0
opencompass/datasets/LEval_patent_summ.py
opencompass/datasets/LEval_patent_summ.py
+27
-0
opencompass/datasets/LEval_quality.py
opencompass/datasets/LEval_quality.py
+27
-0
opencompass/datasets/LEval_review_summ.py
opencompass/datasets/LEval_review_summ.py
+27
-0
opencompass/datasets/LEval_scientific_qa.py
opencompass/datasets/LEval_scientific_qa.py
+27
-0
opencompass/datasets/LEval_topic_retrieval.py
opencompass/datasets/LEval_topic_retrieval.py
+27
-0
opencompass/datasets/LEval_tpo.py
opencompass/datasets/LEval_tpo.py
+27
-0
opencompass/datasets/LEval_tvshow_summ.py
opencompass/datasets/LEval_tvshow_summ.py
+27
-0
opencompass/datasets/__init__.py
opencompass/datasets/__init__.py
+18
-0
No files found.
opencompass/datasets/LEval_gsm100.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
,
TEXT_POSTPROCESSORS
from
.base
import
BaseDataset
@
TEXT_POSTPROCESSORS
.
register_module
(
'gsm100_dataset'
)
def gsm100_dataset_postprocess(text: str) -> str:
    """Strip every comma from ``text``.

    Args:
        text: The string to clean.

    Returns:
        ``text`` with all ``','`` characters removed; every other character
        is left untouched.
    """
    return text.replace(',', '')
@
TEXT_POSTPROCESSORS
.
register_module
(
'gsm100'
)
def gsm100_postprocess(text: str) -> str:
    """Extract the digits of the answer that follows ``'The answer is'``.

    Only the text between the first and the second occurrence of the marker
    ``'The answer is'`` (or the end of the string) is inspected. That text is
    split on single spaces, and the first piece containing at least one digit
    is selected.

    Args:
        text: Raw prediction text.

    Returns:
        The digit characters of the selected piece, concatenated in order.
        All non-digit characters are dropped, so separators, signs and
        decimal points do not survive (``'$1,234.'`` -> ``'1234'``,
        ``'3.5'`` -> ``'35'``). Returns ``''`` when the marker is absent or
        no piece after it contains a digit.
    """
    segments = text.split('The answer is')
    if len(segments) < 2:
        # Marker not present at all.
        return ''
    for token in segments[1].split(' '):
        digits = ''.join(ch for ch in token if ch.isdigit())
        if digits:
            # First space-separated piece that carries any digit wins.
            return digits
    return ''
@LOAD_DATASET.register_module()
class LEvalGSM100Dataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval GSM100."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_legal_contract_qa.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalLegalContractQADataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval legal
    contract QA."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_meeting_summ.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalMeetingSummDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval meeting
    summarization."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_multidoc_qa.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalMultidocQADataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval multi-document
    QA."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_narrattive_qa.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalNarrativeQADataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval narrative
    QA."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_natural_question.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalNaturalQuestionDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval natural
    question."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_news_summ.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalNewsSummDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval news
    summarization."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_paper_assistant.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalPaperAssistantDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval paper
    assistant."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_patent_summ.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalPatentSummDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval patent
    summarization."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_quality.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalQualityDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval QuALITY."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context. Unlike a plain copy, the stored answer
        is only the element at index 1 of each output.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.

        Raises:
            IndexError: If an output has fewer than two elements.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    # NOTE(review): presumably outputs look like '(A) ...'
                    # and index 1 is the option letter -- confirm against
                    # the data.
                    'answer': answer[1],
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_review_summ.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalReviewSummDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval review
    summarization."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_scientific_qa.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalScientificQADataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval scientific
    QA."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_topic_retrieval.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalTopicRetrievalDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval topic
    retrieval."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_tpo.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalTPODataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval TPO."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/LEval_tvshow_summ.py
0 → 100644
View file @
bf79ff1c
from
datasets
import
Dataset
,
load_dataset
from
opencompass.registry
import
LOAD_DATASET
from
.base
import
BaseDataset
@LOAD_DATASET.register_module()
class LEvalTVShowSummDataset(BaseDataset):
    """Dataset loader registered in ``LOAD_DATASET`` for LEval TV show
    summarization."""

    @staticmethod
    def load(**kwargs):
        """Load the data and flatten its ``'test'`` split into QA records.

        Each source row holds one ``'input'`` value plus parallel
        ``'instructions'`` and ``'outputs'`` sequences. Every
        (instruction, output) pair becomes its own record that repeats the
        row's ``'input'`` as context.

        Args:
            **kwargs: Forwarded unchanged to ``load_dataset``.

        Returns:
            The object returned by ``load_dataset`` with its ``'test'`` entry
            replaced by a ``Dataset`` of ``{'question', 'context', 'answer'}``
            records.
        """
        dataset = load_dataset(**kwargs)
        split = 'test'
        raw_data = []
        # Walk the split row by row; indexing dataset[split][column][i]
        # inside the loop would re-read whole columns for every row.
        for row in dataset[split]:
            context = row['input']
            for question, answer in zip(row['instructions'], row['outputs']):
                raw_data.append({
                    'question': question,
                    'context': context,
                    'answer': answer,
                })
        dataset[split] = Dataset.from_list(raw_data)
        return dataset
opencompass/datasets/__init__.py
View file @
bf79ff1c
...
@@ -34,6 +34,24 @@ from .iwslt2017 import * # noqa: F401, F403
...
@@ -34,6 +34,24 @@ from .iwslt2017 import * # noqa: F401, F403
from
.jigsawmultilingual
import
*
# noqa: F401, F403
from
.jigsawmultilingual
import
*
# noqa: F401, F403
from
.lambada
import
*
# noqa: F401, F403
from
.lambada
import
*
# noqa: F401, F403
from
.lcsts
import
*
# noqa: F401, F403
from
.lcsts
import
*
# noqa: F401, F403
from
.LEval_coursera
import
*
# noqa: F401, F403
from
.LEval_financial_qa
import
*
# noqa: F401, F403
from
.LEval_gov_report_summ
import
*
# noqa: F401, F403
from
.LEval_gsm100
import
*
# noqa: F401, F403
from
.LEval_legal_contract_qa
import
*
# noqa: F401, F403
from
.LEval_meeting_summ
import
*
# noqa: F401, F403
from
.LEval_multidoc_qa
import
*
# noqa: F401, F403
from
.LEval_narrattive_qa
import
*
# noqa: F401, F403
from
.LEval_natural_question
import
*
# noqa: F401, F403
from
.LEval_news_summ
import
*
# noqa: F401, F403
from
.LEval_paper_assistant
import
*
# noqa: F401, F403
from
.LEval_patent_summ
import
*
# noqa: F401, F403
from
.LEval_quality
import
*
# noqa: F401, F403
from
.LEval_review_summ
import
*
# noqa: F401, F403
from
.LEval_scientific_qa
import
*
# noqa: F401, F403
from
.LEval_topic_retrieval
import
*
# noqa: F401, F403
from
.LEval_tpo
import
*
# noqa: F401, F403
from
.LEval_tvshow_summ
import
*
# noqa: F401, F403
from
.math
import
*
# noqa: F401, F403
from
.math
import
*
# noqa: F401, F403
from
.mbpp
import
*
# noqa: F401, F403
from
.mbpp
import
*
# noqa: F401, F403
from
.mmlu
import
*
# noqa: F401, F403
from
.mmlu
import
*
# noqa: F401, F403
...
...
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment