Unverified Commit 689ffe5b authored by Fengzhe Zhou's avatar Fengzhe Zhou Committed by GitHub
Browse files

[Feature] Use dataset in local path (#570)

* update commonsenseqa

* update drop

* update flores_first100

* update gsm8k

* update humaneval

* update lambda

* update obqa

* update piqa

* update race

* update siqa

* update story_cloze

* update strategyqa

* update tydiqa

* update winogrande

* update doc

* update hellaswag

* fix obqa

* update collections

* update .zip name
parent d6aaac22
......@@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict(
prompt_template=dict(
......@@ -29,17 +32,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
type=RaceDataset,
path='./data/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
type=RaceDataset,
path='./data/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
......
......@@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer')
output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict(
prompt_template=dict(
......@@ -28,17 +31,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [
dict(
type=RaceDataset,
abbr='race-middle',
path='race',
type=RaceDataset,
path='./data/race',
name='middle',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg),
dict(
type=RaceDataset,
abbr='race-high',
path='race',
type=RaceDataset,
path='./data/race',
name='high',
reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg,
......
......@@ -34,7 +34,7 @@ siqa_datasets = [
dict(
abbr="siqa",
type=siqaDataset_V2,
path="social_i_qa",
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
......
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
......@@ -25,8 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
......
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
......@@ -25,9 +25,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
name='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
......
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
......@@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
......
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
from opencompass.datasets import siqaDataset
siqa_reader_cfg = dict(
input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
......@@ -37,8 +37,8 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
siqa_datasets = [
dict(
abbr="siqa",
type=HFDataset,
path='social_i_qa',
type=siqaDataset,
path='./data/siqa',
reader_cfg=siqa_reader_cfg,
infer_cfg=siqa_infer_cfg,
eval_cfg=siqa_eval_cfg)
......
......@@ -37,8 +37,8 @@ storycloze_datasets = [
dict(
abbr="story_cloze",
type=storyclozeDataset_V2,
path="juletxara/xstory_cloze",
name="en",
path='./data/xstory_cloze',
lang='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg,
......
......@@ -31,8 +31,8 @@ storycloze_datasets = [
dict(
abbr='story_cloze',
type=storyclozeDataset,
path='juletxara/xstory_cloze',
name='en',
path='./data/xstory_cloze',
lang='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg)
......
......@@ -28,8 +28,8 @@ storycloze_datasets = [
dict(
abbr='story_cloze',
type=storyclozeDataset,
path='juletxara/xstory_cloze',
name='en',
path='./data/xstory_cloze',
lang='en',
reader_cfg=storycloze_reader_cfg,
infer_cfg=storycloze_infer_cfg,
eval_cfg=storycloze_eval_cfg)
......
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
strategyqa_reader_cfg = dict(
input_columns=['question'],
......@@ -86,8 +86,8 @@ strategyqa_eval_cfg = dict(
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
type=StrategyQADataset,
path='./data/strategyqa/strategyQA_train.json',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
......
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
from opencompass.datasets import StrategyQADataset, strategyqa_pred_postprocess, strategyqa_dataset_postprocess
strategyqa_reader_cfg = dict(
input_columns=['question'],
......@@ -50,8 +50,8 @@ strategyqa_eval_cfg = dict(
strategyqa_datasets = [
dict(
abbr='strategyqa',
type=HFDataset,
path='wics/strategy-qa',
type=StrategyQADataset,
path='./data/strategyqa/strategyQA_train.json',
reader_cfg=strategyqa_reader_cfg,
infer_cfg=strategyqa_infer_cfg,
eval_cfg=strategyqa_eval_cfg)
......
......@@ -6,9 +6,8 @@ from opencompass.datasets import TydiQADataset, TydiQAEvaluator
# All configs are for TydiQA Goldp task
tydiqa_reader_cfg = dict(
input_columns=["passage_text", "question_text"],
output_column="answer",
test_split='validation',
train_split='validation',)
output_column="answer"
)
langs = ['arabic', 'bengali', 'english', 'finnish', 'indonesian', 'japanese', 'korean', 'russian', 'swahili', 'telugu', 'thai']
......@@ -33,19 +32,25 @@ for _lang in langs:
prompt_template=dict(
type=PromptTemplate,
template=f"{_hint[0]}\n\n</E>{_hint[1]}{{passage_text}}\n{_hint[2]} {{question_text}}\n{_hint[3]} {{answer}}" ,
ice_token='</E>'),
ice_token='</E>'
),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer), max_out_len=50)
inferencer=dict(type=GenInferencer), max_out_len=50
)
tydiqa_eval_cfg = dict(
evaluator=dict(type=TydiQAEvaluator),
ds_split='validation',
ds_column='answer',
)
tydiqa_eval_cfg = dict(evaluator=dict(type=TydiQAEvaluator),
ds_split='validation',
ds_column='answer',
)
tydiqa_datasets.append(
dict(abbr=f'tyidqa-goldp_{_lang}',
type=TydiQADataset,
path='khalidalt/tydiqa-goldp',
name=_lang,
reader_cfg=tydiqa_reader_cfg,
infer_cfg=tydiqa_infer_cfg,
eval_cfg=tydiqa_eval_cfg))
\ No newline at end of file
dict(abbr=f'tyidqa-goldp_{_lang}',
type=TydiQADataset,
path='./data/tydiqa',
lang=_lang,
reader_cfg=tydiqa_reader_cfg,
infer_cfg=tydiqa_infer_cfg,
eval_cfg=tydiqa_eval_cfg
)
)
......@@ -7,8 +7,8 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
winogrande_reader_cfg = dict(
input_columns=["opt1", "opt2"],
output_column="label",
test_split="validation")
output_column="answer",
)
winogrande_infer_cfg = dict(
prompt_template=dict(
......@@ -35,8 +35,7 @@ winogrande_datasets = [
dict(
abbr="winogrande",
type=winograndeDataset_V2,
path="winogrande",
name="winogrande_xs",
path='./data/winogrande',
reader_cfg=winogrande_reader_cfg,
infer_cfg=winogrande_infer_cfg,
eval_cfg=winogrande_eval_cfg,
......
......@@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
winogrande_reader_cfg = dict(
input_columns=['opt1', 'opt2'],
output_column='answer',
train_split='validation',
test_split='validation')
)
winogrande_infer_cfg = dict(
prompt_template=dict(
......@@ -28,8 +27,7 @@ winogrande_datasets = [
dict(
abbr='winogrande',
type=winograndeDataset,
path='winogrande',
name='winogrande_xs',
path='./data/winogrande',
reader_cfg=winogrande_reader_cfg,
infer_cfg=winogrande_infer_cfg,
eval_cfg=winogrande_eval_cfg)
......
......@@ -7,8 +7,7 @@ from opencompass.datasets import winograndeDataset
winogrande_reader_cfg = dict(
input_columns=['opt1', 'opt2'],
output_column='answer',
train_split='validation',
test_split='validation')
)
winogrande_infer_cfg = dict(
prompt_template=dict(
......@@ -26,8 +25,7 @@ winogrande_datasets = [
dict(
abbr='winogrande',
type=winograndeDataset,
path='winogrande',
name='winogrande_xs',
path='./data/winogrande',
reader_cfg=winogrande_reader_cfg,
infer_cfg=winogrande_infer_cfg,
eval_cfg=winogrande_eval_cfg)
......
......@@ -87,17 +87,6 @@ summarizer = dict(
'eprstmt-dev',
'lambada',
'tnews-dev',
'--------- 安全 Safety ---------', # category
# '偏见', # subcategory
'crows_pairs',
# '有毒性(判别)', # subcategory
'civil_comments',
# '有毒性(判别)多语言', # subcategory
'jigsaw_multilingual',
# '有毒性(生成)', # subcategory
'real-toxicity-prompts',
# '真实性/有用性', # subcategory
'truthful_qa',
],
summary_groups=sum(
[v for k, v in locals().items() if k.endswith("_summary_groups")], []),
......
......@@ -56,8 +56,6 @@ summarizer = dict(
'openbookqa_fact',
'nq',
'triviaqa',
'--- Security ---',
'crows_pairs',
],
summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
)
......@@ -66,10 +66,21 @@ Run the following commands to download and place the datasets in the `${OpenComp
```bash
# Run in the OpenCompass directory
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
unzip OpenCompassData.zip
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
```
If you need to use the more comprehensive dataset (~500M) provided by OpenCompass, you can download it using the following command:
```bash
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
unzip OpenCompassData-complete-20231110.zip
cd ./data
unzip *.zip
```
The list of datasets included in both `.zip` files can be found [here](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1).
OpenCompass supports most of the datasets commonly used for performance comparison; please refer to `configs/datasets` for the specific list of supported datasets.
For the next step, please read [Quick Start](./quick_start.md).
......@@ -66,10 +66,21 @@ OpenCompass 支持的数据集主要包括两个部分:
在 OpenCompass 项目根目录下运行下面命令,将数据集准备至 `${OpenCompass}/data` 目录下:
```bash
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip
unzip OpenCompassData.zip
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData-core-20231110.zip
```
如果需要使用 OpenCompass 提供的更加完整的数据集 (~500M),可以使用下述命令进行下载:
```bash
wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-complete-20231110.zip
unzip OpenCompassData-complete-20231110.zip
cd ./data
unzip *.zip
```
两个 `.zip` 中所含数据集列表如[此处](https://github.com/open-compass/opencompass/releases/tag/0.1.8.rc1)所示。
OpenCompass 已经支持了大多数常用于性能比较的数据集,具体支持的数据集列表请直接在 `configs/datasets` 下进行查找。
接下来,你可以阅读[快速上手](./quick_start.md)了解 OpenCompass 的基本用法。
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment