"git@developer.sourcefind.cn:yangql/composable_kernel-1.git" did not exist on "67ad47e7c124eaa180161e35eaad8a6dbb7985e6"
Unverified Commit 689ffe5b, authored by Fengzhe Zhou, committed by GitHub
Browse files

[Feature] Use dataset in local path (#570)

* update commonsenseqa

* update drop

* update flores_first100

* update gsm8k

* update humaneval

* update lambda

* update obqa

* update piqa

* update race

* update siqa

* update story_cloze

* update strategyqa

* update tydiqa

* update winogrande

* update doc

* update hellaswag

* fix obqa

* update collections

* update .zip name
parent d6aaac22
...@@ -83,8 +83,8 @@ git clone https://github.com/open-compass/opencompass opencompass ...@@ -83,8 +83,8 @@ git clone https://github.com/open-compass/opencompass opencompass
cd opencompass cd opencompass
pip install -e . pip install -e .
# Download dataset to data/ folder # Download dataset to data/ folder
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData.zip unzip OpenCompassData-core-20231110.zip
``` ```
Some third-party features, like Humaneval and Llama, may require additional steps to work properly, for detailed steps please refer to the [Installation Guide](https://opencompass.readthedocs.io/en/latest/get_started/installation.html). Some third-party features, like Humaneval and Llama, may require additional steps to work properly, for detailed steps please refer to the [Installation Guide](https://opencompass.readthedocs.io/en/latest/get_started/installation.html).
......
...@@ -85,8 +85,8 @@ git clone https://github.com/open-compass/opencompass opencompass ...@@ -85,8 +85,8 @@ git clone https://github.com/open-compass/opencompass opencompass
cd opencompass cd opencompass
pip install -e . pip install -e .
# 下载数据集到 data/ 处 # 下载数据集到 data/ 处
wget https://github.com/open-compass/opencompass/releases/download/0.1.1/OpenCompassData.zip wget https://github.com/open-compass/opencompass/releases/download/0.1.8.rc1/OpenCompassData-core-20231110.zip
unzip OpenCompassData.zip unzip OpenCompassData-core-20231110.zip
``` ```
有部分第三方功能,如 Humaneval 以及 Llama,可能需要额外步骤才能正常运行,详细步骤请参考[安装指南](https://opencompass.readthedocs.io/zh_CN/latest/get_started/installation.html) 有部分第三方功能,如 Humaneval 以及 Llama,可能需要额外步骤才能正常运行,详细步骤请参考[安装指南](https://opencompass.readthedocs.io/zh_CN/latest/get_started/installation.html)
......
...@@ -52,10 +52,5 @@ with read_base(): ...@@ -52,10 +52,5 @@ with read_base():
from ..nq.nq_gen_c788f6 import nq_datasets from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..flores.flores_gen_806ede import flores_datasets from ..flores.flores_gen_806ede import flores_datasets
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
...@@ -52,6 +52,5 @@ with read_base(): ...@@ -52,6 +52,5 @@ with read_base():
from ..nq.nq_gen_0356ec import nq_datasets from ..nq.nq_gen_0356ec import nq_datasets
from ..triviaqa.triviaqa_gen_0356ec import triviaqa_datasets from ..triviaqa.triviaqa_gen_0356ec import triviaqa_datasets
from ..flores.flores_gen_806ede import flores_datasets from ..flores.flores_gen_806ede import flores_datasets
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
...@@ -34,6 +34,5 @@ with read_base(): ...@@ -34,6 +34,5 @@ with read_base():
from ..obqa.obqa_ppl_c7c154 import obqa_datasets from ..obqa.obqa_ppl_c7c154 import obqa_datasets
from ..nq.nq_gen_c788f6 import nq_datasets from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..crowspairs.crowspairs_ppl_e811e1 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
...@@ -52,10 +52,5 @@ with read_base(): ...@@ -52,10 +52,5 @@ with read_base():
from ..nq.nq_gen_c788f6 import nq_datasets from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..flores.flores_gen_806ede import flores_datasets from ..flores.flores_gen_806ede import flores_datasets
from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
from ..civilcomments.civilcomments_clp_a3c5fd import civilcomments_datasets
from ..jigsawmultilingual.jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets
from ..realtoxicprompts.realtoxicprompts_gen_7605e4 import realtoxicprompts_datasets
from ..truthfulqa.truthfulqa_gen_5ddc62 import truthfulqa_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
...@@ -35,6 +35,5 @@ with read_base(): ...@@ -35,6 +35,5 @@ with read_base():
from ..obqa.obqa_gen_9069e4 import obqa_datasets from ..obqa.obqa_gen_9069e4 import obqa_datasets
from ..nq.nq_gen_c788f6 import nq_datasets from ..nq.nq_gen_c788f6 import nq_datasets
from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
from ..crowspairs.crowspairs_gen_381af0 import crowspairs_datasets
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), []) datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
...@@ -50,8 +50,9 @@ commonsenseqa_eval_cfg = dict( ...@@ -50,8 +50,9 @@ commonsenseqa_eval_cfg = dict(
commonsenseqa_datasets = [ commonsenseqa_datasets = [
dict( dict(
abbr='commonsense_qa',
type=commonsenseqaDataset, type=commonsenseqaDataset,
path="commonsense_qa", path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg, reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg, infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg, eval_cfg=commonsenseqa_eval_cfg,
......
...@@ -45,8 +45,9 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -45,8 +45,9 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [ commonsenseqa_datasets = [
dict( dict(
abbr='commonsense_qa',
type=commonsenseqaDataset, type=commonsenseqaDataset,
path='commonsense_qa', path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg, reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg, infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg) eval_cfg=commonsenseqa_eval_cfg)
......
...@@ -40,11 +40,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -40,11 +40,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [ commonsenseqa_datasets = [
dict( dict(
abbr='commonsense_qa',
type=commonsenseqaDataset, type=commonsenseqaDataset,
path='commonsense_qa', path='./data/commonsenseqa',
reader_cfg=commonsenseqa_reader_cfg, reader_cfg=commonsenseqa_reader_cfg,
infer_cfg=commonsenseqa_infer_cfg, infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg) eval_cfg=commonsenseqa_eval_cfg)
] ]
del _ice_template
...@@ -4,6 +4,11 @@ from opencompass.openicl.icl_inferencer import PPLInferencer ...@@ -4,6 +4,11 @@ from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import commonsenseqaDataset from opencompass.datasets import commonsenseqaDataset
commonsenseqa_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation')
_ice_template = dict( _ice_template = dict(
type=PromptTemplate, type=PromptTemplate,
template={ template={
...@@ -31,15 +36,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -31,15 +36,10 @@ commonsenseqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
commonsenseqa_datasets = [ commonsenseqa_datasets = [
dict( dict(
abbr='commonsense_qa',
type=commonsenseqaDataset, type=commonsenseqaDataset,
path='commonsense_qa', path='./data/commonsenseqa',
reader_cfg=dict( reader_cfg=commonsenseqa_reader_cfg,
input_columns=['question', 'A', 'B', 'C', 'D', 'E'],
output_column='answerKey',
test_split='validation',
),
infer_cfg=commonsenseqa_infer_cfg, infer_cfg=commonsenseqa_infer_cfg,
eval_cfg=commonsenseqa_eval_cfg) eval_cfg=commonsenseqa_eval_cfg)
] ]
del _ice_template
...@@ -4,11 +4,18 @@ from opencompass.openicl.icl_inferencer import GenInferencer ...@@ -4,11 +4,18 @@ from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator from opencompass.openicl.icl_evaluator import EMEvaluator
from opencompass.datasets import dropDataset from opencompass.datasets import dropDataset
drop_reader_cfg = dict(
input_columns=['prompt', 'question'],
output_column='answers',
train_split='validation',
test_split='validation',
)
drop_infer_cfg = dict( drop_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
type=PromptTemplate, type=PromptTemplate,
template= template='''\
'''Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older. Text: In the county, the population was spread out with 23.50% under the age of 18, 8.70% from 18 to 24, 29.70% from 25 to 44, 24.70% from 45 to 64, and 13.30% who were 65 years of age or older.
Question: How many more percent are under the age of 18 compared to the 18 to 24 group? Question: How many more percent are under the age of 18 compared to the 18 to 24 group?
Anawer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8. Anawer: According to the text, 23.5% are under the age of 18, and 8.7% are from ages 18 to 24. 23.5%-8.7%=14.8%. So the answer is 14.8.
...@@ -30,13 +37,8 @@ drop_datasets = [ ...@@ -30,13 +37,8 @@ drop_datasets = [
dict( dict(
abbr='drop', abbr='drop',
type=dropDataset, type=dropDataset,
path='drop', path='./data/drop/drop_dataset_dev.json',
reader_cfg=dict( reader_cfg=drop_reader_cfg,
input_columns=['prompt'],
output_column='answers',
train_split='validation',
test_split='validation',
),
infer_cfg=drop_infer_cfg, infer_cfg=drop_infer_cfg,
eval_cfg=drop_eval_cfg) eval_cfg=drop_eval_cfg)
] ]
...@@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks: ...@@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
_, _flores_source, _src_inst, _ = flores_lang_map[_src] _, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt] _, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_reader_cfg = dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"
)
flores_infer_cfg = dict( flores_infer_cfg = dict(
ice_template=dict( ice_template=dict(
type=PromptTemplate, type=PromptTemplate,
...@@ -146,16 +152,11 @@ for _flores_subtask in _flores_subtasks: ...@@ -146,16 +152,11 @@ for _flores_subtask in _flores_subtasks:
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores") flores_eval_cfg["dataset_postprocessor"] = dict(type="flores")
flores_datasets.append( flores_datasets.append(
dict( dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}", abbr=f"flores_100_{_src}-{_tgt}",
type=FloresFirst100Dataset,
path='./data/flores_first100',
name=f"{_flores_source}-{_flores_target}", name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict( reader_cfg=flores_reader_cfg.copy(),
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(), infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(), eval_cfg=flores_eval_cfg.copy(),
)) ))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
...@@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks: ...@@ -118,6 +118,12 @@ for _flores_subtask in _flores_subtasks:
_, _flores_source, _src_inst, _ = flores_lang_map[_src] _, _flores_source, _src_inst, _ = flores_lang_map[_src]
_, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt] _, _flores_target, _tgt_inst, _ = flores_lang_map[_tgt]
flores_reader_cfg = dict(
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"
)
flores_infer_cfg = dict( flores_infer_cfg = dict(
ice_template=dict( ice_template=dict(
type=PromptTemplate, type=PromptTemplate,
...@@ -139,16 +145,11 @@ for _flores_subtask in _flores_subtasks: ...@@ -139,16 +145,11 @@ for _flores_subtask in _flores_subtasks:
flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese") flores_eval_cfg["dataset_postprocessor"] = dict(type="flores-chinese")
flores_datasets.append( flores_datasets.append(
dict( dict(
type=FloresFirst100Dataset,
abbr=f"flores_100_{_src}-{_tgt}", abbr=f"flores_100_{_src}-{_tgt}",
type=FloresFirst100Dataset,
path='./data/flores_first100',
name=f"{_flores_source}-{_flores_target}", name=f"{_flores_source}-{_flores_target}",
reader_cfg=dict( reader_cfg=flores_reader_cfg.copy(),
input_columns=f"sentence_{_flores_source}",
output_column=f"sentence_{_flores_target}",
train_split="dev",
test_split="devtest"),
infer_cfg=flores_infer_cfg.copy(), infer_cfg=flores_infer_cfg.copy(),
eval_cfg=flores_eval_cfg.copy(), eval_cfg=flores_eval_cfg.copy(),
)) ))
del _flores_lang_map, _flores_subtask, _src, _tgt, _, _flores_source, _src_inst, _flores_target, _tgt_inst
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer') gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
...@@ -32,9 +31,8 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator), ...@@ -32,9 +31,8 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
gsm8k_datasets = [ gsm8k_datasets = [
dict( dict(
abbr='gsm8k', abbr='gsm8k',
type=HFDataset, type=GSM8KDataset,
path='gsm8k', path='./data/gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg, reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg, infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg) eval_cfg=gsm8k_eval_cfg)
......
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer') gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
...@@ -79,9 +78,8 @@ gsm8k_eval_cfg = dict( ...@@ -79,9 +78,8 @@ gsm8k_eval_cfg = dict(
gsm8k_datasets = [ gsm8k_datasets = [
dict( dict(
abbr='gsm8k', abbr='gsm8k',
type=HFDataset, type=GSM8KDataset,
path='gsm8k', path='./data/gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg, reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg, infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg) eval_cfg=gsm8k_eval_cfg)
......
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import AgentInferencer from opencompass.openicl.icl_inferencer import AgentInferencer
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kAgentEvaluator
# This config is for code interpreter # This config is for code interpreter
gsm8k_example = """ gsm8k_example = """
...@@ -76,9 +76,8 @@ gsm8k_eval_cfg = dict( ...@@ -76,9 +76,8 @@ gsm8k_eval_cfg = dict(
gsm8k_datasets = [ gsm8k_datasets = [
dict( dict(
abbr='gsm8k', abbr='gsm8k',
type=HFDataset, type=GSM8KDataset,
path='gsm8k', path='./data/gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg, reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg, infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg) eval_cfg=gsm8k_eval_cfg)
......
...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate ...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import SCInferencer from opencompass.openicl.icl_inferencer import SCInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer' ) gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer' )
generation_kwargs = dict(do_sample=True, temperature=0.7, top_k=40) generation_kwargs = dict(do_sample=True, temperature=0.7, top_k=40)
...@@ -81,9 +81,8 @@ gsm8k_eval_cfg = dict( ...@@ -81,9 +81,8 @@ gsm8k_eval_cfg = dict(
gsm8k_datasets = [ gsm8k_datasets = [
dict( dict(
abbr='gsm8k', abbr='gsm8k',
type=HFDataset, type=GSM8KDataset,
path='gsm8k', path='./data/gsm8k',
name='main',
reader_cfg=gsm8k_reader_cfg, reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg, infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg) eval_cfg=gsm8k_eval_cfg)
......
...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate ...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator from opencompass.datasets import GSM8KDataset, gsm8k_postprocess, gsm8k_dataset_postprocess, Gsm8kEvaluator
gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer') gsm8k_reader_cfg = dict(input_columns=['question'], output_column='answer')
...@@ -41,9 +41,9 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator), ...@@ -41,9 +41,9 @@ gsm8k_eval_cfg = dict(evaluator=dict(type=Gsm8kEvaluator),
gsm8k_datasets = [ gsm8k_datasets = [
dict( dict(
type=HFDataset, abbr='gsm8k',
path='gsm8k', type=GSM8KDataset,
name='main', path='./data/gsm8k',
reader_cfg=gsm8k_reader_cfg, reader_cfg=gsm8k_reader_cfg,
infer_cfg=gsm8k_infer_cfg, infer_cfg=gsm8k_infer_cfg,
eval_cfg=gsm8k_eval_cfg) eval_cfg=gsm8k_eval_cfg)
......
...@@ -8,7 +8,7 @@ from opencompass.utils.text_postprocessors import first_option_postprocess ...@@ -8,7 +8,7 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
hellaswag_reader_cfg = dict( hellaswag_reader_cfg = dict(
input_columns=["ctx", "A", "B", "C", "D"], input_columns=["ctx", "A", "B", "C", "D"],
output_column="label", output_column="label",
test_split="validation") )
hellaswag_infer_cfg = dict( hellaswag_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
...@@ -35,8 +35,9 @@ hellaswag_eval_cfg = dict( ...@@ -35,8 +35,9 @@ hellaswag_eval_cfg = dict(
hellaswag_datasets = [ hellaswag_datasets = [
dict( dict(
abbr='hellaswag',
type=hellaswagDataset_V2, type=hellaswagDataset_V2,
path="hellaswag", path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg, reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg, infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg) eval_cfg=hellaswag_eval_cfg)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment