Commit 689ffe5b (unverified), authored by Fengzhe Zhou, committed by GitHub
Browse files

[Feature] Use dataset in local path (#570)

* update commonsenseqa

* update drop

* update flores_first100

* update gsm8k

* update humaneval

* update lambada

* update obqa

* update piqa

* update race

* update siqa

* update story_cloze

* update strategyqa

* update tydiqa

* update winogrande

* update doc

* update hellaswag

* fix obqa

* update collections

* update .zip name
parent d6aaac22
...@@ -27,8 +27,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -27,8 +27,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [ hellaswag_datasets = [
dict( dict(
abbr='hellaswag',
type=hellaswagDataset, type=hellaswagDataset,
path='hellaswag', path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg, reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg, infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg) eval_cfg=hellaswag_eval_cfg)
......
...@@ -6,9 +6,8 @@ from opencompass.datasets import hellaswagDataset ...@@ -6,9 +6,8 @@ from opencompass.datasets import hellaswagDataset
hellaswag_reader_cfg = dict( hellaswag_reader_cfg = dict(
input_columns=['ctx', 'A', 'B', 'C', 'D'], input_columns=['ctx', 'A', 'B', 'C', 'D'],
output_column='label', output_column='label'
train_split='validation', )
test_split='validation')
hellaswag_infer_cfg = dict( hellaswag_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
...@@ -26,8 +25,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -26,8 +25,9 @@ hellaswag_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
hellaswag_datasets = [ hellaswag_datasets = [
dict( dict(
abbr='hellaswag',
type=hellaswagDataset, type=hellaswagDataset,
path='hellaswag', path='./data/hellaswag/hellaswag.jsonl',
reader_cfg=hellaswag_reader_cfg, reader_cfg=hellaswag_reader_cfg,
infer_cfg=hellaswag_infer_cfg, infer_cfg=hellaswag_infer_cfg,
eval_cfg=hellaswag_eval_cfg) eval_cfg=hellaswag_eval_cfg)
......
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict( humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test') input_columns=['prompt'], output_column='task_id', train_split='test')
...@@ -32,8 +32,9 @@ humaneval_eval_cfg = dict( ...@@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [ humaneval_datasets = [
dict( dict(
type=HFDataset, abbr='openai_humaneval',
path='openai_humaneval', type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg, reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg, infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg) eval_cfg=humaneval_eval_cfg)
......
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict( humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test') input_columns=['prompt'], output_column='task_id', train_split='test')
...@@ -27,8 +27,9 @@ humaneval_eval_cfg = dict( ...@@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [ humaneval_datasets = [
dict( dict(
type=HFDataset, abbr='openai_humaneval',
path='openai_humaneval', type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg, reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg, infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg) eval_cfg=humaneval_eval_cfg)
......
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict( humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test') input_columns=['prompt'], output_column='task_id', train_split='test')
...@@ -27,8 +27,9 @@ humaneval_eval_cfg = dict( ...@@ -27,8 +27,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [ humaneval_datasets = [
dict( dict(
type=HFDataset, abbr='openai_humaneval',
path='openai_humaneval', type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg, reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg, infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg) eval_cfg=humaneval_eval_cfg)
......
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict( humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test') input_columns=['prompt'], output_column='task_id', train_split='test')
...@@ -22,8 +22,9 @@ humaneval_eval_cfg = dict( ...@@ -22,8 +22,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [ humaneval_datasets = [
dict( dict(
type=HFDataset, abbr='openai_humaneval',
path='openai_humaneval', type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg, reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg, infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg) eval_cfg=humaneval_eval_cfg)
......
from opencompass.openicl.icl_prompt_template import PromptTemplate from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess from opencompass.datasets import HumanevalDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict( humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test') input_columns=['prompt'], output_column='task_id', train_split='test')
...@@ -32,8 +32,9 @@ humaneval_eval_cfg = dict( ...@@ -32,8 +32,9 @@ humaneval_eval_cfg = dict(
humaneval_datasets = [ humaneval_datasets = [
dict( dict(
type=HFDataset, abbr='openai_humaneval',
path='openai_humaneval', type=HumanevalDataset,
path='./data/humaneval/human-eval-v2-20210705.jsonl',
reader_cfg=humaneval_reader_cfg, reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg, infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg) eval_cfg=humaneval_eval_cfg)
......
...@@ -26,7 +26,7 @@ lambada_datasets = [ ...@@ -26,7 +26,7 @@ lambada_datasets = [
dict( dict(
abbr='lambada', abbr='lambada',
type=lambadaDataset, type=lambadaDataset,
path='craffel/openai_lambada', path='./data/lambada/test.jsonl',
reader_cfg=lambada_reader_cfg, reader_cfg=lambada_reader_cfg,
infer_cfg=lambada_infer_cfg, infer_cfg=lambada_infer_cfg,
eval_cfg=lambada_eval_cfg) eval_cfg=lambada_eval_cfg)
......
...@@ -22,7 +22,7 @@ lambada_datasets = [ ...@@ -22,7 +22,7 @@ lambada_datasets = [
dict( dict(
abbr='lambada', abbr='lambada',
type=lambadaDataset, type=lambadaDataset,
path='craffel/openai_lambada', path='./data/lambada/test.jsonl',
reader_cfg=lambada_reader_cfg, reader_cfg=lambada_reader_cfg,
infer_cfg=lambada_infer_cfg, infer_cfg=lambada_infer_cfg,
eval_cfg=lambada_eval_cfg) eval_cfg=lambada_eval_cfg)
......
...@@ -32,15 +32,12 @@ obqa_datasets = [ ...@@ -32,15 +32,12 @@ obqa_datasets = [
dict( dict(
abbr="openbookqa", abbr="openbookqa",
type=OBQADataset, type=OBQADataset,
path="openbookqa", path='./data/openbookqa/Main/test.jsonl',
split="test",
), ),
dict( dict(
abbr="openbookqa_fact", abbr="openbookqa_fact",
type=OBQADataset, type=OBQADataset,
path="openbookqa", path='./data/openbookqa/Additional/test_complete.jsonl',
name="additional",
split="test",
), ),
] ]
......
...@@ -24,15 +24,12 @@ obqa_datasets = [ ...@@ -24,15 +24,12 @@ obqa_datasets = [
dict( dict(
abbr="openbookqa", abbr="openbookqa",
type=OBQADataset, type=OBQADataset,
path="openbookqa", path='./data/openbookqa/Main/test.jsonl',
split="test",
), ),
dict( dict(
abbr="openbookqa_fact", abbr="openbookqa_fact",
type=OBQADataset, type=OBQADataset,
path="openbookqa", path='./data/openbookqa/Additional/test_complete.jsonl',
name="additional",
split="test",
), ),
] ]
for _i in range(2): for _i in range(2):
......
...@@ -33,9 +33,7 @@ obqa_datasets = [ ...@@ -33,9 +33,7 @@ obqa_datasets = [
dict( dict(
abbr='openbookqa_fact', abbr='openbookqa_fact',
type=OBQADataset_V2, type=OBQADataset_V2,
path='openbookqa', path='./data/openbookqa/Additional/test_complete.jsonl',
name='additional',
split='test',
reader_cfg=obqa_reader_cfg, reader_cfg=obqa_reader_cfg,
infer_cfg=obqa_infer_cfg, infer_cfg=obqa_infer_cfg,
eval_cfg=obqa_eval_cfg, eval_cfg=obqa_eval_cfg,
......
...@@ -37,16 +37,14 @@ _template = [ ...@@ -37,16 +37,14 @@ _template = [
obqa_datasets = [ obqa_datasets = [
dict( dict(
abbr="openbookqa",
type=OBQADataset, type=OBQADataset,
path='openbookqa', path='./data/openbookqa/Main/test.jsonl',
split='test',
), ),
dict( dict(
abbr='openbookqa_fact', abbr='openbookqa_fact',
type=OBQADataset, type=OBQADataset,
path='openbookqa', path='./data/openbookqa/Additional/test_complete.jsonl',
name='additional',
split='test',
), ),
] ]
for _i in range(2): for _i in range(2):
......
...@@ -34,7 +34,7 @@ piqa_datasets = [ ...@@ -34,7 +34,7 @@ piqa_datasets = [
dict( dict(
abbr="piqa", abbr="piqa",
type=piqaDataset_V2, type=piqaDataset_V2,
path="piqa", path='./data/piqa',
reader_cfg=piqa_reader_cfg, reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg, infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg) eval_cfg=piqa_eval_cfg)
......
...@@ -30,7 +30,7 @@ piqa_datasets = [ ...@@ -30,7 +30,7 @@ piqa_datasets = [
dict( dict(
abbr='piqa', abbr='piqa',
type=piqaDataset_V3, type=piqaDataset_V3,
path='piqa', path='./data/piqa',
reader_cfg=piqa_reader_cfg, reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg, infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg) eval_cfg=piqa_eval_cfg)
......
...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate ...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset from opencompass.datasets import piqaDataset
piqa_reader_cfg = dict( piqa_reader_cfg = dict(
input_columns=['goal', 'sol1', 'sol2'], input_columns=['goal', 'sol1', 'sol2'],
...@@ -23,8 +23,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -23,8 +23,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
piqa_datasets = [ piqa_datasets = [
dict( dict(
type=HFDataset, abbr='piqa',
path='piqa', type=piqaDataset,
path='./data/piqa',
reader_cfg=piqa_reader_cfg, reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg, infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg) eval_cfg=piqa_eval_cfg)
......
...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate ...@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset from opencompass.datasets import piqaDataset
piqa_reader_cfg = dict( piqa_reader_cfg = dict(
input_columns=['goal', 'sol1', 'sol2'], input_columns=['goal', 'sol1', 'sol2'],
...@@ -33,8 +33,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -33,8 +33,9 @@ piqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
piqa_datasets = [ piqa_datasets = [
dict( dict(
type=HFDataset, abbr='piqa',
path='piqa', type=piqaDataset,
path='./data/piqa',
reader_cfg=piqa_reader_cfg, reader_cfg=piqa_reader_cfg,
infer_cfg=piqa_infer_cfg, infer_cfg=piqa_infer_cfg,
eval_cfg=piqa_eval_cfg) eval_cfg=piqa_eval_cfg)
......
...@@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_option_postprocess ...@@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
race_reader_cfg = dict( race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'], input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer') output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict( race_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
...@@ -29,17 +32,17 @@ race_eval_cfg = dict( ...@@ -29,17 +32,17 @@ race_eval_cfg = dict(
race_datasets = [ race_datasets = [
dict( dict(
type=RaceDataset,
abbr='race-middle', abbr='race-middle',
path='race', type=RaceDataset,
path='./data/race',
name='middle', name='middle',
reader_cfg=race_reader_cfg, reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg, infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg), eval_cfg=race_eval_cfg),
dict( dict(
type=RaceDataset,
abbr='race-high', abbr='race-high',
path='race', type=RaceDataset,
path='./data/race',
name='high', name='high',
reader_cfg=race_reader_cfg, reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg, infer_cfg=race_infer_cfg,
......
...@@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess ...@@ -7,7 +7,10 @@ from opencompass.utils.text_postprocessors import first_capital_postprocess
race_reader_cfg = dict( race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'], input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer') output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict( race_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
...@@ -23,17 +26,17 @@ race_eval_cfg = dict( ...@@ -23,17 +26,17 @@ race_eval_cfg = dict(
race_datasets = [ race_datasets = [
dict( dict(
type=RaceDataset,
abbr='race-middle', abbr='race-middle',
path='race', type=RaceDataset,
path='./data/race',
name='middle', name='middle',
reader_cfg=race_reader_cfg, reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg, infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg), eval_cfg=race_eval_cfg),
dict( dict(
type=RaceDataset,
abbr='race-high', abbr='race-high',
path='race', type=RaceDataset,
path='./data/race',
name='high', name='high',
reader_cfg=race_reader_cfg, reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg, infer_cfg=race_infer_cfg,
......
...@@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset ...@@ -6,7 +6,10 @@ from opencompass.datasets import RaceDataset
race_reader_cfg = dict( race_reader_cfg = dict(
input_columns=['article', 'question', 'A', 'B', 'C', 'D'], input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
output_column='answer') output_column='answer',
train_split="validation",
test_split="test"
)
race_infer_cfg = dict( race_infer_cfg = dict(
prompt_template=dict( prompt_template=dict(
...@@ -27,17 +30,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator)) ...@@ -27,17 +30,17 @@ race_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
race_datasets = [ race_datasets = [
dict( dict(
type=RaceDataset,
abbr='race-middle', abbr='race-middle',
path='race', type=RaceDataset,
path='./data/race',
name='middle', name='middle',
reader_cfg=race_reader_cfg, reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg, infer_cfg=race_infer_cfg,
eval_cfg=race_eval_cfg), eval_cfg=race_eval_cfg),
dict( dict(
type=RaceDataset,
abbr='race-high', abbr='race-high',
path='race', type=RaceDataset,
path='./data/race',
name='high', name='high',
reader_cfg=race_reader_cfg, reader_cfg=race_reader_cfg,
infer_cfg=race_infer_cfg, infer_cfg=race_infer_cfg,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment