Unverified commit bf79ff1c, authored by Tong Gao and committed by GitHub

[Feature] Add LEval datasets


Co-authored-by: kennymckormick <dhd@pku.edu.cn>
parent 8d9cee06
from mmengine.config import read_base

with read_base():
    from .LEval_coursera_gen_5c84a9 import LEval_coursera_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalCourseraDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi

LEval_coursera_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_coursera_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\n{question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=10)
)

LEval_coursera_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_capital_postprocess_multi),
    pred_role='BOT'
)

LEval_coursera_datasets = [
    dict(
        type=LEvalCourseraDataset,
        abbr='LEval_coursera',
        path='L4NLP/LEval',
        name='coursera',
        reader_cfg=LEval_coursera_reader_cfg,
        infer_cfg=LEval_coursera_infer_cfg,
        eval_cfg=LEval_coursera_eval_cfg)
]
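
The Coursera subset is the only one here scored as multiple choice: AccEvaluator compares option letters, and first_capital_postprocess_multi reduces the short generation (max_out_len=10) to a string of option letters, presumably because a question can accept several correct options. A rough sketch of that kind of postprocessing (hypothetical illustration, not the OpenCompass implementation and not part of this commit):

import re


def extract_option_letters(prediction: str) -> str:
    # Hypothetical stand-in for first_capital_postprocess_multi: keep every
    # capital option letter, so 'The answers are A and C.' becomes 'AC'.
    # NOTE: illustration only, not part of this commit.
    return ''.join(re.findall(r'[A-D]', prediction))


assert extract_option_letters('A, C') == 'AC'
assert extract_option_letters('The correct options are B and D.') == 'BD'
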
from mmengine.config import read_base

with read_base():
    from .LEval_financialqa_gen_9f5404 import LEval_financialqa_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalFinancialQADataset

LEval_financialqa_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_financialqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\n{question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_financialqa_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_financialqa_datasets = [
    dict(
        type=LEvalFinancialQADataset,
        abbr='LEval_financialqa',
        path='L4NLP/LEval',
        name='financial_qa',
        reader_cfg=LEval_financialqa_reader_cfg,
        infer_cfg=LEval_financialqa_infer_cfg,
        eval_cfg=LEval_financialqa_eval_cfg)
]
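
Like the Coursera config, this one drives generation through a two-turn round template: the HUMAN turn carries the filled-in {context} and {question} placeholders, and the empty BOT turn marks where the model's completion goes, which is what pred_role='BOT' points the evaluator at. A minimal sketch of how such a round renders for one sample (hypothetical illustration; the real rendering is done by PromptTemplate together with the model's meta template):

template_round = [
    dict(role='HUMAN', prompt='{context}\n{question}\nAnswer:'),
    dict(role='BOT', prompt=''),
]

# Placeholder sample: the fields come from the reader's input_columns.
sample = dict(
    context='<a long financial report ...>',
    question='What drove revenue growth this quarter?',
)

for turn in template_round:
    # NOTE: simple .format substitution, for illustration only.
    print('[{}] {}'.format(turn['role'], turn['prompt'].format(**sample)))
# The empty BOT turn is where the model's answer is generated and later scored.
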
from mmengine.config import read_base

with read_base():
    from .LEval_gsm100_gen_a4d1f8 import LEval_gsm100_datasets

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalGSM100Dataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
from opencompass.registry import TEXT_POSTPROCESSORS
from opencompass.datasets import gsm100_dataset_postprocess, gsm100_postprocess

LEval_gsm100_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_gsm100_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{question}\n'),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_gsm100_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=gsm100_postprocess),
    dataset_postprocessor=dict(type=gsm100_dataset_postprocess)
)

LEval_gsm100_datasets = [
    dict(
        type=LEvalGSM100Dataset,
        abbr='LEval_gsm100',
        path='L4NLP/LEval',
        name='gsm100',
        reader_cfg=LEval_gsm100_reader_cfg,
        infer_cfg=LEval_gsm100_infer_cfg,
        eval_cfg=LEval_gsm100_eval_cfg)
]
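
gsm100 is the math subset: the template forwards {question} as-is, AccEvaluator compares final answers, and gsm100_postprocess / gsm100_dataset_postprocess normalize the model output and the reference into a comparable form. The usual trick for this kind of task is to pull the final number out of a free-form generation; the snippet below is a hypothetical sketch of that idea, not the actual gsm100_postprocess:

import re


def extract_last_number(prediction: str) -> str:
    # Hypothetical sketch: treat the last number in the generation as the
    # final answer. NOTE: illustration only, not the real gsm100_postprocess.
    numbers = re.findall(r'-?\d+(?:\.\d+)?', prediction.replace(',', ''))
    return numbers[-1] if numbers else ''


assert extract_last_number('... so the total is 1,230. The answer is 1230') == '1230'
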
from mmengine.config import read_base

with read_base():
    from .LEval_gov_report_summ_gen_c68a56 import LEval_govreport_summ_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalGovReportSummDataset

LEval_govreport_summ_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_govreport_summ_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='Government report: {context}\n{question}\nTL;DR:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_govreport_summ_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_govreport_summ_datasets = [
    dict(
        type=LEvalGovReportSummDataset,
        abbr='LEval_gov_report_summ',
        path='L4NLP/LEval',
        name='gov_report_summ',
        reader_cfg=LEval_govreport_summ_reader_cfg,
        infer_cfg=LEval_govreport_summ_infer_cfg,
        eval_cfg=LEval_govreport_summ_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_legalcontractqa_gen_f0bb20 import LEval_legalqa_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalLegalContractQADataset

LEval_legalqa_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_legalqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=128)
)

LEval_legalqa_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_legalqa_datasets = [
    dict(
        type=LEvalLegalContractQADataset,
        abbr='LEval_legal_contract_qa',
        path='L4NLP/LEval',
        name='legal_contract_qa',
        reader_cfg=LEval_legalqa_reader_cfg,
        infer_cfg=LEval_legalqa_infer_cfg,
        eval_cfg=LEval_legalqa_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_meetingsumm_gen_6c03d0 import LEval_meetingsumm_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalMeetingSummDataset

LEval_meetingsumm_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_meetingsumm_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_meetingsumm_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_meetingsumm_datasets = [
    dict(
        type=LEvalMeetingSummDataset,
        abbr='LEval_meeting_summ',
        path='L4NLP/LEval',
        name='meeting_summ',
        reader_cfg=LEval_meetingsumm_reader_cfg,
        infer_cfg=LEval_meetingsumm_infer_cfg,
        eval_cfg=LEval_meetingsumm_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_multidocqa_gen_87dc85 import LEval_multidocqa_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalMultidocQADataset

LEval_multidocqa_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_multidocqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=64)
)

LEval_multidocqa_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_multidocqa_datasets = [
    dict(
        type=LEvalMultidocQADataset,
        abbr='LEval_multidocqa',
        path='L4NLP/LEval',
        name='multidoc_qa',
        reader_cfg=LEval_multidocqa_reader_cfg,
        infer_cfg=LEval_multidocqa_infer_cfg,
        eval_cfg=LEval_multidocqa_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_narrativeqa_gen_9fec98 import LEval_narrativeqa_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalNarrativeQADataset

LEval_narrativeqa_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_narrativeqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=50)
)

LEval_narrativeqa_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_narrativeqa_datasets = [
    dict(
        type=LEvalNarrativeQADataset,
        abbr='LEval_narrativeqa',
        path='L4NLP/LEval',
        name='narrative_qa',
        reader_cfg=LEval_narrativeqa_reader_cfg,
        infer_cfg=LEval_narrativeqa_infer_cfg,
        eval_cfg=LEval_narrativeqa_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_naturalquestion_gen_9fec98 import LEval_nq_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalNaturalQuestionDataset

LEval_nq_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_nq_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=50)
)

LEval_nq_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_nq_datasets = [
    dict(
        type=LEvalNaturalQuestionDataset,
        abbr='LEval_nq',
        path='L4NLP/LEval',
        name='natural_question',
        reader_cfg=LEval_nq_reader_cfg,
        infer_cfg=LEval_nq_infer_cfg,
        eval_cfg=LEval_nq_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_newssumm_gen_db3565 import LEval_newssumm_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalNewsSummDataset

LEval_newssumm_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_newssumm_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\n{question}\nTL;DR:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_newssumm_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_newssumm_datasets = [
    dict(
        type=LEvalNewsSummDataset,
        abbr='LEval_news_summ',
        path='L4NLP/LEval',
        name='news_summ',
        reader_cfg=LEval_newssumm_reader_cfg,
        infer_cfg=LEval_newssumm_infer_cfg,
        eval_cfg=LEval_newssumm_eval_cfg)
]
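
Each of the *_datasets lists above is a drop-in fragment for a top-level evaluation config. A minimal sketch of how they would typically be consumed (the relative import paths and the model entry are placeholders; adjust them to wherever these files sit in your configs tree):

from mmengine.config import read_base

with read_base():
    # Placeholder paths: point these at the actual location of the files above.
    from .LEval_coursera_gen_5c84a9 import LEval_coursera_datasets
    from .LEval_gsm100_gen_a4d1f8 import LEval_gsm100_datasets
    from .LEval_newssumm_gen_db3565 import LEval_newssumm_datasets
    # ... remaining LEval_*_datasets imports

datasets = [
    *LEval_coursera_datasets,
    *LEval_gsm100_datasets,
    *LEval_newssumm_datasets,
]

models = [
    # Fill in one or more OpenCompass model configs, e.g. a HuggingFace model entry.
]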