"...git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "beb932c5d111872c5e45387e7b1b2b3dd0524a47"
Unverified commit bf79ff1c authored by Tong Gao, committed by GitHub

[Feature] Add LEval datasets


Co-authored-by: kennymckormick <dhd@pku.edu.cn>
parent 8d9cee06
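The configs below are consumed through OpenCompass's read_base mechanism: each task exports a *_datasets list that a top-level config can import and evaluate. A minimal usage sketch follows; the file name eval_leval_demo.py, the relative import paths, and the hf_llama_7b model config are illustrative assumptions, not part of this commit.

# eval_leval_demo.py -- hypothetical top-level config; adjust import paths to your checkout.
from mmengine.config import read_base

with read_base():
    # one of the dataset lists added by this commit
    from .datasets.leval.LEval_quality_gen_bd35f4 import LEval_quality_datasets
    # any existing model config works here; this path is only an example
    from .models.hf_llama_7b import models

# OpenCompass evaluates every entry in `datasets` against every entry in `models`.
datasets = [*LEval_quality_datasets]

Such a config is launched with OpenCompass's standard entry point, e.g. python run.py configs/eval_leval_demo.py.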
from mmengine.config import read_base

with read_base():
    from .LEval_paper_assistant_gen_6c03d0 import LEval_ps_summ_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalPaperAssistantDataset

LEval_ps_summ_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_ps_summ_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_ps_summ_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_ps_summ_datasets = [
    dict(
        type=LEvalPaperAssistantDataset,
        abbr='LEval_paper_assistant',
        path='L4NLP/LEval',
        name='paper_assistant',
        reader_cfg=LEval_ps_summ_reader_cfg,
        infer_cfg=LEval_ps_summ_infer_cfg,
        eval_cfg=LEval_ps_summ_eval_cfg)
]
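Each entry points at the Hugging Face dataset L4NLP/LEval with a per-task name, and the dataset class maps the raw hub fields onto the context/question/answer columns declared in the reader config. A quick way to inspect a raw subset is sketched below; it assumes the Hugging Face datasets package, network access, and that the hub subset names match the name fields used in these configs.

from datasets import load_dataset

# 'paper_assistant' mirrors the name field of the config above; these configs read the 'test' split.
ds = load_dataset('L4NLP/LEval', 'paper_assistant', split='test')
print(ds.column_names)  # raw hub fields, which LEvalPaperAssistantDataset remaps to context/question/answer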
from mmengine.config import read_base

with read_base():
    from .LEval_patent_summ_gen_db3565 import LEval_patent_summ_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalPatentSummDataset

LEval_patent_summ_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_patent_summ_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\n{question}\nTL;DR:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_patent_summ_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_patent_summ_datasets = [
    dict(
        type=LEvalPatentSummDataset,
        abbr='LEval_patent_summ',
        path='L4NLP/LEval',
        name='patent_summ',
        reader_cfg=LEval_patent_summ_reader_cfg,
        infer_cfg=LEval_patent_summ_infer_cfg,
        eval_cfg=LEval_patent_summ_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_quality_gen_bd35f4 import LEval_quality_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalQualityDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi

LEval_quality_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_quality_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=10)
)

LEval_quality_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_capital_postprocess),
    pred_role='BOT'
)

LEval_quality_datasets = [
    dict(
        type=LEvalQualityDataset,
        abbr='LEval_quality',
        path='L4NLP/LEval',
        name='quality',
        reader_cfg=LEval_quality_reader_cfg,
        infer_cfg=LEval_quality_infer_cfg,
        eval_cfg=LEval_quality_eval_cfg)
]
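For the multiple-choice tasks (quality, tpo) the prediction is reduced to a single letter before accuracy scoring. The sketch below illustrates the behaviour the first_capital_postprocess hook is relied on for; it is an illustrative re-implementation under an assumed name, not the OpenCompass source.

# Illustrative sketch (hypothetical helper name): keep the first uppercase character
# of the model output, or '' when there is none, so that AccEvaluator compares
# bare option letters against the reference answers.
def first_capital_letter(text: str) -> str:
    for ch in text:
        if ch.isupper():
            return ch
    return ''

assert first_capital_letter('B. The narrator leaves town.') == 'B'
assert first_capital_letter('answer: c') == ''  # no uppercase letter to extract

Because only the first capital is kept, a chatty prefix such as "The answer is C" would be reduced to "T", which is presumably why the inferencer above caps max_out_len at 10 and the prompt ends with "Answer:".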
from mmengine.config import read_base

with read_base():
    from .LEval_review_summ_gen_6c03d0 import LEval_review_summ_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalReviewSummDataset

LEval_review_summ_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_review_summ_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_review_summ_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_review_summ_datasets = [
    dict(
        type=LEvalReviewSummDataset,
        abbr='LEval_review_summ',
        path='L4NLP/LEval',
        name='review_summ',
        reader_cfg=LEval_review_summ_reader_cfg,
        infer_cfg=LEval_review_summ_infer_cfg,
        eval_cfg=LEval_review_summ_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_scientificqa_gen_0c6e71 import LEval_scientificqa_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalScientificQADataset

LEval_scientificqa_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_scientificqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=64)
)

LEval_scientificqa_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_scientificqa_datasets = [
    dict(
        type=LEvalScientificQADataset,
        abbr='LEval_scientificqa',
        path='L4NLP/LEval',
        name='scientific_qa',
        reader_cfg=LEval_scientificqa_reader_cfg,
        infer_cfg=LEval_scientificqa_infer_cfg,
        eval_cfg=LEval_scientificqa_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_tpo_gen_bd35f4 import LEval_tpo_datasets

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalTPODataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi

LEval_tpo_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_tpo_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=10)
)

LEval_tpo_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_postprocessor=dict(type=first_capital_postprocess),
    pred_role='BOT'
)

LEval_tpo_datasets = [
    dict(
        type=LEvalTPODataset,
        abbr='LEval_tpo',
        path='L4NLP/LEval',
        name='tpo',
        reader_cfg=LEval_tpo_reader_cfg,
        infer_cfg=LEval_tpo_infer_cfg,
        eval_cfg=LEval_tpo_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_tvshow_summ_gen_rouge import LEval_tvshow_summ_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalTVShowSummDataset

LEval_tvshow_summ_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_tvshow_summ_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}'),
                dict(role='BOT', prompt='TL;DR:'),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_tvshow_summ_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_tvshow_summ_datasets = [
    dict(
        type=LEvalTVShowSummDataset,
        abbr='LEval_tvshow_summ',
        path='L4NLP/LEval',
        name='tv_show_summ',
        reader_cfg=LEval_tvshow_summ_reader_cfg,
        infer_cfg=LEval_tvshow_summ_infer_cfg,
        eval_cfg=LEval_tvshow_summ_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_tvshow_summ_gen_049a5c import LEval_tvshow_summ_datasets  # noqa: F401, F403

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalTVShowSummDataset

LEval_tvshow_summ_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_tvshow_summ_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nTL;DR:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

LEval_tvshow_summ_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

LEval_tvshow_summ_datasets = [
    dict(
        type=LEvalTVShowSummDataset,
        abbr='LEval_tvshow_summ',
        path='L4NLP/LEval',
        name='tv_show_summ',
        reader_cfg=LEval_tvshow_summ_reader_cfg,
        infer_cfg=LEval_tvshow_summ_infer_cfg,
        eval_cfg=LEval_tvshow_summ_eval_cfg)
]
from mmengine.config import read_base

with read_base():
    from .LEval_topic_retrieval_gen_af0562 import LEval_tr_datasets

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalTopicRetrievalDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi, general_postprocess

LEval_tr_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

LEval_tr_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=30)
)

LEval_tr_eval_cfg = dict(
    evaluator=dict(type=EMEvaluator),
    pred_postprocessor=dict(type=general_postprocess),
    pred_role='BOT'
)

LEval_tr_datasets = [
    dict(
        type=LEvalTopicRetrievalDataset,
        abbr='LEval_topic_retrieval',
        path='L4NLP/LEval',
        name='topic_retrieval_longchat',
        reader_cfg=LEval_tr_reader_cfg,
        infer_cfg=LEval_tr_infer_cfg,
        eval_cfg=LEval_tr_eval_cfg)
]
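Each per-task file above exports its own *_datasets list, and a collection config can concatenate them into a single suite. Below is a minimal sketch assuming all of the files live in the same package; the file name leval.py is an assumption, and only one of the two tv-show variants is pulled in because both export LEval_tvshow_summ_datasets.

# leval.py -- hypothetical collection config gathering the LEval tasks defined above.
from mmengine.config import read_base

with read_base():
    from .LEval_paper_assistant_gen_6c03d0 import LEval_ps_summ_datasets  # noqa: F401, F403
    from .LEval_patent_summ_gen_db3565 import LEval_patent_summ_datasets  # noqa: F401, F403
    from .LEval_quality_gen_bd35f4 import LEval_quality_datasets  # noqa: F401, F403
    from .LEval_review_summ_gen_6c03d0 import LEval_review_summ_datasets  # noqa: F401, F403
    from .LEval_scientificqa_gen_0c6e71 import LEval_scientificqa_datasets  # noqa: F401, F403
    from .LEval_tpo_gen_bd35f4 import LEval_tpo_datasets  # noqa: F401, F403
    from .LEval_tvshow_summ_gen_049a5c import LEval_tvshow_summ_datasets  # noqa: F401, F403
    from .LEval_topic_retrieval_gen_af0562 import LEval_tr_datasets  # noqa: F401, F403

# Concatenate every imported list whose name ends in '_datasets' into one flat list.
leval_datasets = sum(
    (v for k, v in locals().items() if k.endswith('_datasets')), [])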
 from mmengine.config import read_base
 with read_base():
-    from .agieval_gen_397d81 import agieval_datasets  # noqa: F401, F403
+    from .agieval_gen_64afd3 import agieval_datasets  # noqa: F401, F403