Unverified Commit 3f37c40a authored by philipwangOvO's avatar philipwangOvO Committed by GitHub
Browse files

[Dataset] Refactor LEval

parent 60c2d3d7
......@@ -22,6 +22,10 @@ repos:
rev: v2.2.1
hooks:
- id: codespell
exclude: >
(?x)^(
.*\.jsonl
)$
- repo: https://gitee.com/openmmlab/mirrors-pre-commit-hooks
rev: v4.3.0
hooks:
......
......@@ -22,6 +22,10 @@ repos:
rev: v2.2.1
hooks:
- id: codespell
exclude: >
(?x)^(
.*\.jsonl
)$
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
......
# Entry-point config for the LEval paper-assistant summarization task:
# re-exports the dataset list from the pinned config variant (6c03d0).
from mmengine.config import read_base

with read_base():
    from .LEval_paper_assistant_gen_6c03d0 import LEval_ps_summ_datasets  # noqa: F401, F403
# Entry-point config for the LEval patent summarization task:
# re-exports the dataset list from the pinned config variant (db3565).
from mmengine.config import read_base

with read_base():
    from .LEval_patent_summ_gen_db3565 import LEval_patent_summ_datasets  # noqa: F401, F403
# Entry-point config for the LEval review summarization task:
# re-exports the dataset list from the pinned config variant (6c03d0).
from mmengine.config import read_base

with read_base():
    from .LEval_review_summ_gen_6c03d0 import LEval_review_summ_datasets  # noqa: F401, F403
# Entry-point config for the LEval TV-show summarization task:
# re-exports the dataset list from the ROUGE-scored config variant.
from mmengine.config import read_base

with read_base():
    from .LEval_tvshow_summ_gen_rouge import LEval_tvshow_summ_datasets  # noqa: F401, F403
# OpenCompass config for the LEval TV-show summarization task.
# Defines how samples are read (reader_cfg), how prompts are built and
# generated (infer_cfg), and how predictions are scored (eval_cfg).
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalTVShowSummDataset

# Columns fed into the prompt ('context', 'question') and the reference
# column ('answer'); both splits point at the 'test' split.
LEval_tvshow_summ_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
    train_split='test',
    test_split='test'
)

# Zero-shot inference: the HUMAN turn carries the document and question,
# and the model continues after the 'TL;DR:' cue in the BOT turn.
# Generation is capped at 512 output tokens.
LEval_tvshow_summ_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(role='HUMAN', prompt='{context}\nQuestion: {question}'),
                dict(role='BOT', prompt='TL;DR:'),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=512)
)

# Score the BOT turn's generation against the reference with ROUGE.
LEval_tvshow_summ_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

# Exported dataset list picked up by the benchmark entry points.
LEval_tvshow_summ_datasets = [
    dict(
        type=LEvalTVShowSummDataset,
        abbr='LEval_tvshow_summ',
        # HuggingFace dataset repo and subset name.
        path='L4NLP/LEval',
        name='tv_show_summ',
        reader_cfg=LEval_tvshow_summ_reader_cfg,
        infer_cfg=LEval_tvshow_summ_infer_cfg,
        eval_cfg=LEval_tvshow_summ_eval_cfg)
]
# Entry-point config for the LEval TV-show summarization task:
# re-exports the dataset list from the pinned config variant (049a5c).
from mmengine.config import read_base

with read_base():
    from .LEval_tvshow_summ_gen_049a5c import LEval_tvshow_summ_datasets  # noqa: F401, F403
# Aggregator config: imports every individual LEval sub-task config and
# concatenates their exported dataset lists into a single `leval_datasets`
# list for running the full benchmark.
from mmengine.config import read_base

with read_base():
    from .levalnaturalquestion.leval_naturalquestion_gen import LEval_nq_datasets
    from .levalnarrativeqa.leval_narrativeqa_gen import LEval_narrativeqa_datasets
    from .levalmultidocqa.leval_multidocqa_gen import LEval_multidocqa_datasets
    from .levalcoursera.leval_coursera_gen import LEval_coursera_datasets
    from .levaltpo.leval_tpo_gen import LEval_tpo_datasets
    from .levalquality.leval_quality_gen import LEval_quality_datasets
    from .levalgsm100.leval_gsm100_gen import LEval_gsm100_datasets
    from .levaltopicretrieval.leval_topic_retrieval_gen import LEval_tr_datasets
    from .levalfinancialqa.leval_financialqa_gen import LEval_financialqa_datasets
    from .levalgovreportsumm.leval_gov_report_summ_gen import LEval_govreport_summ_datasets
    from .levallegalcontractqa.leval_legalcontractqa_gen import LEval_legalqa_datasets
    from .levalmeetingsumm.leval_meetingsumm_gen import LEval_meetingsumm_datasets
    from .levalnewssumm.leval_newssumm_gen import LEval_newssumm_datasets
    from .levalpaperassistant.leval_paper_assistant_gen import LEval_ps_summ_datasets
    from .levalpatentsumm.leval_patent_summ_gen import LEval_patent_summ_datasets
    from .levaltvshowsumm.leval_tvshow_summ_gen import LEval_tvshow_summ_datasets
    from .levalscientificqa.leval_scientificqa_gen import LEval_scientificqa_datasets
    from .levalreviewsumm.leval_review_summ_gen import LEval_review_summ_datasets

# Flatten every `*_datasets` list bound above into one list.  locals() is
# read lazily inside sum(), before `leval_datasets` itself is bound, so the
# result does not include itself.
leval_datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
\ No newline at end of file
# Entry-point config for the LEval Coursera QA task:
# re-exports the dataset list from the pinned config variant (5c84a9).
from mmengine.config import read_base

with read_base():
    from .LEval_coursera_gen_5c84a9 import LEval_coursera_datasets  # noqa: F401, F403
from .leval_coursera_gen_36a006 import LEval_coursera_datasets # noqa: F401, F403
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalCourseraDataset
from opencompass.datasets.leval import LEvalCourseraDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
LEval_coursera_reader_cfg = dict(
......@@ -16,8 +16,11 @@ LEval_coursera_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction based on this document. For multi-choice questions, there could be a single correct option or multiple correct options. Please only provide the letter corresponding to the answer (like A or AB) when answering.'),
],
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows.\n{context}\nQuestion:{question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......
# Entry-point config for the LEval meeting summarization task:
# re-exports the dataset list from the pinned config variant (6c03d0).
from mmengine.config import read_base

with read_base():
    from .LEval_meetingsumm_gen_6c03d0 import LEval_meetingsumm_datasets  # noqa: F401, F403
from .leval_financialqa_gen_b03798 import LEval_financialqa_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalFinancialQADataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalFinancialQADataset
LEval_financialqa_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_financialqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_financialqa_infer_cfg = dict(
)
LEval_financialqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
# Entry-point config for the LEval government-report summarization task:
# re-exports the dataset list from the pinned config variant (c68a56).
from mmengine.config import read_base

with read_base():
    from .LEval_gov_report_summ_gen_c68a56 import LEval_govreport_summ_datasets  # noqa: F401, F403
from .leval_gov_report_summ_gen_b03798 import LEval_govreport_summ_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalGovReportSummDataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalGovReportSummDataset
LEval_govreport_summ_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_govreport_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='Government report: {context}\n{question}\nTL;DR:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_govreport_summ_infer_cfg = dict(
)
LEval_govreport_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
# Entry-point config for the LEval GSM-100 math task:
# re-exports the dataset list from the pinned config variant (a4d1f8).
from mmengine.config import read_base

with read_base():
    from .LEval_gsm100_gen_a4d1f8 import LEval_gsm100_datasets
from .leval_gsm100_gen_77dd94 import LEval_gsm100_datasets
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalGSM100Dataset
from opencompass.datasets.leval import LEvalGSM100Dataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
from opencompass.registry import TEXT_POSTPROCESSORS
from opencompass.datasets import gsm100_dataset_postprocess, gsm100_postprocess
......@@ -18,8 +18,11 @@ LEval_gsm100_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Given several question answer pairs, you need to follow a similar format to answer the last question. Make sure the response is end with The answer is _ . '),
],
round=[
dict(role='HUMAN', prompt='{question}\n'),
dict(role='HUMAN', prompt='{context}\n\n{question}\n'),
], )),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512)
......
# Entry-point config for the LEval NarrativeQA task:
# re-exports the dataset list from the pinned config variant (9fec98).
from mmengine.config import read_base

with read_base():
    from .LEval_narrativeqa_gen_9fec98 import LEval_narrativeqa_datasets  # noqa: F401, F403
from .leval_legalcontractqa_gen_68a2ac import LEval_legalqa_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalLegalContractQADataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalLegalContractQADataset
LEval_legalqa_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_legalqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_legalqa_infer_cfg = dict(
)
LEval_legalqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
# Entry-point config for the LEval financial QA task:
# re-exports the dataset list from the pinned config variant (9f5404).
from mmengine.config import read_base

with read_base():
    from .LEval_financialqa_gen_9f5404 import LEval_financialqa_datasets  # noqa: F401, F403
from .leval_meetingsumm_gen_b03798 import LEval_meetingsumm_datasets # noqa: F401, F403
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment