Unverified Commit 3f37c40a authored by philipwangOvO's avatar philipwangOvO Committed by GitHub
Browse files

[Dataset] Refactor LEval

parent 60c2d3d7
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalMeetingSummDataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalMeetingSummDataset
LEval_meetingsumm_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_meetingsumm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_meetingsumm_infer_cfg = dict(
)
LEval_meetingsumm_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .LEval_multidocqa_gen_87dc85 import LEval_multidocqa_datasets # noqa: F401, F403
from .leval_multidocqa_gen_96bf3f import LEval_multidocqa_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalMultidocQADataset
from opencompass.datasets.leval import LEvalMultidocQADataset
LEval_multidocqa_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_multidocqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......
from mmengine.config import read_base
with read_base():
from .LEval_legalcontractqa_gen_f0bb20 import LEval_legalqa_datasets # noqa: F401, F403
from .leval_narrativeqa_gen_766dd0 import LEval_narrativeqa_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalNarrativeQADataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalNarrativeQADataset
LEval_narrativeqa_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_narrativeqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_narrativeqa_infer_cfg = dict(
)
LEval_narrativeqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator,),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .LEval_naturalquestion_gen_9fec98 import LEval_nq_datasets # noqa: F401, F403
from .leval_naturalquestion_gen_52c33f import LEval_nq_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalNaturalQuestionDataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalNaturalQuestionDataset
LEval_nq_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_nq_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}?\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}?\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_nq_infer_cfg = dict(
)
LEval_nq_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .LEval_newssumm_gen_db3565 import LEval_newssumm_datasets # noqa: F401, F403
from .leval_newssumm_gen_b03798 import LEval_newssumm_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalNewsSummDataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalNewsSummDataset
LEval_newssumm_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_newssumm_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nTL;DR:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_newssumm_infer_cfg = dict(
)
LEval_newssumm_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .leval_paper_assistant_gen_b03798 import LEval_ps_summ_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalPaperAssistantDataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalPaperAssistantDataset
LEval_ps_summ_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_ps_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_ps_summ_infer_cfg = dict(
)
LEval_ps_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .leval_patent_summ_gen_b03798 import LEval_patent_summ_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalPatentSummDataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalPatentSummDataset
LEval_patent_summ_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_patent_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\n{question}\nTL;DR:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_patent_summ_infer_cfg = dict(
)
LEval_patent_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .LEval_quality_gen_bd35f4 import LEval_quality_datasets # noqa: F401, F403
from .leval_quality_gen_36a006 import LEval_quality_datasets # noqa: F401, F403
......@@ -2,7 +2,7 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator, AccEvaluator
from opencompass.datasets import LEvalQualityDataset
from opencompass.datasets.leval import LEvalQualityDataset
from opencompass.utils.text_postprocessors import first_capital_postprocess, first_capital_postprocess_multi
LEval_quality_reader_cfg = dict(
......@@ -16,8 +16,11 @@ LEval_quality_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction based on this document. For multi-choice questions, there could be a single correct option or multiple correct options. Please only provide the letter corresponding to the answer (like A or AB) when answering.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows.\n{context}\nQuestion:{question}\nAnswer:'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......
from mmengine.config import read_base
with read_base():
from .leval_review_summ_gen_b03798 import LEval_review_summ_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator, SquadEvaluator
from opencompass.datasets import LEvalReviewSummDataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalReviewSummDataset
LEval_review_summ_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_review_summ_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_review_summ_infer_cfg = dict(
)
LEval_review_summ_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .LEval_scientificqa_gen_0c6e71 import LEval_scientificqa_datasets # noqa: F401, F403
from .leval_scientificqa_gen_96bf3f import LEval_scientificqa_datasets # noqa: F401, F403
......@@ -2,10 +2,10 @@ from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import EMEvaluator, RougeEvaluator
from opencompass.datasets import LEvalScientificQADataset
from opencompass.datasets.leval import LEvalGPTEvaluator, LEvalScientificQADataset
LEval_scientificqa_reader_cfg = dict(
input_columns=['context', 'question'],
input_columns=['context', 'question', 'length'],
output_column='answer',
train_split='test',
test_split='test'
......@@ -15,8 +15,11 @@ LEval_scientificqa_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(
begin=[
dict(role='SYSTEM', fallback_role='HUMAN', prompt='Now you are given a very long document. Please follow the instruction after this document. These instructions may include summarizing a document, answering questions based on the document, or writing a required paragraph.'),
],
round=[
dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
dict(role='HUMAN', prompt='Document is as follows. {context}\nInstruction: {question}\nAnswer this question with {length} words.'),
dict(role='BOT', prompt=''),
], )),
retriever=dict(type=ZeroRetriever),
......@@ -24,7 +27,7 @@ LEval_scientificqa_infer_cfg = dict(
)
LEval_scientificqa_eval_cfg = dict(
evaluator=dict(type=RougeEvaluator),
evaluator=dict(type=LEvalGPTEvaluator),
pred_role='BOT'
)
......
from mmengine.config import read_base
with read_base():
from .LEval_topic_retrieval_gen_af0562 import LEval_tr_datasets
from .leval_topic_retrieval_gen_bf433f import LEval_tr_datasets
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment