Unverified Commit 9058be07 authored by Tong Gao, committed by GitHub

[Feature] Simplify entry script (#204)

* [Feature] Simplify entry script

* update
parent f480b727
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import C3Dataset
C3_reader_cfg = dict(
input_columns=[
'question', 'content', 'choice0', 'choice1', 'choice2', 'choice3',
'choices'
],
output_column='label')
C3_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
0:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice0}",
1:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice1}",
2:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice2}",
3:
"阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: -{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice3}",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
C3_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
C3_datasets = [
dict(
type=C3Dataset,
abbr='C3',
path='./data/CLUE/C3/dev_0.json',
reader_cfg=C3_reader_cfg,
infer_cfg=C3_infer_cfg,
eval_cfg=C3_eval_cfg)
]
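# Illustration only (not part of the dataset config; OpenCompass does this
# internally): each key of the template dict above is one candidate label and
# its value is a complete prompt for that candidate. A PPL-style inferencer
# renders every candidate with the same example and keeps the label whose text
# the model scores with the lowest perplexity. The sample record is made up.
_c3_demo = dict(content='今天下雨了。', question='天气怎么样?',
                choice0='晴天', choice1='下雨', choice2='下雪', choice3='刮风')
_c3_candidate_1 = (
    "阅读以下内容,选择合适的选项回答: {content} 问题:{question}\n 选项: "
    "-{choice0} -{choice1} -{choice2} -{choice3} 答: [MASK]-{choice1}").format(**_c3_demo)
assert _c3_candidate_1.endswith('[MASK]-下雨')
del _c3_demo, _c3_candidate_1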
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer, GLMChoiceInferencer
from opencompass.datasets import GaokaoBenchDataset
MCQ_TMPL = """\
请你做一道{type}。
请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。
例如:【答案】: A <eoa>
完整的题目回答的格式如下:
回答:【解析】 ... <eoe>
【答案】 ... <eoa>
请你严格按照上述格式作答。
题目如下:{{question}}
回答:"""
MULTI_MCQ_TMPL = """\
请你做一道{type}。
请你一步一步思考。每一题你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。
例如:(1)【答案】 A <eoa>
(2)【答案】 B <eoa>
请你严格按照上述格式作答。
题目如下:{{question}}
回答:"""
CLOZE_TMPL = """\
请你做一道{type}。
请你一步一步思考。将符合题意的五个选项的字母写在【答案】和<eoa>之间。
例如:【答案】 A B C D E <eoa>
请严格按照上述格式作答。
题目如下:{{question}}
回答:"""
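# Note on the escaped braces above (illustrative, standalone check): the
# templates are formatted twice. The first .format(type=...) call, used in the
# prompt lists below, fills {type} and turns the doubled {{question}} into a
# literal {question}, which is filled with the actual question text in a second
# step further down. The example value '数学选择题' is taken from the list below.
_prefix_demo = MCQ_TMPL.format(type='数学选择题')
assert '{question}' in _prefix_demo and '{type}' not in _prefix_demo
assert '1 + 1 = ?' in _prefix_demo.format(question='1 + 1 = ?')
del _prefix_demo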
_MCQ_prompts = [
{
"type": "single_choice",
"keyword": "2010-2022_Math_II_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_Math_I_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='数学选择题'),
"comment": ""
},
{
"type": "single_choice",
"keyword": "2010-2022_History_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='历史选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Biology_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='生物选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Political_Science_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='政治选择题'),
},
{
"type": "multi_choice",
"keyword": "2010-2022_Physics_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='物理选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2022_Chemistry_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='化学选择题'),
},
{
"type": "single_choice",
"keyword": "2010-2013_English_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='英语选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Modern_Lit",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='语文阅读理解题,其中包含三个小题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Fill_in_Blanks",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语完形填空题,其中包含二十个小题'),
},
{
"type": "five_out_of_seven",
"keyword": "2012-2022_English_Cloze_Test",
"prefix_prompt": CLOZE_TMPL.format(type='英语完形填空题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Geography_MCQs",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='地理选择题'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_English_Reading_Comp",
"prefix_prompt": MULTI_MCQ_TMPL.format(type='英语阅读理解题,其中包含三到五个小题。'),
},
{
"type": "multi_question_choice",
"keyword": "2010-2022_Chinese_Lang_and_Usage_MCQs",
"prefix_prompt": MCQ_TMPL.format(type='语文选择题'),
},
]
_FBQ_prompts = [{
"type": "cloze",
"keyword": "2010-2022_Math_I_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2010-2022_Math_II_Fill-in-the-Blank",
"prefix_prompt":
"请解答下面的数学填空题\n仔细阅读题目,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword":
"2010-2022_Chinese_Language_Famous_Passages_and_Sentences_Dictation",
"prefix_prompt":
"请回答下面的语文填空题\n请你仔细阅读题目,先找到题目对应的中国名篇,再从名篇中找到合适的句子填写到题目的空白处。请你将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n(2)【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}, {
"type": "cloze",
"keyword": "2014-2022_English_Language_Cloze_Passage",
"prefix_prompt":
"请回答下面的英语短文填词题\n仔细阅读题目,空白处请填入一个适当单词或者括号内单词的正确形式。请你一步步思考,将思考过程写在【解析】和<eoe>之间,将最终答案写在【答案】和<eoa>之间。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n(2)【解析】 ...<eoe>\n【答案】...<eoa>\n请严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}]
_OEQ_prompts = [
{
"type": "subjective",
"keyword": "2010-2022_Geography_Open-ended_Questions",
"prefix_prompt":
"请解答下面的地理解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。你的答案请写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chemistry_Open-ended_Questions",
"prefix_prompt":
"请解答下面的化学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_I_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_History_Open-ended_Questions",
"prefix_prompt":
"请解答下面的历史解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Biology_Open-ended_Questions",
"prefix_prompt":
"请解答下面的生物解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,同一小题的答案用\t分隔开。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...\t...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Math_II_Open-ended_Questions",
"prefix_prompt":
"请解答下面的数学解答题\n仔细阅读题目并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间,答案需要有完整的解题步骤。\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Physics_Open-ended_Questions",
"prefix_prompt":
"请解答下面的物理解答题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Political_Science_Open-ended_Questions",
"prefix_prompt":
"请解答下面的政治解答题\n仔细阅读材料和题目,并充分结合你已有的知识,解答其中的问题,请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的答案写在【答案】和<eoa>之间\n完整的题目回答格式如下:\n(1)【解析】 ...<eoe>\n【答案】...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "correction",
"keyword": "2012-2022_English_Language_Error_Correction",
"prefix_prompt":
"请解答下面的英语短文改错题,仔细阅读题目并充分结合你已有的知识,找出其中10处需要改动的地方。请你一步步思考,把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "prefix_prompt": [
#     "请解答下面的英语短文改错题,仔细阅读题目并充分结合你已有的知识,找出其中10处需要改动的地方。请你一步步思考,把修改后的短文写在【答案】和<eoa>之间。\n完整的题目回答格式如下:【答案】 ...<eoa>\n 请你严格按照上述格式作答。\n题目如下:",
# "请比较下面两篇短文,找到第二篇和第一篇的10处不同,每处不同只涉及一个单词,请将结果写在【答案】和<eoa>之间。例如:【答案】1. 将play改为plays\n 2.增加了the\n ... <eoa>\n 完整的题目回答格式如下:【答案】(1) ... \n (2) ...\n ...(10) ...\n<eoa>\n请你严格按照上述格式作答。\n短文如下:"
# ],
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Ancient_Poetry_Reading",
"prefix_prompt":
"请解答下面的语文古代诗歌阅读题,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Practical_Text_Reading",
"prefix_prompt":
"请解答下面的语文实用类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Literary_Text_Reading",
"prefix_prompt":
"请解答下面的语文文学类文本阅读,仔细阅读题目,注意其中可能含有单选题和多选题。请你一步步思考并将最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword": "2010-2022_Chinese_Language_Classical_Chinese_Reading",
"prefix_prompt":
"请解答下面的语文文言文阅读,仔细阅读题目,前三题是单选题,最后一题要将文言文翻译为现代汉语。请你一步步思考并把最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。翻译题把翻译后的现代汉语句子写在【答案】后面,例如“【答案】今天天气很好 <eoa>”\n完整的题目回答格式如下:(1)【答案】 ...<eoa>\n (2)【答案】...<eoa>\n请你严格按照上述格式作答,如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
},
{
"type": "subjective",
"keyword":
"2010-2022_Chinese_Language_Language_and_Writing_Skills_Open-ended_Questions",
"prefix_prompt":
"请解答下面的语文解答题,仔细阅读题目,注意其中可能含有选择题。请你一步步思考并将思考过程写在【解析】和<eoe>之间。请把你的最终答案写在【答案】和<eoa>之间。选择题你要从选项中选出符合题意的答案,例如“【答案】A <eoa>”。\n完整的题目回答格式如下:(1)【解析】 ...<eoe>\n【答案】 ...<eoa>\n (2)【解析】 ...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。如果不止一道题,请分别作答。\n题目如下:",
"comment": ""
}
]
gaokao_bench_datasets = []
for _folder, _prompts in [
("Multiple-choice_Questions", _MCQ_prompts),
("Fill-in-the-blank_Questions", _FBQ_prompts),
("Open-ended_Questions", _OEQ_prompts),
]:
for _p in _prompts:
if _p['type'] == "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role":
"HUMAN",
"prompt":
_p['prefix_prompt'].format(question='</question>')
}]
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GenInferencer
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
# Single choice dataset
_folder = "Multiple-choice_Questions"
for _p in _MCQ_prompts:
if _p['type'] != "single_choice":
continue
_reader_cfg = {
"input_columns": ['question'],
"output_column": 'answer',
}
_infer_cfg = {
"ice_template": {
"type": PromptTemplate,
"template": {
answer: {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '</question>'
}, {
"role": "BOT",
"prompt": f"【答案】{answer} <eoa>"
}]
}
for answer in ['A', 'B', 'C', 'D']
},
"column_token_map": {
"question": "</question>"
},
"ice_token": "</E>"
},
"prompt_template": {
"type": PromptTemplate,
"template": {
"round": [{
"role": "HUMAN",
"prompt": _p['prefix_prompt'] + '</question>'
}, {
"role": "BOT",
"prompt": "【答案】("
}]
},
"column_token_map": {
"question": "</question>"
},
},
"retriever": {
"type": ZeroRetriever
},
"inferencer": {
"type": GLMChoiceInferencer,
"choices": ['A', 'B', 'C', 'D'],
}
}
_eval_cfg = {
"evaluator": {
"type": "GaokaoBenchEvaluator" + "_" + _p['type'],
},
"pred_role": "BOT",
}
_base_path = './data/GAOKAO-BENCH/data'
_dataset = {
"type": GaokaoBenchDataset,
"abbr": "GaokaoBench_" + _p['keyword'],
"path": _base_path + '/' + _folder + '/' + _p['keyword'] + ".json",
"reader_cfg": _reader_cfg,
"infer_cfg": _infer_cfg,
"eval_cfg": _eval_cfg,
}
gaokao_bench_datasets.append(_dataset)
_temporary_variables = [k for k in globals() if k.startswith('_')]
for _t in _temporary_variables:
del globals()[_t]
del _temporary_variables, _t
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
afqmc_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
afqmc_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
# 0: "{sentence1},{sentence2}不同。",
# 1: "{sentence1},{sentence2}相似。"
0:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]no",
1:
"I received the questions \"{sentence1}\" and \"{sentence2}\". Are they duplicates?[MASK]yes",
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
afqmc_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
afqmc_datasets = [
dict(
type=HFDataset,
abbr='afqmc',
path='json',
data_files='./data/CLUE/AFQMC/test_public.json',
split='train',
reader_cfg=afqmc_reader_cfg,
infer_cfg=afqmc_infer_cfg,
eval_cfg=afqmc_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import AGIEvalDataset
agieval_reader_cfg = dict(
input_columns=['problem_input'], output_column='label')
agieval_single_choice_sets = [
'gaokao-chinese',
'gaokao-english',
'gaokao-geography',
'gaokao-history',
'gaokao-biology',
'gaokao-chemistry',
'gaokao-physics',
'gaokao-mathqa',
'logiqa-zh',
'lsat-ar',
'lsat-lr',
'lsat-rc',
'logiqa-en',
'sat-math',
'sat-en',
'sat-en-without-passage',
'aqua-rat',
]
agieval_multiple_choices_sets = [
'jec-qa-kd',  # requires extra data preprocessing
'jec-qa-ca',  # requires extra data preprocessing
]
agieval_cloze_sets = ['gaokao-mathcloze', 'math']
agieval_datasets = []
for name in agieval_single_choice_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
label: f'{{problem_input}} {label}'
for label in ['A', 'B', 'C', 'D']
}),
retriever=dict(type=ZeroRetriever
),  # the retriever has no effect here; zero-shot / few-shot follows the input setting
inferencer=dict(
type=GLMChoiceInferencer, choices=['A', 'B', 'C', 'D']))
agieval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_multiple_choices_sets:
_hint = '答案是: '
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(role='HUMAN', prompt=f'{{question}}\n{{options}}\n{_hint}')
]),
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type='GenInferencer'))
agieval_eval_cfg = dict(
evaluator=dict(type=AccEvaluator),
pred_postprocessor=dict(type='first-capital-multi'))
agieval_datasets.append(
dict(
type='AGIEvalDataset_v2',
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
for name in agieval_cloze_sets:
agieval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template='</E>{problem_input}',
ice_token='</E>'),
retriever=dict(type=ZeroRetriever
),  # the retriever has no effect here; zero-shot / few-shot follows the input setting
inferencer=dict(type='GenInferencer'))
agieval_eval_cfg = dict(evaluator=dict(type='AGIEvalEvaluator'))
agieval_datasets.append(
dict(
type=AGIEvalDataset,
path='./data/AGIEval/data/v1/',
name=name,
abbr='agieval-' + name,
setting_name='zero-shot',
reader_cfg=agieval_reader_cfg,
infer_cfg=agieval_infer_cfg.copy(),
eval_cfg=agieval_eval_cfg.copy()))
del name, agieval_infer_cfg, agieval_eval_cfg
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CEvalDataset
ceval_reader_cfg = dict(
input_columns=['question', 'A', 'B', 'C', 'D'],
output_column='answer',
train_split='dev',
test_split="val")
ceval_prompt_template = dict(
type=PromptTemplate,
template=None,
ice_token='</E>',
)
ceval_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
answer:
f'{{question}}\n(A) {{A}}\n(B) {{B}}\n(C) {{C}}\n(D) {{D}}\n答案: ({answer}) {{{answer}}}\n'
for answer in ['A', 'B', 'C', 'D']
}),
prompt_template=ceval_prompt_template,
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GLMChoiceInferencer, fix_id_list=[0, 1, 2, 3, 4]))
ceval_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
ceval_all_sets = [
"操作系统",
"初中地理",
"初中化学",
"初中历史",
"初中生物",
"初中数学",
"初中物理",
"初中政治",
"大学编程",
"大学化学",
"大学经济学",
"大学物理",
"大学中国史",
"导游资格",
"法律职业资格",
"法学",
"概率统计",
"高等数学",
"高中地理",
"高中化学",
"高中历史",
"高中生物",
"高中数学",
"高中物理",
"高中语文",
"高中政治",
"公务员",
"工商管理",
"环境影响评价工程师",
"基础医学",
"计算机网络",
"计算机组成",
"教师资格",
"教育学",
"离散数学",
"临床医学",
"逻辑学",
"马克思主义基本原理",
"毛泽东思想和中国特色社会主义理论体系概论",
"兽医学",
"税务师",
"思想道德修养与法律基础",
"体育学",
"医师资格",
"艺术学",
"植物保护",
"中国语言文学",
"注册城乡规划师",
"注册电气工程师",
"注册会计师",
"注册计量师",
"注册消防工程师",
]
ceval_datasets = []
for _name in ceval_all_sets:
ceval_datasets.append(
dict(
type=CEvalDataset,
path="./data/ceval/release_ceval",
name=_name,
abbr='ceval-' + _name,
reader_cfg=ceval_reader_cfg,
infer_cfg=ceval_infer_cfg.copy(),
eval_cfg=ceval_eval_cfg.copy()))
ceval_datasets[-1]['infer_cfg'][
'prompt_template'] = ceval_prompt_template.copy()
ceval_datasets[-1]['infer_cfg']['prompt_template']['template'] = dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=f'以下是中国关于{_name}考试的单项选择题,请选出其中的正确答案。'),
'</E>',
],
round=[
dict(
role='HUMAN',
prompt=
'{question}\n(A) {A}\n(B) {B}\n(C) {C}\n(D) {D}\n答案: ('),
],
)
del _name
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CHIDDataset
chid_reader_cfg = dict(
input_columns=[f'content{i}' for i in range(7)], output_column='answer')
chid_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={answer: f"{{content{answer}}}"
for answer in range(7)}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
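# Side note (illustration, safe to remove): the comprehension above expands to
# {0: '{content0}', 1: '{content1}', ..., 6: '{content6}'} -- one PPL template
# per pre-rendered candidate sentence, so the inferencer scores the seven
# content{i} columns and AccEvaluator checks the best-scoring index against
# the 'answer' column.
assert {answer: f"{{content{answer}}}" for answer in range(7)}[3] == '{content3}'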
chid_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
chid_datasets = [
dict(
type=CHIDDataset,
path='json',
abbr='chid',
data_files='./data/FewCLUE/chid/test_public.json',
split='train',
reader_cfg=chid_reader_cfg,
infer_cfg=chid_infer_cfg,
eval_cfg=chid_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
cmnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'],
output_column='label',
test_split='train')
cmnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
cmnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
cmnli_datasets = [
dict(
type=HFDataset,
abbr='cmnli',
path='json',
split='train',
data_files='./data/CLUE/cmnli/cmnli_public/dev.json',
reader_cfg=cmnli_reader_cfg,
infer_cfg=cmnli_infer_cfg,
eval_cfg=cmnli_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import CslDataset
csl_reader_cfg = dict(
input_columns=["abst", "keywords"], output_column='label')
csl_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
0: "</E>摘要:</A>",
1: "</E>摘要:</A>关键词:</K>"
},
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
template=
'</E>Abstract: </A>\nKeyword: </K>\n Do all the keywords come from the given abstract? (Yes or No)',
column_token_map={
"abst": '</A>',
'keywords': '</K>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=['No', 'Yes']))
csl_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
csl_datasets = [
dict(
type=CslDataset,
path='json',
abbr='csl',
data_files='./data/FewCLUE/csl/test_public.json',
split='train',
reader_cfg=csl_reader_cfg,
infer_cfg=csl_infer_cfg,
eval_cfg=csl_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HFDataset, HumanEvaluator, humaneval_postprocess
humaneval_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# TODO: allow empty output-column
humaneval_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template='{prompt}'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
humaneval_eval_cfg = dict(
evaluator=dict(type=HumanEvaluator),
k=[1, 10, 100],  # this parameter is only used for humaneval
pred_postprocessor=dict(type=humaneval_postprocess),
)
humaneval_datasets = [
dict(
type=HFDataset,
path='openai_humaneval',
reader_cfg=humaneval_reader_cfg,
infer_cfg=humaneval_infer_cfg,
eval_cfg=humaneval_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
mmlu_reader_cfg = dict(
input_columns=['input', 'A', 'B', 'C', 'D'],
output_column='target',
train_split='validation')
mmlu_prompt_template = dict(
type=PromptTemplate,
template=None,
column_token_map={
'input': '</input>',
'A': '</A>',
'B': '</B>',
'C': '</C>',
'D': '</D>',
'target': '</target>'
},
ice_token='</E>',
)
mmlu_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={
target: '</input>\n(A) </A>\n(B) </B>\n(C) </C>\n(D) </D>\n'
f'Answer: ({target}) </{target}>\n'
for target in ['A', 'B', 'C', 'D']
},
column_token_map={
'input': '</input>',
'A': '</A>',
'B': '</B>',
'C': '</C>',
'D': '</D>',
'target': '</target>'
}),
prompt_template=mmlu_prompt_template,
retriever=dict(type=FixKRetriever),
inferencer=dict(type=GLMChoiceInferencer, fix_id_list=[0, 1, 2, 3, 4]))
mmlu_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
mmlu_all_sets = [
"college_biology",
# "college_chemistry",
# "college_computer_science",
# "college_mathematics",
# "college_physics",
# "electrical_engineering",
# "astronomy",
# "anatomy",
# "abstract_algebra",
# "machine_learning",
# "clinical_knowledge",
# "global_facts",
# "management",
# "nutrition",
# "marketing",
# "professional_accounting",
# "high_school_geography",
# "international_law",
# "moral_scenarios",
# "computer_security",
# "high_school_microeconomics",
# "professional_law",
# "medical_genetics",
# "professional_psychology",
# "jurisprudence",
# "world_religions",
# "philosophy",
# "virology",
# "high_school_chemistry",
# "public_relations",
# "high_school_macroeconomics",
# "human_sexuality",
# "elementary_mathematics",
# "high_school_physics",
# "high_school_computer_science",
# "high_school_european_history",
# "business_ethics",
# "moral_disputes",
# "high_school_statistics",
# "miscellaneous",
# "formal_logic",
# "high_school_government_and_politics",
# "prehistory",
# "security_studies",
# "high_school_biology",
# "logical_fallacies",
# "high_school_world_history",
# "professional_medicine",
# "high_school_mathematics",
# "college_medicine",
# "high_school_us_history",
# "sociology",
# "econometrics",
# "high_school_psychology",
# "human_aging",
# "us_foreign_policy",
# "conceptual_physics",
]
mmlu_key_sets = [
'college_biology',
'college_chemistry',
'college_computer_science',
'college_mathematics',
'college_physics',
'electrical_engineering',
'astronomy',
'anatomy',
'abstract_algebra',
'machine_learning',
'clinical_knowledge',
'global_facts',
'management',
'nutrition',
'marketing',
'professional_accounting',
]
mmlu_datasets = []
for name in mmlu_all_sets:
mmlu_datasets.append(
dict(
type=HFDataset,
path='lukaemon/mmlu',
name=name,
reader_cfg=mmlu_reader_cfg,
infer_cfg=mmlu_infer_cfg.copy(),
eval_cfg=mmlu_eval_cfg))
mmlu_datasets[-1]['infer_cfg'][
'prompt_template'] = mmlu_prompt_template.copy()
mmlu_datasets[-1]['infer_cfg']['prompt_template']['template'] = dict(
begin=[
dict(
role='SYSTEM',
fallback_role='HUMAN',
prompt=
f'The following are multiple choice questions (with answers) about {name.replace("_", " ")}.'
),
'</E>',
],
round=[
dict(
role='HUMAN',
prompt=
'</input>\n(A) </A>\n(B) </B>\n(C) </C>\n(D) </D>\nAnswer: ('),
],
)
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NaturalQuestionDataset, NQEvaluator
nq_reader_cfg = dict(
input_columns=['question'], output_column='answer', train_split='test')
nq_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template="Q: </Q>?\nA: </A>",
column_token_map={
'question': '</Q>',
'answer': '</A>'
}),
prompt_template=dict(
type=PromptTemplate,
template="</E>Question: </Q>? Answer: ",
column_token_map={
'question': '</Q>',
'answer': '</A>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer))
nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator))
nq_datasets = [
dict(
type=NaturalQuestionDataset,
abbr='nq',
path='/mnt/petrelfs/wuzhiyong/datasets/nq/',
reader_cfg=nq_reader_cfg,
infer_cfg=nq_infer_cfg,
eval_cfg=nq_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import PPLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import HFDataset
ocnli_reader_cfg = dict(
input_columns=['sentence1', 'sentence2'], output_column='label')
# TODO: two prompt templates for ocnli
ocnli_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template={
'contradiction':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]never',
'entailment':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]always',
'neutral':
'{sentence1}\nKeeping in mind the above text, consider: {sentence2}?\nIs this "always", "sometimes", or "never" correct? [MASK]sometimes'
}),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=PPLInferencer))
ocnli_eval_cfg = dict(evaluator=dict(type=AccEvaluator), )
ocnli_datasets = [
dict(
type=HFDataset,
abbr='ocnli',
path='json',
split='train',
data_files='./data/CLUE/OCNLI/dev.json',
reader_cfg=ocnli_reader_cfg,
infer_cfg=ocnli_infer_cfg,
eval_cfg=ocnli_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GLMChoiceInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import TNewsDataset
tnews_reader_cfg = dict(input_columns='sentence', output_column='label_desc2')
tnews_labels = [
'农业新闻', '旅游新闻', '游戏新闻', '科技类别公司新闻', '体育类别新闻', '初升高教育新闻', '娱乐圈新闻', '投资资讯',
'军事类别常识', '车辆新闻', '楼市新闻', '环球不含中国类别新闻', '书籍文化历史类别新闻', '故事类别新闻', '股票市场类别新闻'
]
tnews_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template={lb: f'</E></S>这篇新闻属于:{lb}'
for lb in tnews_labels},
column_token_map={'sentence': '</S>'},
ice_token='</E>'),
prompt_template=dict(
type=PromptTemplate,
template='</E></S>\n以上这篇新闻属于',
column_token_map={'sentence': '</S>'},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GLMChoiceInferencer, choices=tnews_labels))
tnews_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
tnews_datasets = [
dict(
type=TNewsDataset,
path='json',
abbr='tnews',
data_files='./data/FewCLUE/tnews/test_public.json',
split='train',
reader_cfg=tnews_reader_cfg,
infer_cfg=tnews_infer_cfg,
eval_cfg=tnews_eval_cfg)
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQADataset, TriviaQAEvaluator
triviaqa_reader_cfg = dict(
input_columns=['question'],
output_column='answer',
train_split='dev',
test_split='dev')
triviaqa_infer_cfg = dict(
ice_template=dict(
type=PromptTemplate,
template='Q: </Q>\nA: </A>',
column_token_map={
'question': '</Q>',
'answer': '</A>'
}),
prompt_template=dict(
type=PromptTemplate,
template='</E>Question: </Q> Answer:',
column_token_map={
'question': '</Q>',
'answer': '</A>'
},
ice_token='</E>'),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=50))
triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator))
triviaqa_datasets = [
dict(
type=TriviaQADataset,
abbr='triviaqa',
path='./data/triviaqa/',
reader_cfg=triviaqa_reader_cfg,
infer_cfg=triviaqa_infer_cfg,
eval_cfg=triviaqa_eval_cfg)
]
@@ -14,7 +14,7 @@ PromptType = Union[PromptList, str]

 @MODELS.register_module()
 class HuggingFace(BaseModel):
-    """Model wrapper around HuggingFace general models.
+    """Model wrapper around HuggingFace models.

     Args:
         path (str): The name or path to HuggingFace's model.
@@ -104,10 +104,15 @@ class HuggingFace(BaseModel):
                     path: str,
                     model_kwargs: dict,
                     peft_path: Optional[str] = None):
-        from transformers import AutoModel
+        from transformers import AutoModel, AutoModelForCausalLM
         model_kwargs.setdefault('torch_dtype', torch.float16)
-        self.model = AutoModel.from_pretrained(path, **model_kwargs)
+        try:
+            self.model = AutoModelForCausalLM.from_pretrained(
+                path, **model_kwargs)
+        except ValueError:
+            self.model = AutoModel.from_pretrained(path, **model_kwargs)
         if peft_path is not None:
             from peft import PeftModel
             self.model = PeftModel.from_pretrained(self.model,
...
@@ -2,8 +2,8 @@ from .abbr import *  # noqa
 from .build import *  # noqa
 from .collect_env import *  # noqa
 from .dependency import *  # noqa
+from .file import *  # noqa
 from .fileio import *  # noqa
+from .git import *  # noqa
 from .lark import *  # noqa
 from .logging import *  # noqa
 from .menu import *  # noqa
...
import fnmatch
import os
from typing import List, Union
def match_files(path: str,
pattern: Union[str, List],
fuzzy: bool = False) -> List:
if isinstance(pattern, str):
pattern = [pattern]
if fuzzy:
pattern = [f'*{p}*' for p in pattern]
files_list = []
for root, _, files in os.walk(path):
for name in files:
for p in pattern:
if fnmatch.fnmatch(name.lower(), p.lower()):
files_list.append([name[:-3], os.path.join(root, name)])
break
return sorted(files_list, key=lambda x: x[0])
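# Example usage (hypothetical paths, not part of the original module): collect
# every config under a directory whose file name contains "ceval". fuzzy=True
# wraps each pattern in '*...*'; every hit is returned as [stem without '.py',
# full path], sorted by stem.
def _demo_match_files():
    for stem, file_path in match_files('configs/datasets', 'ceval', fuzzy=True):
        print(stem, file_path)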
import subprocess
def get_git_root() -> str:
cmd = ['git', 'rev-parse', '--show-toplevel']
result = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
return result.stdout.decode('utf-8').strip()
def get_latest_commit(branch: str) -> str:
cmd = ['git', 'rev-parse', branch]
result = subprocess.run(cmd, stdout=subprocess.PIPE, check=True)
return result.stdout.decode('utf-8').strip()
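# Example usage (assumes it is run inside a git checkout; not part of the
# original module): both helpers just shell out to git and return the stripped
# stdout.
def _demo_git_info():
    print('repo root:', get_git_root())
    print('HEAD is at:', get_latest_commit('HEAD'))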
from typing import List, Union
import tabulate
from mmengine.config import Config
from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner
from opencompass.utils import get_logger, match_files
def match_cfg_file(workdir: str, pattern: Union[str, List[str]]) -> List[str]:
"""Match the config file in workdir recursively given the pattern.
Additionally, if the pattern itself points to an existing file, it will be
directly returned.
"""
if isinstance(pattern, str):
pattern = [pattern]
pattern = [p + '.py' if not p.endswith('.py') else p for p in pattern]
files = match_files(workdir, pattern, fuzzy=False)
if len(files) != len(pattern):
nomatched = []
ambiguous = []
err_msg = ('The provided pattern matches 0 or more than one '
'config. Please verify your pattern and try again. '
'You may use tools/list_configs.py to list or '
'locate the configurations.\n')
for p in pattern:
files = match_files(workdir, p, fuzzy=False)
if len(files) == 0:
nomatched.append([p[:-3]])
elif len(files) > 1:
ambiguous.append([p[:-3], '\n'.join(f[1] for f in files)])
if nomatched:
table = [['Not matched patterns'], *nomatched]
err_msg += tabulate.tabulate(table,
headers='firstrow',
tablefmt='psql')
if ambiguous:
table = [['Ambiguous patterns', 'Matched files'], *ambiguous]
err_msg += tabulate.tabulate(table,
headers='firstrow',
tablefmt='psql')
raise ValueError(err_msg)
return files
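# Example usage (hypothetical name, not part of the original module): resolve
# the shorthand 'ceval_ppl' to a concrete config file under the work
# directory. '.py' is appended automatically; a ValueError with a diagnostic
# table is raised when a name matches zero or more than one file.
def _demo_match_cfg_file():
    for stem, cfg_path in match_cfg_file('configs/datasets/', ['ceval_ppl']):
        print(stem, cfg_path)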
def get_config_from_arg(args) -> Config:
"""Get the config object given args.
Only a few argument combinations are accepted (priority from high to low)
1. args.config
2. args.models and args.datasets
3. Huggingface parameter groups and args.datasets
"""
if args.config:
return Config.fromfile(args.config, format_python_code=False)
if args.datasets is None:
raise ValueError('You must specify "--datasets" if you do not specify '
'a config file path.')
datasets = []
for dataset in match_cfg_file('configs/datasets/', args.datasets):
get_logger().info(f'Loading {dataset[0]}: {dataset[1]}')
cfg = Config.fromfile(dataset[1])
for k in cfg.keys():
if k.endswith('_datasets'):
datasets += cfg[k]
if not args.models and not args.hf_path:
raise ValueError('You must specify a config file path, '
'or specify --models and --datasets, or '
'specify HuggingFace model parameters and '
'--datasets.')
models = []
if args.models:
for model in match_cfg_file('configs/models/', args.models):
get_logger().info(f'Loading {model[0]}: {model[1]}')
cfg = Config.fromfile(model[1])
if 'models' not in cfg:
raise ValueError(
f'Config file {model[1]} does not contain "models" field')
models += cfg['models']
else:
from opencompass.models import HuggingFace
model = dict(type=f'{HuggingFace.__module__}.{HuggingFace.__name__}',
path=args.hf_path,
peft_path=args.peft_path,
tokenizer_path=args.tokenizer_path,
model_kwargs=args.model_kwargs,
tokenizer_kwargs=args.tokenizer_kwargs,
max_seq_len=args.max_seq_len,
max_out_len=args.max_out_len,
batch_padding=not args.no_batch_padding,
batch_size=args.batch_size,
run_cfg=dict(num_gpus=args.num_gpus))
models.append(model)
return Config(dict(models=models, datasets=datasets),
format_python_code=False)
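# Sketch of the new shortcut path (hypothetical values, not part of the
# original module): without a positional config, a model dict is built from
# the HuggingFace-style CLI arguments and combined with the *_datasets lists
# gathered from the matched dataset configs. The model id and dataset name
# below are placeholders; substitute ones that exist in your checkout.
def _demo_get_config_from_arg():
    from argparse import Namespace
    demo_args = Namespace(
        config=None,
        models=None,
        datasets=['ceval_ppl'],          # resolved via match_cfg_file
        hf_path='huggyllama/llama-7b',   # placeholder HF model id
        peft_path=None,
        tokenizer_path=None,
        model_kwargs={},
        tokenizer_kwargs={},
        max_seq_len=2048,
        max_out_len=100,
        no_batch_padding=False,
        batch_size=8,
        num_gpus=1)
    cfg = get_config_from_arg(demo_args)
    print(len(cfg['models']), 'model(s),', len(cfg['datasets']), 'dataset entries')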
def exec_mm_infer_runner(tasks, args, cfg):
"""execute multimodal infer runner according to args."""
if args.slurm:
runner = SlurmRunner(dict(type='MultimodalInferTask'),
max_num_workers=args.max_num_workers,
partition=args.partition,
quotatype=args.quotatype,
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
elif args.dlc:
raise NotImplementedError('Currently, we do not support evaluating \
multimodal models on dlc.')
else:
runner = LocalRunner(task=dict(type='MultimodalInferTask'),
max_num_workers=args.max_num_workers,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
runner(tasks)
def exec_infer_runner(tasks, args, cfg):
"""execute infer runner according to args."""
if args.slurm:
runner = SlurmRunner(dict(type='OpenICLInferTask'),
max_num_workers=args.max_num_workers,
partition=args.partition,
quotatype=args.quotatype,
qos=args.qos,
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
elif args.dlc:
runner = DLCRunner(dict(type='OpenICLInferTask'),
max_num_workers=args.max_num_workers,
aliyun_cfg=Config.fromfile(args.aliyun_cfg),
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
else:
runner = LocalRunner(task=dict(type='OpenICLInferTask'),
max_num_workers=args.max_num_workers,
max_workers_per_gpu=args.max_workers_per_gpu,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
runner(tasks)
def exec_eval_runner(tasks, args, cfg):
"""execute infer runner according to args."""
if args.slurm:
runner = SlurmRunner(dict(type='OpenICLEvalTask'),
max_num_workers=args.max_num_workers,
partition=args.partition,
quotatype=args.quotatype,
qos=args.qos,
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
elif args.dlc:
runner = DLCRunner(dict(type='OpenICLEvalTask'),
max_num_workers=args.max_num_workers,
aliyun_cfg=Config.fromfile(args.aliyun_cfg),
retry=args.retry,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
else:
runner = LocalRunner(task=dict(type='OpenICLEvalTask'),
max_num_workers=args.max_num_workers,
debug=args.debug,
lark_bot_url=cfg['lark_bot_url'])
runner(tasks)
@@ -4,18 +4,21 @@ import os
 import os.path as osp
 from datetime import datetime
 
-from mmengine.config import Config
+from mmengine.config import Config, DictAction
 
 from opencompass.partitioners import (MultimodalNaivePartitioner,
                                       NaivePartitioner, SizePartitioner)
 from opencompass.registry import PARTITIONERS, RUNNERS
-from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner
+from opencompass.runners import SlurmRunner
 from opencompass.utils import LarkReporter, Summarizer, get_logger
+from opencompass.utils.run import (exec_eval_runner, exec_infer_runner,
+                                   exec_mm_infer_runner, get_config_from_arg)
 
 
 def parse_args():
     parser = argparse.ArgumentParser(description='Run an evaluation task')
-    parser.add_argument('config', help='Train config file path')
+    parser.add_argument('config', nargs='?', help='Train config file path')
     # add mutually exclusive args `--slurm` `--dlc`, defaults to local runner
     # if "infer" or "eval" not specified
     launch_method = parser.add_mutually_exclusive_group()
@@ -31,6 +34,14 @@ def parse_args():
                                help='Whether to force tasks to run on dlc. If '
                                'True, `--aliyun-cfg` must be set. Defaults'
                                ' to False')
+    # multi-modal support
+    parser.add_argument('--mm-eval',
+                        help='Whether or not enable multimodal evaluation',
+                        action='store_true',
+                        default=False)
+    # Add shortcut parameters (models and datasets)
+    parser.add_argument('--models', nargs='+', help='', default=None)
+    parser.add_argument('--datasets', nargs='+', help='', default=None)
     # add general args
     parser.add_argument('--debug',
                         help='Debug mode, in which scheduler will run tasks '
@@ -38,10 +49,6 @@ def parse_args():
                         'redirected to files',
                         action='store_true',
                         default=False)
-    parser.add_argument('--mm-eval',
-                        help='Whether or not enable multimodal evaluation',
-                        action='store_true',
-                        default=False)
     parser.add_argument('--dry-run',
                         help='Dry run mode, in which the scheduler will not '
                         'actually run the tasks, but only print the commands '
@@ -115,6 +122,9 @@ def parse_args():
     # set dlc args
     dlc_parser = parser.add_argument_group('dlc_args')
     parse_dlc_args(dlc_parser)
+    # set hf args
+    hf_parser = parser.add_argument_group('hf_args')
+    parse_hf_args(hf_parser)
     args = parser.parse_args()
     if args.slurm:
         assert args.partition is not None, (
@@ -153,6 +163,22 @@ def parse_dlc_args(dlc_parser):
                             type=str)
 
 
+def parse_hf_args(hf_parser):
+    """These args are all for the quick construction of HuggingFace models."""
+    hf_parser.add_argument('--hf-path', type=str)
+    hf_parser.add_argument('--peft-path', type=str)
+    hf_parser.add_argument('--tokenizer-path', type=str)
+    hf_parser.add_argument('--model-kwargs', nargs='+', action=DictAction)
+    hf_parser.add_argument('--tokenizer-kwargs', nargs='+', action=DictAction)
+    hf_parser.add_argument('--max-out-len', type=int)
+    hf_parser.add_argument('--max-seq-len', type=int)
+    hf_parser.add_argument('--no-batch-padding',
+                           action='store_true',
+                           default=False)
+    hf_parser.add_argument('--batch-size', type=int)
+    hf_parser.add_argument('--num-gpus', type=int)
+
+
 def main():
     args = parse_args()
     if args.dry_run:
@@ -160,7 +186,7 @@ def main():
     # initialize logger
     logger = get_logger(log_level='DEBUG' if args.debug else 'INFO')
 
-    cfg = Config.fromfile(args.config, format_python_code=False)
+    cfg = get_config_from_arg(args)
     if args.work_dir is not None:
         cfg['work_dir'] = args.work_dir
     else:
@@ -300,79 +326,5 @@ def main():
         summarizer.summarize(time_str=cfg_time_str)
-
-
-def exec_mm_infer_runner(tasks, args, cfg):
-    """execute multimodal infer runner according to args."""
-    if args.slurm:
-        runner = SlurmRunner(dict(type='MultimodalInferTask'),
-                             max_num_workers=args.max_num_workers,
-                             partition=args.partition,
-                             quotatype=args.quotatype,
-                             retry=args.retry,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    elif args.dlc:
-        raise NotImplementedError('Currently, we do not support evaluating \
-            multimodal models on dlc.')
-    else:
-        runner = LocalRunner(task=dict(type='MultimodalInferTask'),
-                             max_num_workers=args.max_num_workers,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    runner(tasks)
-
-
-def exec_infer_runner(tasks, args, cfg):
-    """execute infer runner according to args."""
-    if args.slurm:
-        runner = SlurmRunner(dict(type='OpenICLInferTask'),
-                             max_num_workers=args.max_num_workers,
-                             partition=args.partition,
-                             quotatype=args.quotatype,
-                             qos=args.qos,
-                             retry=args.retry,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    elif args.dlc:
-        runner = DLCRunner(dict(type='OpenICLInferTask'),
-                           max_num_workers=args.max_num_workers,
-                           aliyun_cfg=Config.fromfile(args.aliyun_cfg),
-                           retry=args.retry,
-                           debug=args.debug,
-                           lark_bot_url=cfg['lark_bot_url'])
-    else:
-        runner = LocalRunner(task=dict(type='OpenICLInferTask'),
-                             max_num_workers=args.max_num_workers,
-                             max_workers_per_gpu=args.max_workers_per_gpu,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    runner(tasks)
-
-
-def exec_eval_runner(tasks, args, cfg):
-    """execute infer runner according to args."""
-    if args.slurm:
-        runner = SlurmRunner(dict(type='OpenICLEvalTask'),
-                             max_num_workers=args.max_num_workers,
-                             partition=args.partition,
-                             quotatype=args.quotatype,
-                             qos=args.qos,
-                             retry=args.retry,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    elif args.dlc:
-        runner = DLCRunner(dict(type='OpenICLEvalTask'),
-                           max_num_workers=args.max_num_workers,
-                           aliyun_cfg=Config.fromfile(args.aliyun_cfg),
-                           retry=args.retry,
-                           debug=args.debug,
-                           lark_bot_url=cfg['lark_bot_url'])
-    else:
-        runner = LocalRunner(task=dict(type='OpenICLEvalTask'),
-                             max_num_workers=args.max_num_workers,
-                             debug=args.debug,
-                             lark_bot_url=cfg['lark_bot_url'])
-    runner(tasks)
 
 
 if __name__ == '__main__':
     main()