Unverified Commit aa2dd2b5 authored by Fengzhe Zhou, committed by GitHub

[Format] Add config lints (#892)

parent 3dbba119
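The diff below is a mechanical formatting pass over the dataset configs: double-quoted string literals (prompt templates, `pred_role`, `abbr`, data paths) are normalized to single quotes, with no behavioral change. As a minimal sketch of the pattern only — the names `_name` and `subjective_eval_cfg` here are abbreviated stand-ins for illustration, not one of the actual config files:

```python
# Illustrative sketch of the quote-style normalization applied throughout this commit.
# before: pred_role="BOT", abbr=f"{_name}", data_path ="data/subjective/compass_arena"
# after : pred_role='BOT', abbr=f'{_name}', data_path ='data/subjective/compass_arena'

_name = 'creationv2_zh'                      # hypothetical dataset name
data_path = 'data/subjective/compass_arena'  # single quotes, as the lint enforces

subjective_eval_cfg = dict(
    pred_role='BOT',   # was "BOT"
    abbr=f'{_name}',   # was f"{_name}"
    path=data_path,
)
print(subjective_eval_cfg)
```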
@@ -9,7 +9,7 @@ subjective_reader_cfg = dict(
 output_column='judge',
 )
-data_path ="data/subjective/compass_arena"
+data_path ='data/subjective/compass_arena'
 subjective_datasets = []
@@ -99,7 +99,7 @@ creation_prompt = """
 {question}
 """ + base_prompt
-sub_map = {"language": language_prompt, "knowledge": knowledge_prompt, "reason_v2": reason_prompt, "math_v2": math_prompt, "creationv2_zh": creation_prompt}
+sub_map = {'language': language_prompt, 'knowledge': knowledge_prompt, 'reason_v2': reason_prompt, 'math_v2': math_prompt, 'creationv2_zh': creation_prompt}
 for _name, _prompt in sub_map.items():
 subjective_infer_cfg = dict(
@@ -108,7 +108,7 @@ for _name, _prompt in sub_map.items():
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -129,12 +129,12 @@ for _name, _prompt in sub_map.items():
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=CompassArenaDataset,
 path=data_path,
 name=_name,
...
@@ -9,7 +9,7 @@ subjective_reader_cfg = dict(
 output_column='judge',
 )
-data_path ="data/subjective/compass_arena"
+data_path ='data/subjective/compass_arena'
 subjective_datasets = []
@@ -99,7 +99,7 @@ creation_prompt = """
 {question}
 """ + base_prompt
-sub_map = {"creationv3": creation_prompt}
+sub_map = {'creationv3': creation_prompt}
 for _name, _prompt in sub_map.items():
 subjective_infer_cfg = dict(
@@ -108,7 +108,7 @@ for _name, _prompt in sub_map.items():
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -130,12 +130,12 @@ for _name, _prompt in sub_map.items():
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=CompassArenaDataset,
 path=data_path,
 name=_name,
...
@@ -9,7 +9,7 @@ subjective_reader_cfg = dict(
 output_column='judge',
 )
-data_path ="data/subjective/compass_arena"
+data_path ='data/subjective/compass_arena'
 subjective_datasets = []
@@ -91,7 +91,7 @@ reason_prompt = math_prompt
 creation_prompt = """
 请根据提供的 评分要求,用户问题 以及 相应的两个回答(回答1,回答2),判断两个回答中哪一个更好。
 评分要求(重要性依次递减):
 1. 好的回答必须首先符合用户问题里的各种需求,不能跑题
 2. 好的回答必须具有逻辑连贯性,围绕一个中心进行回答
 3. 好的回答必须具有创造性的词语和表达丰富度
@@ -99,7 +99,7 @@ creation_prompt = """
 {question}
 """ + base_prompt
-sub_map = {"knowledge": knowledge_prompt, "language": language_prompt, "math_v2": math_prompt, "reason_v2": reason_prompt, "creationv2_zh": creation_prompt}
+sub_map = {'knowledge': knowledge_prompt, 'language': language_prompt, 'math_v2': math_prompt, 'reason_v2': reason_prompt, 'creationv2_zh': creation_prompt}
 meta_prompt = """
 \n你是一个评判专家,请根据提供的 评分要求,用户问题 以及 相应的两个回答(回答1,回答2),判断两个回答中哪一个更好。\n评分要求(重要性依次递减):\n1. 好的回答必须首先符合用户问题里的各种需求,不能跑题 \n2. 好的回答必须具有逻辑连贯性,围绕一个中心进行回答\n3. 好的回答必须具有创造性的词语和表达丰富度\n\n[用户问题]\n{question}\n[回答1开始]\n{prediction}\n[回答1结束]\n[回答2开始]\n{prediction2}\n[回答2结束]\n此外,还有两个其他评判专家的评判意见供你参考。\n[评判意见1]\n{judgement}\n[评判意见2]\n{judgement2}\n\n最终请你综合其他评判专家的评判意见与你自己的意见,在以下 3 个选项中做出选择:\nA. 回答1更好\nB. 回答2更好\nC. 回答1、2平局\n并提供你的解释原因。\n\n如果你认为回答1更好,你的输出应形如:\n选择:A\n原因:blahblah blahblah\n\n\n如果你认为回答2更好,你的输出应形如:\n选择:B\n原因:blahblah blahblah\n\n\n如果你认为回答1、2打成平手,你的输出应形如:\n选择:C\n原因:blahblah blahblah\n\n
@@ -111,7 +111,7 @@ for _name, _prompt in sub_map.items():
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -141,12 +141,12 @@ for _name, _prompt in sub_map.items():
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=CompassArenaDataset,
 path=data_path,
 name=_name,
...
@@ -10,9 +10,9 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"creationbench",
+'creationbench',
 ]
-data_path ="data/subjective/"
+data_path ='data/subjective/'
 subjective_datasets = []
@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -39,17 +39,17 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt = "{gpt4_prefix}{prediction}{gpt4_suffix}"
+prompt = '{gpt4_prefix}{prediction}{gpt4_suffix}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=CreationBenchDataset,
 multi_dimension=True,
 path=data_path,
...
@@ -10,9 +10,9 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"creationv2_zh",
+'creationv2_zh',
 ]
-data_path ="data/subjective/"
+data_path ='data/subjective/'
 subjective_datasets = []
@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -39,17 +39,17 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt = "{score_with_ref_prefix}{prediction}{score_with_ref_suffix}"
+prompt = '{score_with_ref_prefix}{prediction}{score_with_ref_suffix}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=CreationBenchDataset,
 multi_dimension=True,
 path=data_path,
...
@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"FunctionalMT",
+'FunctionalMT',
 ]
-data_path ="data/subjective/"
+data_path ='data/subjective/'
 subjective_datasets = []
@@ -36,17 +36,17 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt = "{gpt4_prefix}{prediction}{gpt4_suffix}"
+prompt = '{gpt4_prefix}{prediction}{gpt4_suffix}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=MultiroundDataset,
 path=data_path,
 name=_name,
...
@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"mtbench",
+'mtbench',
 ]
-data_path ="data/subjective/"
+data_path ='data/subjective/'
 subjective_datasets = []
@@ -38,22 +38,22 @@ for _name in subjective_all_sets:
 dict(
 role='SYSTEM',
 fallback_role='HUMAN',
-prompt="{system_prompt}")
+prompt='{system_prompt}')
 ],
 round=[
 dict(
 role='HUMAN',
-prompt = "{prompt_template}"
+prompt = '{prompt_template}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=MTBenchDataset,
 path=data_path,
 name=_name,
...
@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"mtbench",
+'mtbench',
 ]
-data_path ="data/subjective/"
+data_path ='data/subjective/'
 subjective_datasets = []
@@ -37,22 +37,22 @@ for _name in subjective_all_sets:
 dict(
 role='SYSTEM',
 fallback_role='HUMAN',
-prompt="{system_prompt}")
+prompt='{system_prompt}')
 ],
 round=[
 dict(
 role='HUMAN',
-prompt = "{prompt_template}"
+prompt = '{prompt_template}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=MTBenchDataset,
 path=data_path,
 name=_name,
...
@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"mtbench_0.0","mtbench_0.1","mtbench_0.7"
+'mtbench_0.0','mtbench_0.1','mtbench_0.7'
 ]
-data_path ="data/subjective/mtbench"
+data_path ='data/subjective/mtbench'
 subjective_datasets = []
@@ -39,22 +39,22 @@ for _name in subjective_all_sets:
 dict(
 role='SYSTEM',
 fallback_role='HUMAN',
-prompt="{system_prompt}")
+prompt='{system_prompt}')
 ],
 round=[
 dict(
 role='HUMAN',
-prompt = "{prompt_template}"
+prompt = '{prompt_template}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=MTBenchDataset,
 path=data_path,
 name=_name,
...
@@ -10,7 +10,7 @@ subjective_reader_cfg = dict(
 train_split='test')
 subjective_all_sets = [
-"creation_v0.1",
+'creation_v0.1',
 ]
 subjective_datasets = []
@@ -22,7 +22,7 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -39,21 +39,21 @@ for _name in subjective_all_sets:
 template=dict(
 begin=[
 dict(
-role="SYSTEM",
-fallback_role="HUMAN",
-prompt="{prompt}"
+role='SYSTEM',
+fallback_role='HUMAN',
+prompt='{prompt}'
 ),
 ],
-round=[dict(role="HUMAN",
-prompt="回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n")]))),
-pred_role="BOT",
+round=[dict(role='HUMAN',
+prompt='回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n')]))),
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=SubjectiveCmpDataset,
-path="./data/subjective/",
+path='./data/subjective/',
 name=_name,
 reader_cfg=subjective_reader_cfg,
 infer_cfg=subjective_infer_cfg,
...
@@ -12,7 +12,7 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"COREV2_6A_all",
+'COREV2_6A_all',
 ]
@@ -25,7 +25,7 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -42,19 +42,19 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt = "{prefix}问题: <问题开始> {question} <问题结束>\n\n回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n\n{suffix}"
+prompt = '{prefix}问题: <问题开始> {question} <问题结束>\n\n回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n\n{suffix}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=Corev2Dataset,
-path="./data/subjective/",
+path='./data/subjective/',
 name=_name,
 reader_cfg=subjective_reader_cfg,
 infer_cfg=subjective_infer_cfg,
...
@@ -11,7 +11,7 @@ subjective_reader_cfg = dict(
 )
 subjective_all_sets = [
-"creation_v0.1",
+'creation_v0.1',
 ]
 subjective_datasets = []
@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt="{question}"
+prompt='{question}'
 ),
 ]),
 ),
@@ -39,19 +39,19 @@ for _name in subjective_all_sets:
 template=dict(round=[
 dict(
 role='HUMAN',
-prompt = "{prefix}问题: <问题开始> {question} <问题结束>\n\n回答: <回答开始> {prediction} <回答结束>\n\n{suffix}"
+prompt = '{prefix}问题: <问题开始> {question} <问题结束>\n\n回答: <回答开始> {prediction} <回答结束>\n\n{suffix}'
 ),
 ]),
 ),
 ),
-pred_role="BOT",
+pred_role='BOT',
 )
 subjective_datasets.append(
 dict(
-abbr=f"{_name}",
+abbr=f'{_name}',
 type=Creationv01Dataset,
-path="./data/subjective/",
+path='./data/subjective/',
 name=_name,
 reader_cfg=subjective_reader_cfg,
 infer_cfg=subjective_infer_cfg,
...
@@ -13,7 +13,7 @@ summedits_infer_cfg = dict(
 type=PromptTemplate,
 template=dict(round=[
 dict(
-role="HUMAN",
+role='HUMAN',
 prompt=
 """Given the document below, you have to determine if "Yes" or "No", the summary is factually consistent with the document.
@@ -36,7 +36,7 @@ Answer:"""
 summedits_eval_cfg = dict(
 evaluator=dict(type=AccEvaluator),
-pred_role="BOT",
+pred_role='BOT',
 pred_postprocessor=dict(type=first_capital_postprocess),
 )
...
@@ -13,7 +13,7 @@ summedits_infer_cfg = dict(
 type=PromptTemplate,
 template=dict(round=[
 dict(
-role="HUMAN",
+role='HUMAN',
 prompt=
 'Document:\n{doc}Summary:\n{summary}\nQuestion:\nIs the summary factually consistent with the document?\nA. Yes\nB. No\nAnswer:'
 ),
@@ -23,7 +23,7 @@ summedits_infer_cfg = dict(
 summedits_eval_cfg = dict(
 evaluator=dict(type=AccEvaluator),
-pred_role="BOT",
+pred_role='BOT',
 pred_postprocessor=dict(type=first_capital_postprocess),
 )
...
@@ -16,20 +16,20 @@ summedits_infer_cfg = dict(
 0:
 dict(round=[
 dict(
-role="HUMAN",
+role='HUMAN',
 prompt=
 """\nDocument:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
 ),
-dict(role="BOT", prompt="No")
+dict(role='BOT', prompt='No')
 ]),
 1:
 dict(round=[
 dict(
-role="HUMAN",
+role='HUMAN',
 prompt=
 """Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
 ),
-dict(role="BOT", prompt="Yes")
+dict(role='BOT', prompt='Yes')
 ]),
 }),
 retriever=dict(type=ZeroRetriever),
...
@@ -10,7 +10,7 @@ summedits_reader_cfg = dict(
 test_split='train')
 summedits_prompt1 = "Given the document below, you have to determine if 'Yes' or 'No', the summary is factually consistent with the document."
-summedits_prompt2 = "Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? "
+summedits_prompt2 = 'Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? '
 summedits_infer_cfg = dict(
 prompt_template=dict(
 type=PromptTemplate,
@@ -24,8 +24,8 @@ summedits_infer_cfg = dict(
 prompt=summedits_prompt1)
 ],
 round=[
-dict(role="HUMAN", prompt=summedits_prompt2),
-dict(role="BOT", prompt="No")
+dict(role='HUMAN', prompt=summedits_prompt2),
+dict(role='BOT', prompt='No')
 ]),
 1:
 dict(
@@ -36,8 +36,8 @@ summedits_infer_cfg = dict(
 prompt=summedits_prompt1)
 ],
 round=[
-dict(role="HUMAN", prompt=summedits_prompt2),
-dict(role="BOT", prompt="Yes")
+dict(role='HUMAN', prompt=summedits_prompt2),
+dict(role='BOT', prompt='Yes')
 ]),
 }),
 retriever=dict(type=ZeroRetriever),
...
@@ -21,8 +21,8 @@ summedits_infer_cfg = dict(
 prompt_template=dict(
 type=PromptTemplate,
 template={
-0: f"{summedits_prompt}Answer: No.",
-1: f"{summedits_prompt}Answer: Yes."
+0: f'{summedits_prompt}Answer: No.',
+1: f'{summedits_prompt}Answer: Yes.'
 }),
 retriever=dict(type=ZeroRetriever),
 inferencer=dict(type=PPLInferencer))
...
@@ -18,7 +18,7 @@ summscreen_infer_cfg = dict(
 begin=[
 dict(
 role='SYSTEM',
-fallback_role="HUMAN",
+fallback_role='HUMAN',
 prompt=
 'Please summarize the following English play script in English:'
 ),
...
@@ -15,7 +15,7 @@ summscreen_infer_cfg = dict(
 prompt_template=dict(
 type=PromptTemplate,
 template=
-"Please summarize the following English report in English:{content}\n{summary}."),
+'Please summarize the following English report in English:{content}\n{summary}.'),
 retriever=dict(type=ZeroRetriever),
 inferencer=dict(
 type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
...
@@ -32,9 +32,9 @@ taco_skills = load_dataset('BAAI/TACO', skills=['Sorting', 'Range queries'], tok
 ```
 ## Evaluation results
 | dataset | metric | CodeLlama-7b-Python | internlm2-chat-1.8b-sft-hf | internlm2-chat-7b-sft-hf | internlm2-chat-20b-sft-hf |
 |-----------------------|----------|-------------|-------------|-------------|-------------|
 | TACO | pass@1 | 0.7 | 0.7 | 1.7 | 2.7 |
 Please refer to [repo](https://github.com/FlagOpen/TACO/tree/main?tab=readme-ov-file) for original results if needed.
@@ -47,4 +47,4 @@ Please refer to [repo](https://github.com/FlagOpen/TACO/tree/main?tab=readme-ov-
 journal={arXiv preprint arXiv:2312.14852},
 year={2023}
 }
 ```
\ No newline at end of file