Unverified commit aa2dd2b5, authored by Fengzhe Zhou, committed by GitHub

[Format] Add config lints (#892)

parent 3dbba119
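
Every hunk below makes the same mechanical change: double-quoted string literals in the dataset configs are rewritten to single quotes. Double quotes survive only where the string itself contains a single quote (as in the storycloze prompt below), which is the conventional exception. A lint of this kind is easy to automate; the following is a minimal, hypothetical sketch of such a check built on Python's `tokenize` module. It illustrates what the lint enforces and is not the actual hook wired into CI by this PR; the file name `quote_lint.py` and its output format are invented for the example.

```python
# quote_lint.py: minimal sketch of a single-quote lint (hypothetical,
# not the actual check added in #892).
import io
import sys
import tokenize


def find_double_quoted(source):
    """Yield (line, col, text) for string tokens written with double quotes.

    Note: on Python <= 3.11, f-strings arrive as single STRING tokens;
    Python 3.12+ tokenizes them differently and would need extra handling.
    """
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        if tok.type != tokenize.STRING:
            continue
        # Drop any f/r/b/u prefix before inspecting the quote character.
        body = tok.string.lstrip('frbuFRBU')
        # Skip triple-quoted strings, and strings containing a single
        # quote, where double quotes are the idiomatic choice.
        if body.startswith('"') and not body.startswith('"""') and "'" not in body:
            yield tok.start[0], tok.start[1], tok.string


if __name__ == '__main__':
    status = 0
    for path in sys.argv[1:]:
        with open(path) as f:
            src = f.read()
        for line, col, text in find_double_quoted(src):
            print(f'{path}:{line}:{col}: prefer single quotes: {text}')
            status = 1
    sys.exit(status)
```

Run as, e.g., `python quote_lint.py configs/datasets/**/*.py` (with shell globstar enabled) and fail the build on a non-zero exit code.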
@@ -11,7 +11,7 @@ s3eval_cfg = dict(evaluator=dict(type=S3EvalEvaluator))
 s3eval_datasets = [
     dict(
         type=S3EvalDataset,
-        abbr="s3eval",
+        abbr='s3eval',
         path='FangyuLei/s3eval',
         eval_cfg=s3eval_cfg)
 ]
@@ -8,24 +8,24 @@ from opencompass.datasets import ScibenchDataset, scibench_postprocess
 scibench_reader_cfg = dict(input_columns=['question'], output_column='answer')
 
 scibench_subsets = [
-    "atkins",
-    "calculus",
-    "chemmc",
-    "class",
-    "diff",
-    "fund",
-    "matter",
-    "quan",
-    "stat",
-    "thermo"
+    'atkins',
+    'calculus',
+    'chemmc',
+    'class',
+    'diff',
+    'fund',
+    'matter',
+    'quan',
+    'stat',
+    'thermo'
 ]
 
 scibench_datasets = []
-for prompt_type in ["zs", "zs-cot", "fs", "fs-cot"]:
+for prompt_type in ['zs', 'zs-cot', 'fs', 'fs-cot']:
     for _name in scibench_subsets:
-        if prompt_type == "fs":
+        if prompt_type == 'fs':
             prompt_path = os.path.join(os.path.dirname(__file__), 'lib_prompt', f'{_name}_prompt.txt')
-        elif prompt_type == "fs-cot":
+        elif prompt_type == 'fs-cot':
             prompt_path = os.path.join(os.path.dirname(__file__), 'lib_prompt', f'{_name}_sol.txt')
         else:
             prompt_path = None
@@ -33,20 +33,20 @@ for prompt_type in ["zs", "zs-cot", "fs", "fs-cot"]:
             with open(prompt_path, 'r') as f:
                 _hint = f.read()
         else:
-            _hint = ""
+            _hint = ''
 
         human_prompt = {
             'zs': "Please provide a clear and step-by-step solution for a scientific problem in the categories of Chemistry, Physics, or Mathematics. The problem will specify the unit of measurement, which should not be included in the answer. Express the final answer as a decimal number with three digits after the decimal point. Conclude the answer by stating 'Therefore, the answer is \\boxed[ANSWER].'\n\nProblem: {question}\nAnswer:",
             'zs-cot': "Please provide a clear and step-by-step solution for a scientific problem in the categories of Chemistry, Physics, or Mathematics. The problem will specify the unit of measurement, which should not be included in the answer. Express the final answer as a decimal number with three digits after the decimal point. Conclude the answer by stating 'Therefore, the answer is \\boxed[ANSWER].'\n\nProblem: {question}\nAnswer:Let’s think step by step.",
-            'fs': f"{_hint}\n\nProblem 6: {{question}}\nAnswer: ",
-            'fs-cot': f"{_hint}\n\nProblem 6: {{question}}\nExplanation for Problem 6: ",
+            'fs': f'{_hint}\n\nProblem 6: {{question}}\nAnswer: ',
+            'fs-cot': f'{_hint}\n\nProblem 6: {{question}}\nExplanation for Problem 6: ',
         }[prompt_type]
 
         scibench_infer_cfg = dict(
             prompt_template=dict(
                 type=PromptTemplate,
                 template=dict(round=[
-                    dict(role="HUMAN", prompt=human_prompt)
+                    dict(role='HUMAN', prompt=human_prompt)
                 ])
             ),
             retriever=dict(type=ZeroRetriever),
@@ -61,9 +61,9 @@ for prompt_type in ["zs", "zs-cot", "fs", "fs-cot"]:
         scibench_datasets.append(
             dict(
                 type=ScibenchDataset,
-                path="./data/scibench",
+                path='./data/scibench',
                 name=_name,
-                abbr= f"scibench-{_name}" if prompt_type == 'zs' else f"scibench-{_name}_{prompt_type}",
+                abbr= f'scibench-{_name}' if prompt_type == 'zs' else f'scibench-{_name}_{prompt_type}',
                 reader_cfg=scibench_reader_cfg,
                 infer_cfg=scibench_infer_cfg.copy(),
                 eval_cfg=scibench_eval_cfg.copy()
...
@@ -6,9 +6,9 @@ from opencompass.utils.text_postprocessors import first_option_postprocess
 from opencompass.datasets import siqaDataset_V3
 
 siqa_reader_cfg = dict(
-    input_columns=["context", "question", "A", "B", "C"],
-    output_column="answer",
-    test_split="validation")
+    input_columns=['context', 'question', 'A', 'B', 'C'],
+    output_column='answer',
+    test_split='validation')
 
 siqa_infer_cfg = dict(
     prompt_template=dict(
@@ -16,9 +16,9 @@ siqa_infer_cfg = dict(
         template=dict(
             round=[
                 dict(
-                    role="HUMAN",
+                    role='HUMAN',
                     prompt=
-                    "{context}\nQuestion: {question}\nA. {A}\nB. {B}\nC. {C}\nAnswer:"
+                    '{context}\nQuestion: {question}\nA. {A}\nB. {B}\nC. {C}\nAnswer:'
                 )
             ], ),
     ),
@@ -33,7 +33,7 @@ siqa_eval_cfg = dict(
 
 siqa_datasets = [
     dict(
-        abbr="siqa",
+        abbr='siqa',
         type=siqaDataset_V3,
         path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
...
@@ -5,9 +5,9 @@ from opencompass.openicl.icl_evaluator import EDAccEvaluator
 from opencompass.datasets import siqaDataset_V2
 
 siqa_reader_cfg = dict(
-    input_columns=["context", "question", "answerA", "answerB", "answerC"],
-    output_column="all_labels",
-    test_split="validation")
+    input_columns=['context', 'question', 'answerA', 'answerB', 'answerC'],
+    output_column='all_labels',
+    test_split='validation')
 
 siqa_infer_cfg = dict(
     prompt_template=dict(
@@ -15,9 +15,9 @@ siqa_infer_cfg = dict(
         template=dict(
             round=[
                 dict(
-                    role="HUMAN",
+                    role='HUMAN',
                     prompt=
-                    "{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}\nAnswer:"
+                    '{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}\nAnswer:'
                 )
             ], ),
     ),
@@ -27,12 +27,12 @@ siqa_infer_cfg = dict(
 
 siqa_eval_cfg = dict(
     evaluator=dict(type=EDAccEvaluator),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 
 siqa_datasets = [
     dict(
-        abbr="siqa",
+        abbr='siqa',
         type=siqaDataset_V2,
         path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
...
@@ -24,7 +24,7 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 
 siqa_datasets = [
     dict(
-        abbr="siqa",
+        abbr='siqa',
         type=siqaDataset,
         path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
...
@@ -24,7 +24,7 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 
 siqa_datasets = [
     dict(
-        abbr="siqa",
+        abbr='siqa',
         type=siqaDataset,
         path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
...
@@ -15,18 +15,18 @@ siqa_infer_cfg = dict(
         template={
             1:
             dict(round=[
-                dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
-                dict(role='BOT', prompt="{answerA}")
+                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
+                dict(role='BOT', prompt='{answerA}')
             ]),
             2:
             dict(round=[
-                dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
-                dict(role='BOT', prompt="{answerB}")
+                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
+                dict(role='BOT', prompt='{answerB}')
             ]),
             3:
             dict(round=[
-                dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nAnswer:"),
-                dict(role='BOT', prompt="{answerC}")
+                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nAnswer:'),
+                dict(role='BOT', prompt='{answerC}')
             ]),
         }),
     retriever=dict(type=ZeroRetriever),
@@ -36,7 +36,7 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 
 siqa_datasets = [
     dict(
-        abbr="siqa",
+        abbr='siqa',
        type=siqaDataset,
         path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
...
@@ -13,20 +13,20 @@ siqa_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template={
-            "1":
+            '1':
             dict(round=[
-                dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}"),
-                dict(role='BOT', prompt="Answer: A")
+                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}'),
+                dict(role='BOT', prompt='Answer: A')
             ]),
-            "2":
+            '2':
             dict(round=[
-                dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}"),
-                dict(role='BOT', prompt="Answer: B")
+                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}'),
+                dict(role='BOT', prompt='Answer: B')
             ]),
-            "3":
+            '3':
             dict(round=[
-                dict(role='HUMAN', prompt="{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}"),
-                dict(role='BOT', prompt="Answer: C")
+                dict(role='HUMAN', prompt='{context}\nQuestion: {question}\nA. {answerA}\nB. {answerB}\nC. {answerC}'),
+                dict(role='BOT', prompt='Answer: C')
             ]),
         }),
     retriever=dict(type=ZeroRetriever),
@@ -36,7 +36,7 @@ siqa_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
 
 siqa_datasets = [
     dict(
-        abbr="siqa",
+        abbr='siqa',
         type=siqaDataset,
         path='./data/siqa',
         reader_cfg=siqa_reader_cfg,
...
@@ -6,8 +6,8 @@ from opencompass.datasets import storyclozeDataset_V2
 from opencompass.utils.text_postprocessors import first_option_postprocess
 
 storycloze_reader_cfg = dict(
-    input_columns=["context", "sentence_quiz1", "sentence_quiz2"],
-    output_column="answer_right_ending",
+    input_columns=['context', 'sentence_quiz1', 'sentence_quiz2'],
+    output_column='answer_right_ending',
 )
 
 storycloze_infer_cfg = dict(
@@ -15,7 +15,7 @@ storycloze_infer_cfg = dict(
         type=PromptTemplate,
         template=dict(round=[
             dict(
-                role="HUMAN",
+                role='HUMAN',
                 prompt=
                 "{context}\nQuestion: Which ending makes the most sense?\nA. {sentence_quiz1}\nB. {sentence_quiz2}\nYou may choose between 'A' and 'B'.\nAnswer:",
             ),
@@ -27,7 +27,7 @@ storycloze_infer_cfg = dict(
 
 storycloze_eval_cfg = dict(
     evaluator=dict(type=AccEvaluator),
-    pred_role="BOT",
+    pred_role='BOT',
     pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
 )
@@ -35,7 +35,7 @@ storycloze_eval_cfg = dict(
 # Using multilingual version of this dataset.
 storycloze_datasets = [
     dict(
-        abbr="story_cloze",
+        abbr='story_cloze',
         type=storyclozeDataset_V2,
         path='./data/xstory_cloze',
         lang='en',
...
@@ -15,8 +15,8 @@ storycloze_infer_cfg = dict(
         type=PromptTemplate,
         template={
             i: dict(round=[
-                dict(role="HUMAN", prompt="{context}"),
-                dict(role="BOT", prompt=f"{{sentence_quiz{i}}}"),
+                dict(role='HUMAN', prompt='{context}'),
+                dict(role='BOT', prompt=f'{{sentence_quiz{i}}}'),
             ])
             for i in range(1, 3)
         }),
...
@@ -14,8 +14,8 @@ storycloze_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template={
-            1: "{context}{sentence_quiz1}",
-            2: "{context}{sentence_quiz2}",
+            1: '{context}{sentence_quiz1}',
+            2: '{context}{sentence_quiz2}',
         }),
     retriever=dict(type=ZeroRetriever),
     inferencer=dict(type=PPLInferencer))
...
@@ -10,9 +10,9 @@ subjective_reader_cfg = dict(
 )
 
 subjective_all_sets = [
-    "alignment_bench",
+    'alignment_bench',
 ]
 
-data_path ="data/subjective/alignment_bench"
+data_path ='data/subjective/alignment_bench'
 
 subjective_datasets = []
@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{question}"
+                    prompt='{question}'
                 ),
             ]),
         ),
@@ -56,12 +56,12 @@ for _name in subjective_all_sets:
                 ]),
             ),
         ),
-        pred_role="BOT",
+        pred_role='BOT',
     )
 
     subjective_datasets.append(
         dict(
-            abbr=f"{_name}",
+            abbr=f'{_name}',
            type=AlignmentBenchDataset,
             path=data_path,
             name=_name,
...
@@ -10,11 +10,11 @@ subjective_reader_cfg = dict(
 )
 
 subjective_all_sets = [
-    "alignment_bench",
+    'alignment_bench',
 ]
 
-data_path ="data/subjective/alignment_bench"
+data_path ='data/subjective/alignment_bench'
 
-alignment_bench_config_path = "data/subjective/alignment_bench/config"
+alignment_bench_config_path = 'data/subjective/alignment_bench/config'
 alignment_bench_config_name = 'multi-dimension'
 
 subjective_datasets = []
@@ -26,7 +26,7 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{question}"
+                    prompt='{question}'
                 ),
             ]),
         ),
@@ -42,17 +42,17 @@ for _name in subjective_all_sets:
                 template=dict(round=[
                     dict(
                         role='HUMAN',
-                        prompt = "{critiquellm_prefix}[助手的答案开始]\n{prediction}\n[助手的答案结束]\n"
+                        prompt = '{critiquellm_prefix}[助手的答案开始]\n{prediction}\n[助手的答案结束]\n'
                     ),
                 ]),
             ),
         ),
-        pred_role="BOT",
+        pred_role='BOT',
     )
 
     subjective_datasets.append(
         dict(
-            abbr=f"{_name}",
+            abbr=f'{_name}',
             type=AlignmentBenchDataset,
             path=data_path,
             name=_name,
...
@@ -10,9 +10,9 @@ subjective_reader_cfg = dict(
 )
 
 subjective_all_sets = [
-    "alignment_bench",
+    'alignment_bench',
 ]
 
-data_path ="data/subjective/alignment_bench"
+data_path ='data/subjective/alignment_bench'
 
 subjective_datasets = []
@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{question}"
+                    prompt='{question}'
                 ),
             ]),
         ),
@@ -44,12 +44,12 @@ for _name in subjective_all_sets:
                 ]),
             ),
         ),
-        pred_role="BOT",
+        pred_role='BOT',
     )
 
     subjective_datasets.append(
         dict(
-            abbr=f"{_name}",
+            abbr=f'{_name}',
             type=AlignmentBenchDataset,
             path=data_path,
             name=_name,
...
@@ -11,7 +11,7 @@ subjective_reader_cfg = dict(
 )
 
 subjective_all_sets = [
-    "alpaca_eval",
+    'alpaca_eval',
 ]
@@ -54,7 +54,7 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{question}"
+                    prompt='{question}'
                 ),
             ]),
         ),
@@ -72,7 +72,7 @@ for _name in subjective_all_sets:
                     dict(
                         role='SYSTEM',
                         fallback_role='HUMAN',
-                        prompt="You are a helpful assistant, that ranks models by the quality of their answers.")
+                        prompt='You are a helpful assistant, that ranks models by the quality of their answers.')
                 ],
                 round=[
                     dict(
@@ -82,14 +82,14 @@ for _name in subjective_all_sets:
                 ]),
             ),
         ),
-        pred_role="BOT",
+        pred_role='BOT',
     )
 
     subjective_datasets.append(
         dict(
-            abbr=f"{_name}",
+            abbr=f'{_name}',
             type=SubjectiveCmpDataset,
-            path="./data/subjective/alpaca_eval",
+            path='./data/subjective/alpaca_eval',
             name=_name,
             reader_cfg=subjective_reader_cfg,
             infer_cfg=subjective_infer_cfg,
...
@@ -11,7 +11,7 @@ subjective_reader_cfg = dict(
 )
 
 subjective_all_sets = [
-    "alpaca_eval",
+    'alpaca_eval',
 ]
@@ -56,7 +56,7 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{question}"
+                    prompt='{question}'
                 ),
             ]),
         ),
@@ -74,7 +74,7 @@ for _name in subjective_all_sets:
                     dict(
                         role='SYSTEM',
                         fallback_role='HUMAN',
-                        prompt="You are a highly efficient assistant, who evaluates and selects the best large language model (LLMs) based on the quality of their responses to a given instruction. This process will be used to create a leaderboard reflecting the most accurate and human-preferred answers.")
+                        prompt='You are a highly efficient assistant, who evaluates and selects the best large language model (LLMs) based on the quality of their responses to a given instruction. This process will be used to create a leaderboard reflecting the most accurate and human-preferred answers.')
                 ],
                 round=[
                     dict(
@@ -84,14 +84,14 @@ for _name in subjective_all_sets:
                 ]),
             ),
         ),
-        pred_role="BOT",
+        pred_role='BOT',
     )
 
     subjective_datasets.append(
         dict(
-            abbr=f"{_name}",
+            abbr=f'{_name}',
             type=SubjectiveCmpDataset,
-            path="./data/subjective/alpaca_eval",
+            path='./data/subjective/alpaca_eval',
             name=_name,
             reader_cfg=subjective_reader_cfg,
             infer_cfg=subjective_infer_cfg,
...
@@ -11,7 +11,7 @@ subjective_reader_cfg = dict(
 )
 
 subjective_all_sets = [
-    "question",
+    'question',
 ]
@@ -29,7 +29,7 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{question}"
+                    prompt='{question}'
                 ),
             ]),
         ),
@@ -57,14 +57,14 @@ for _name in subjective_all_sets:
                 ]),
             ),
         ),
-        pred_role="BOT",
+        pred_role='BOT',
     )
 
     subjective_datasets.append(
         dict(
-            abbr=f"{_name}",
+            abbr=f'{_name}',
             type=ArenaHardDataset,
-            path="./data/subjective/arena_hard",
+            path='./data/subjective/arena_hard',
             name=_name,
             reader_cfg=subjective_reader_cfg,
             infer_cfg=subjective_infer_cfg,
...