OpenDAS / opencompass · Commit aa2dd2b5 (Unverified)

[Format] Add config lints (#892)

Authored May 14, 2024 by Fengzhe Zhou, committed by GitHub on May 14, 2024
Parent: 3dbba119
Changes: 648 files in total; this page shows 20 changed files with 93 additions and 93 deletions (+93 -93)
Changed files on this page (additions / deletions):

configs/datasets/subjective/compassarena/compassarena_compare.py  (+5 -5)
configs/datasets/subjective/compassarena/compassarena_compare_creationv3.py  (+5 -5)
configs/datasets/subjective/compassarena/compassarena_compare_moe.py  (+6 -6)
configs/datasets/subjective/creationbench/creationbench_judgeby_gpt4.py  (+6 -6)
configs/datasets/subjective/creationbench/creationbench_judgeby_gpt4_withref.py  (+6 -6)
configs/datasets/subjective/multiround/functionalmt_zh_judgeby_gpt4.py  (+5 -5)
configs/datasets/subjective/multiround/mtbench_pair_judge.py  (+6 -6)
configs/datasets/subjective/multiround/mtbench_single_judge.py  (+6 -6)
configs/datasets/subjective/multiround/mtbench_single_judge_diff_temp.py  (+6 -6)
configs/datasets/subjective/subjective_cmp/subjective_cmp.py  (+10 -10)
configs/datasets/subjective/subjective_cmp/subjective_corev2.py  (+6 -6)
configs/datasets/subjective/subjective_cmp/subjective_creation.py  (+6 -6)
configs/datasets/summedits/summedits_gen_315438.py  (+2 -2)
configs/datasets/summedits/summedits_gen_4fb38b.py  (+2 -2)
configs/datasets/summedits/summedits_ppl_1fbeb6.py  (+4 -4)
configs/datasets/summedits/summedits_ppl_3c30d0.py  (+5 -5)
configs/datasets/summedits/summedits_ppl_fa58ba.py  (+2 -2)
configs/datasets/summscreen/summscreen_gen_653185.py  (+1 -1)
configs/datasets/summscreen/summscreen_gen_aa5eb3.py  (+1 -1)
configs/datasets/taco/README.md  (+3 -3)
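The hunks shown on this page all follow the same mechanical pattern: the newly added config lint normalizes double-quoted string literals in the dataset configs to single quotes (plus a couple of trailing-whitespace and end-of-file fixes), with no behavioural change. A minimal before/after sketch of that pattern, using an illustrative fragment rather than any specific file from this commit:

```python
# Before (flagged by the config lint): double-quoted string literals
data_path = "data/subjective/compass_arena"
eval_cfg = dict(
    pred_role="BOT",  # double quotes
)

# After (lint-clean): single-quoted string literals, identical behaviour
data_path = 'data/subjective/compass_arena'
eval_cfg = dict(
    pred_role='BOT',  # single quotes
)
```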
configs/datasets/subjective/compassarena/compassarena_compare.py

@@ -9,7 +9,7 @@ subjective_reader_cfg = dict(
     output_column='judge',
     )
-data_path = "data/subjective/compass_arena"
+data_path = 'data/subjective/compass_arena'
 subjective_datasets = []

@@ -99,7 +99,7 @@ creation_prompt = """
 {question}
 """ + base_prompt
-sub_map = {"language": language_prompt, "knowledge": knowledge_prompt, "reason_v2": reason_prompt, "math_v2": math_prompt, "creationv2_zh": creation_prompt}
+sub_map = {'language': language_prompt, 'knowledge': knowledge_prompt, 'reason_v2': reason_prompt, 'math_v2': math_prompt, 'creationv2_zh': creation_prompt}
 for _name, _prompt in sub_map.items():
     subjective_infer_cfg = dict(

@@ -108,7 +108,7 @@ for _name, _prompt in sub_map.items():
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -129,12 +129,12 @@ for _name, _prompt in sub_map.items():
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=CompassArenaDataset,
         path=data_path,
         name=_name,
configs/datasets/subjective/compassarena/compassarena_compare_creationv3.py

@@ -9,7 +9,7 @@ subjective_reader_cfg = dict(
     output_column='judge',
     )
-data_path = "data/subjective/compass_arena"
+data_path = 'data/subjective/compass_arena'
 subjective_datasets = []

@@ -99,7 +99,7 @@ creation_prompt = """
 {question}
 """ + base_prompt
-sub_map = {"creationv3": creation_prompt}
+sub_map = {'creationv3': creation_prompt}
 for _name, _prompt in sub_map.items():
     subjective_infer_cfg = dict(

@@ -108,7 +108,7 @@ for _name, _prompt in sub_map.items():
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -130,12 +130,12 @@ for _name, _prompt in sub_map.items():
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=CompassArenaDataset,
         path=data_path,
         name=_name,
configs/datasets/subjective/compassarena/compassarena_compare_moe.py

@@ -9,7 +9,7 @@ subjective_reader_cfg = dict(
     output_column='judge',
     )
-data_path = "data/subjective/compass_arena"
+data_path = 'data/subjective/compass_arena'
 subjective_datasets = []

@@ -91,7 +91,7 @@ reason_prompt = math_prompt
 creation_prompt = """
 请根据提供的 评分要求,用户问题 以及 相应的两个回答(回答1,回答2),判断两个回答中哪一个更好。
 评分要求(重要性依次递减):
-1. 好的回答必须首先符合用户问题里的各种需求,不能跑题
+1. 好的回答必须首先符合用户问题里的各种需求,不能跑题
 2. 好的回答必须具有逻辑连贯性,围绕一个中心进行回答
 3. 好的回答必须具有创造性的词语和表达丰富度

@@ -99,7 +99,7 @@ creation_prompt = """
 {question}
 """ + base_prompt
-sub_map = {"knowledge": knowledge_prompt, "language": language_prompt, "math_v2": math_prompt, "reason_v2": reason_prompt, "creationv2_zh": creation_prompt}
+sub_map = {'knowledge': knowledge_prompt, 'language': language_prompt, 'math_v2': math_prompt, 'reason_v2': reason_prompt, 'creationv2_zh': creation_prompt}
 meta_prompt = """\n你是一个评判专家,请根据提供的 评分要求,用户问题 以及 相应的两个回答(回答1,回答2),判断两个回答中哪一个更好。\n评分要求(重要性依次递减):\n1. 好的回答必须首先符合用户问题里的各种需求,不能跑题\n2. 好的回答必须具有逻辑连贯性,围绕一个中心进行回答\n3. 好的回答必须具有创造性的词语和表达丰富度\n\n[用户问题]\n{question}\n[回答1开始]\n{prediction}\n[回答1结束]\n[回答2开始]\n{prediction2}\n[回答2结束]\n此外,还有两个其他评判专家的评判意见供你参考。\n[评判意见1]\n{judgement}\n[评判意见2]\n{judgement2}\n\n最终请你综合其他评判专家的评判意见与你自己的意见,在以下 3 个选项中做出选择:\nA. 回答1更好\nB. 回答2更好\nC. 回答1、2平局\n并提供你的解释原因。\n\n如果你认为回答1更好,你的输出应形如:\n选择:A\n原因:blahblah blahblah\n\n\n如果你认为回答2更好,你的输出应形如:\n选择:B\n原因:blahblah blahblah\n\n\n如果你认为回答1、2打成平手,你的输出应形如:\n选择:C\n原因:blahblah blahblah\n\n

@@ -111,7 +111,7 @@ for _name, _prompt in sub_map.items():
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -141,12 +141,12 @@ for _name, _prompt in sub_map.items():
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=CompassArenaDataset,
         path=data_path,
         name=_name,
configs/datasets/subjective/creationbench/creationbench_judgeby_gpt4.py

@@ -10,9 +10,9 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "creationbench",
+    'creationbench',
 ]
-data_path = "data/subjective/"
+data_path = 'data/subjective/'
 subjective_datasets = []

@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -39,17 +39,17 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{gpt4_prefix}{prediction}{gpt4_suffix}"
+                    prompt='{gpt4_prefix}{prediction}{gpt4_suffix}'
                 ),
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=CreationBenchDataset,
         multi_dimension=True,
         path=data_path,
configs/datasets/subjective/creationbench/creationbench_judgeby_gpt4_withref.py

@@ -10,9 +10,9 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "creationv2_zh",
+    'creationv2_zh',
 ]
-data_path = "data/subjective/"
+data_path = 'data/subjective/'
 subjective_datasets = []

@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -39,17 +39,17 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{score_with_ref_prefix}{prediction}{score_with_ref_suffix}"
+                    prompt='{score_with_ref_prefix}{prediction}{score_with_ref_suffix}'
                 ),
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=CreationBenchDataset,
         multi_dimension=True,
         path=data_path,
configs/datasets/subjective/multiround/functionalmt_zh_judgeby_gpt4.py

@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "FunctionalMT",
+    'FunctionalMT',
 ]
-data_path = "data/subjective/"
+data_path = 'data/subjective/'
 subjective_datasets = []

@@ -36,17 +36,17 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{gpt4_prefix}{prediction}{gpt4_suffix}"
+                    prompt='{gpt4_prefix}{prediction}{gpt4_suffix}'
                 ),
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=MultiroundDataset,
         path=data_path,
         name=_name,
configs/datasets/subjective/multiround/mtbench_pair_judge.py

@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "mtbench",
+    'mtbench',
 ]
-data_path = "data/subjective/"
+data_path = 'data/subjective/'
 subjective_datasets = []

@@ -38,22 +38,22 @@ for _name in subjective_all_sets:
                 dict(
                     role='SYSTEM',
                     fallback_role='HUMAN',
-                    prompt="{system_prompt}")
+                    prompt='{system_prompt}')
             ],
             round=[
                 dict(
                     role='HUMAN',
-                    prompt="{prompt_template}"
+                    prompt='{prompt_template}'
                 ),
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=MTBenchDataset,
         path=data_path,
         name=_name,
configs/datasets/subjective/multiround/mtbench_single_judge.py

@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "mtbench",
+    'mtbench',
 ]
-data_path = "data/subjective/"
+data_path = 'data/subjective/'
 subjective_datasets = []

@@ -37,22 +37,22 @@ for _name in subjective_all_sets:
                 dict(
                     role='SYSTEM',
                     fallback_role='HUMAN',
-                    prompt="{system_prompt}")
+                    prompt='{system_prompt}')
             ],
             round=[
                 dict(
                     role='HUMAN',
-                    prompt="{prompt_template}"
+                    prompt='{prompt_template}'
                 ),
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=MTBenchDataset,
         path=data_path,
         name=_name,
configs/datasets/subjective/multiround/mtbench_single_judge_diff_temp.py

@@ -11,9 +11,9 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "mtbench_0.0", "mtbench_0.1", "mtbench_0.7"
+    'mtbench_0.0', 'mtbench_0.1', 'mtbench_0.7'
 ]
-data_path = "data/subjective/mtbench"
+data_path = 'data/subjective/mtbench'
 subjective_datasets = []

@@ -39,22 +39,22 @@ for _name in subjective_all_sets:
                 dict(
                     role='SYSTEM',
                     fallback_role='HUMAN',
-                    prompt="{system_prompt}")
+                    prompt='{system_prompt}')
             ],
             round=[
                 dict(
                     role='HUMAN',
-                    prompt="{prompt_template}"
+                    prompt='{prompt_template}'
                 ),
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=MTBenchDataset,
         path=data_path,
         name=_name,
configs/datasets/subjective/subjective_cmp/subjective_cmp.py

@@ -10,7 +10,7 @@ subjective_reader_cfg = dict(
     train_split='test')
 subjective_all_sets = [
-    "creation_v0.1",
+    'creation_v0.1',
 ]
 subjective_datasets = []

@@ -22,7 +22,7 @@ for _name in subjective_all_sets:
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -39,21 +39,21 @@ for _name in subjective_all_sets:
             template=dict(
                 begin=[
                     dict(
-                        role="SYSTEM",
-                        fallback_role="HUMAN",
-                        prompt="{prompt}"
+                        role='SYSTEM',
+                        fallback_role='HUMAN',
+                        prompt='{prompt}'
                     ),
                 ],
-                round=[dict(role="HUMAN",
-                            prompt="回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n")]))),
-    pred_role="BOT",
+                round=[dict(role='HUMAN',
+                            prompt='回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n')]))),
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=SubjectiveCmpDataset,
-        path="./data/subjective/",
+        path='./data/subjective/',
         name=_name,
         reader_cfg=subjective_reader_cfg,
         infer_cfg=subjective_infer_cfg,
configs/datasets/subjective/subjective_cmp/subjective_corev2.py

@@ -12,7 +12,7 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "COREV2_6A_all",
+    'COREV2_6A_all',
 ]

@@ -25,7 +25,7 @@ for _name in subjective_all_sets:
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -42,19 +42,19 @@ for _name in subjective_all_sets:
             template=dict(round=[
                 dict(
                     role='HUMAN',
-                    prompt="{prefix}问题: <问题开始> {question} <问题结束>\n\n回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n\n{suffix}"
+                    prompt='{prefix}问题: <问题开始> {question} <问题结束>\n\n回答 1: <回答 1 开始> {prediction} <回答 1 结束>\n\n回答 2: <回答 2 开始> {prediction2} <回答 2 结束>\n\n{suffix}'
                 ),
             ]),
         ),
     ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=Corev2Dataset,
-        path="./data/subjective/",
+        path='./data/subjective/',
         name=_name,
         reader_cfg=subjective_reader_cfg,
         infer_cfg=subjective_infer_cfg,
configs/datasets/subjective/subjective_cmp/subjective_creation.py

@@ -11,7 +11,7 @@ subjective_reader_cfg = dict(
     )
 subjective_all_sets = [
-    "creation_v0.1",
+    'creation_v0.1',
 ]
 subjective_datasets = []

@@ -23,7 +23,7 @@ for _name in subjective_all_sets:
         template=dict(round=[
             dict(
                 role='HUMAN',
-                prompt="{question}"
+                prompt='{question}'
             ),
         ]),
     ),

@@ -39,19 +39,19 @@ for _name in subjective_all_sets:
            template=dict(round=[
                dict(
                    role='HUMAN',
-                   prompt="{prefix}问题: <问题开始> {question} <问题结束>\n\n回答: <回答开始> {prediction} <回答结束>\n\n{suffix}"
+                   prompt='{prefix}问题: <问题开始> {question} <问题结束>\n\n回答: <回答开始> {prediction} <回答结束>\n\n{suffix}'
                ),
            ]),
        ),
    ),
-    pred_role="BOT",
+    pred_role='BOT',
 )
 subjective_datasets.append(
     dict(
-        abbr=f"{_name}",
+        abbr=f'{_name}',
         type=Creationv01Dataset,
-        path="./data/subjective/",
+        path='./data/subjective/',
         name=_name,
         reader_cfg=subjective_reader_cfg,
         infer_cfg=subjective_infer_cfg,
configs/datasets/summedits/summedits_gen_315438.py

@@ -13,7 +13,7 @@ summedits_infer_cfg = dict(
         type=PromptTemplate,
         template=dict(round=[
             dict(
-                role="HUMAN",
+                role='HUMAN',
                 prompt="""Given the document below, you have to determine if "Yes" or "No", the summary is factually consistent with the document.

@@ -36,7 +36,7 @@ Answer:"""
 summedits_eval_cfg = dict(
     evaluator=dict(type=AccEvaluator),
-    pred_role="BOT",
+    pred_role='BOT',
     pred_postprocessor=dict(type=first_capital_postprocess),
 )
configs/datasets/summedits/summedits_gen_4fb38b.py

@@ -13,7 +13,7 @@ summedits_infer_cfg = dict(
         type=PromptTemplate,
         template=dict(round=[
             dict(
-                role="HUMAN",
+                role='HUMAN',
                 prompt='Document:\n{doc}Summary:\n{summary}\nQuestion:\nIs the summary factually consistent with the document?\nA. Yes\nB. No\nAnswer:'
             ),

@@ -23,7 +23,7 @@ summedits_infer_cfg = dict(
 summedits_eval_cfg = dict(
     evaluator=dict(type=AccEvaluator),
-    pred_role="BOT",
+    pred_role='BOT',
     pred_postprocessor=dict(type=first_capital_postprocess),
 )
configs/datasets/summedits/summedits_ppl_1fbeb6.py

@@ -16,20 +16,20 @@ summedits_infer_cfg = dict(
             0: dict(round=[
                 dict(
-                    role="HUMAN",
+                    role='HUMAN',
                     prompt="""\nDocument:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
                 ),
-                dict(role="BOT", prompt="No")
+                dict(role='BOT', prompt='No')
             ]),
             1: dict(round=[
                 dict(
-                    role="HUMAN",
+                    role='HUMAN',
                     prompt="""Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document? """
                 ),
-                dict(role="BOT", prompt="Yes")
+                dict(role='BOT', prompt='Yes')
             ]),
         }),
     retriever=dict(type=ZeroRetriever),
configs/datasets/summedits/summedits_ppl_3c30d0.py

@@ -10,7 +10,7 @@ summedits_reader_cfg = dict(
     test_split='train')
 summedits_prompt1 = "Given the document below, you have to determine if 'Yes' or 'No', the summary is factually consistent with the document."
-summedits_prompt2 = "Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document?"
+summedits_prompt2 = 'Document:\n{doc}\nSummary:\n{summary}\nIs the summary factually consistent with the document?'
 summedits_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,

@@ -24,8 +24,8 @@ summedits_infer_cfg = dict(
                     prompt=summedits_prompt1)
             ],
             round=[
-                dict(role="HUMAN", prompt=summedits_prompt2),
-                dict(role="BOT", prompt="No")
+                dict(role='HUMAN', prompt=summedits_prompt2),
+                dict(role='BOT', prompt='No')
             ]),
             1: dict(

@@ -36,8 +36,8 @@ summedits_infer_cfg = dict(
                     prompt=summedits_prompt1)
             ],
             round=[
-                dict(role="HUMAN", prompt=summedits_prompt2),
-                dict(role="BOT", prompt="Yes")
+                dict(role='HUMAN', prompt=summedits_prompt2),
+                dict(role='BOT', prompt='Yes')
             ]),
         }),
     retriever=dict(type=ZeroRetriever),
configs/datasets/summedits/summedits_ppl_fa58ba.py

@@ -21,8 +21,8 @@ summedits_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template={
-            0: f"{summedits_prompt}Answer: No.",
-            1: f"{summedits_prompt}Answer: Yes."
+            0: f'{summedits_prompt}Answer: No.',
+            1: f'{summedits_prompt}Answer: Yes.'
         }),
     retriever=dict(type=ZeroRetriever),
     inferencer=dict(type=PPLInferencer))
configs/datasets/summscreen/summscreen_gen_653185.py

@@ -18,7 +18,7 @@ summscreen_infer_cfg = dict(
         begin=[
             dict(
                 role='SYSTEM',
-                fallback_role="HUMAN",
+                fallback_role='HUMAN',
                 prompt='Please summarize the following English play script in English:'),
         ],
configs/datasets/summscreen/summscreen_gen_aa5eb3.py

@@ -15,7 +15,7 @@ summscreen_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
         template=
-        "Please summarize the following English report in English:{content}\n{summary}."),
+        'Please summarize the following English report in English:{content}\n{summary}.'),
     retriever=dict(type=ZeroRetriever),
     inferencer=dict(type=GenInferencer, batch_size=4, max_out_len=500, max_seq_len=8192))
configs/datasets/taco/README.md

@@ -32,9 +32,9 @@ taco_skills = load_dataset('BAAI/TACO', skills=['Sorting', 'Range queries'], tok
 ```

 ## Evaluation results

-| dataset | metric | CodeLlama-7b-Python | internlm2-chat-1.8b-sft-hf | internlm2-chat-7b-sft-hf | internlm2-chat-20b-sft-hf |
+| dataset | metric | CodeLlama-7b-Python | internlm2-chat-1.8b-sft-hf | internlm2-chat-7b-sft-hf | internlm2-chat-20b-sft-hf |
 |-----------------------|----------|-------------|-------------|-------------|-------------|
-| TACO | pass@1 | 0.7 | 0.7 | 1.7 | 2.7 |
+| TACO | pass@1 | 0.7 | 0.7 | 1.7 | 2.7 |
 Please refer to [repo](https://github.com/FlagOpen/TACO/tree/main?tab=readme-ov-file) for original results if needed.

@@ -47,4 +47,4 @@ Please refer to [repo](https://github.com/FlagOpen/TACO/tree/main?tab=readme-ov-
   journal={arXiv preprint arXiv:2312.14852},
   year={2023}
 }
-```
\ No newline at end of file
+```