OpenDAS / opencompass · Commits · b03d5dc5
"git@developer.sourcefind.cn:gaoqiong/composable_kernel.git" did not exist on "f74345868b0212bf2d37eea2bae53e285653ce2e"
Unverified commit b03d5dc5, authored Mar 04, 2024 by Fengzhe Zhou, committed by GitHub on Mar 04, 2024
[Sync] Sync Internal (#941)
parent bbec7d87
Changes: 73
Showing 20 changed files with 513 additions and 175 deletions (+513, −175)
configs/datasets/nq/nq_open_1shot_gen_01cf41.py (+61, −0)
configs/datasets/nq/nq_open_1shot_gen_20a989.py (+45, −0)
configs/datasets/race/race_ppl_abed12.py (+3, −10)
configs/datasets/triviaqa/triviaqa_wiki_1shot_gen_20a989.py (+46, −0)
configs/datasets/triviaqa/triviaqa_wiki_1shot_gen_eaf81e.py (+62, −0)
configs/datasets/winogrande/winogrande_5shot_gen_6447e6.py (+46, −0)
configs/datasets/winogrande/winogrande_5shot_ll_9d81d7.py (+38, −0)
configs/models/gemma/hf_gemma_2b.py (+23, −0)
configs/models/gemma/hf_gemma_2b_it.py (+32, −0)
configs/models/gemma/hf_gemma_7b.py (+23, −0)
configs/models/gemma/hf_gemma_7b_it.py (+33, −0)
configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py (+32, −0)
configs/models/openbmb/hf_minicpm_2b_sft_fp32.py (+32, −0)
configs/models/qwen/hf_qwen1_5_14b.py (+1, −1)
configs/models/qwen/vllm_qwen1_5_14b_chat.py (+1, −2)
configs/summarizers/agent_bench.py (+17, −91)
configs/summarizers/code_passk.py (+6, −14)
configs/summarizers/compass_knowledge.py (+1, −9)
configs/summarizers/compass_math.py (+6, −24)
configs/summarizers/compassbench_v1_language.py (+5, −24)
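
Each fragment below is a plain-Python OpenCompass config that defines dataset or model lists by assignment. For context, here is a minimal sketch (not part of this commit; the file name eval_gemma_nq.py and exact import paths are assumptions based on the files added here) of how a top-level eval config would combine them via read_base:

# eval_gemma_nq.py -- hypothetical top-level config, for illustration only
from mmengine.config import read_base

with read_base():
    # pull in the dataset and model lists defined by the new fragments
    from .datasets.nq.nq_open_1shot_gen_01cf41 import nq_datasets
    from .models.gemma.hf_gemma_2b import models

datasets = [*nq_datasets]

Such a config would typically be launched with `python run.py configs/eval_gemma_nq.py`.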
configs/datasets/nq/nq_open_1shot_gen_01cf41.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever, RandomRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NQOpenDataset, NQEvaluator

nq_datasets = []
for k in [1]:
    nq_reader_cfg = dict(
        input_columns=['question'], output_column='answer', train_split='train', test_split='validation')

    if k == 0:
        nq_infer_cfg = dict(
            prompt_template=dict(
                type=PromptTemplate,
                template=dict(
                    round=[
                        dict(role='HUMAN', prompt='Q: {question}?'),
                        dict(role='BOT', prompt='A:'),
                    ]
                )
            ),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer, max_out_len=50)
        )
    else:
        nq_infer_cfg = dict(
            ice_template=dict(
                type=PromptTemplate,
                template=dict(
                    round=[
                        dict(role='HUMAN', prompt='Q: {question}?'),
                        dict(role='BOT', prompt='A: {answer}.\n'),
                    ]
                ),
            ),
            prompt_template=dict(
                type=PromptTemplate,
                template=dict(
                    begin="</E>",
                    round=[
                        dict(role='HUMAN', prompt='Q: {question}?'),
                        dict(role='BOT', prompt='A:'),
                    ]
                ),
                ice_token="</E>",
            ),
            retriever=dict(type=FixKRetriever, fix_id_list=list(range(k))),
            inferencer=dict(type=GenInferencer, max_out_len=50, stopping_criteria=["Q:", "\n"]),
        )

    nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")

    nq_datasets.append(
        dict(
            type=NQOpenDataset,
            abbr=f'nq_open_{k}shot',
            path='./data/nq-open/',
            reader_cfg=nq_reader_cfg,
            infer_cfg=nq_infer_cfg,
            eval_cfg=nq_eval_cfg)
    )
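
To see what the 1-shot branch above produces: FixKRetriever with fix_id_list=range(1) pins the first train example as the in-context example, the ice_template renders it, and the rendered text is spliced in at the `</E>` ice_token of the prompt template. A minimal plain-string illustration of that splice (the role structure is flattened here and the example texts are invented):

# Illustrative only: how the ice_token splice works for k == 1.
ice_template = 'Q: {question}?\nA: {answer}.\n'
prompt_template = '</E>Q: {question}?\nA:'
ice = ice_template.format(question='an example question', answer='an example answer')
prompt = prompt_template.replace('</E>', ice).format(question='the test question')
print(prompt)
# Q: an example question?
# A: an example answer.
# Q: the test question?
# A:

The stopping_criteria=["Q:", "\n"] then cut generation off before the model starts inventing a next Q/A pair.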
configs/datasets/nq/nq_open_1shot_gen_20a989.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import NQOpenDataset, NQEvaluator

nq_datasets = []
for k in [1]:
    nq_reader_cfg = dict(
        input_columns=['question'], output_column='answer', train_split='train', test_split='validation')

    if k == 0:
        nq_infer_cfg = dict(
            prompt_template=dict(
                type=PromptTemplate,
                template='Q: {question}\nA: ',
            ),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer, max_out_len=50)
        )
    else:
        nq_infer_cfg = dict(
            ice_template=dict(
                type=PromptTemplate,
                template='Q: {question}\nA: {answer}.\n',
            ),
            prompt_template=dict(
                type=PromptTemplate,
                template='</E>Q: {question}\nA: ',
                ice_token="</E>",
            ),
            retriever=dict(type=FixKRetriever, fix_id_list=list(range(k))),
            inferencer=dict(type=GenInferencer, max_out_len=50, stopping_criteria=["Q:", "\n"]),
        )

    nq_eval_cfg = dict(evaluator=dict(type=NQEvaluator), pred_role="BOT")

    nq_datasets.append(
        dict(
            type=NQOpenDataset,
            abbr=f'nq_open_{k}shot',
            path='./data/nq-open/',
            reader_cfg=nq_reader_cfg,
            infer_cfg=nq_infer_cfg,
            eval_cfg=nq_eval_cfg)
    )
configs/datasets/race/race_ppl_abed12.py (view file @ b03d5dc5)

@@ -11,19 +11,12 @@ race_reader_cfg = dict(
     test_split="test"
 )

+hint = "Read the article, and answer the question by replying A, B, C or D."
+question_and_options = "{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
 race_infer_cfg = dict(
     prompt_template=dict(
         type=PromptTemplate,
-        template={
-            'A': 'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: A',
-            'B': 'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: B',
-            'C': 'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: C',
-            'D': 'Read the article, and answer the question by replying A, B, C or D.\n\n{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}\n\nAnswer: D',
-        }),
+        template={answer: hint + '\n\n' + question_and_options + '\n\nAnswer: ' + answer
+                  for answer in ['A', 'B', 'C', 'D']}),
     retriever=dict(type=ZeroRetriever),
     inferencer=dict(type=PPLInferencer))
...
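
The + side's dict comprehension builds the same four candidate prompts that the removed lines spelled out by hand, one per answer letter. A standalone check (plain Python, independent of OpenCompass):

# Standalone check of the dict comprehension used above: four candidate
# prompts, identical except for the final answer letter.
hint = "Read the article, and answer the question by replying A, B, C or D."
question_and_options = "{article}\n\nQ: {question}\n\nA. {A}\nB. {B}\nC. {C}\nD. {D}"
template = {answer: hint + '\n\n' + question_and_options + '\n\nAnswer: ' + answer
            for answer in ['A', 'B', 'C', 'D']}
assert sorted(template) == ['A', 'B', 'C', 'D']
assert all(t.endswith('Answer: ' + a) for a, t in template.items())

PPLInferencer scores each filled-in candidate by perplexity, and the evaluator takes the lowest-loss key as the prediction.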
configs/datasets/triviaqa/triviaqa_wiki_1shot_gen_20a989.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQADataset_V2, TriviaQAEvaluator

triviaqa_datasets = []
for k in [1]:
    triviaqa_reader_cfg = dict(
        input_columns=['question'], output_column='answer', train_split='train', test_split='validation')

    if k == 0:
        triviaqa_infer_cfg = dict(
            prompt_template=dict(
                type=PromptTemplate,
                template='Q: {question}\nA: ',
            ),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer, max_out_len=50)
        )
    else:
        triviaqa_infer_cfg = dict(
            ice_template=dict(
                type=PromptTemplate,
                template='Q: {question}\nA: {answer}.\n',
            ),
            prompt_template=dict(
                type=PromptTemplate,
                template='</E>Q: {question}\nA: ',
                ice_token="</E>",
            ),
            retriever=dict(type=FixKRetriever, fix_id_list=list(range(k))),
            inferencer=dict(type=GenInferencer, max_out_len=50, stopping_criteria=["Q:", "\n"]),
        )

    triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator), pred_role="BOT")

    triviaqa_datasets.append(
        dict(
            type=TriviaQADataset_V2,
            abbr=f'triviaqa_wiki_{k}shot',
            path='./data/triviaqa',
            reader_cfg=triviaqa_reader_cfg,
            infer_cfg=triviaqa_infer_cfg,
            eval_cfg=triviaqa_eval_cfg)
    )
configs/datasets/triviaqa/triviaqa_wiki_1shot_gen_eaf81e.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever, FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import TriviaQADataset_V2, TriviaQAEvaluator

triviaqa_datasets = []
for k in [1]:
    triviaqa_reader_cfg = dict(
        input_columns=['question'], output_column='answer', train_split='train', test_split='validation')

    if k == 0:
        triviaqa_infer_cfg = dict(
            prompt_template=dict(
                type=PromptTemplate,
                template=dict(
                    round=[
                        dict(role='HUMAN', prompt='Q: {question}'),
                        dict(role='BOT', prompt='A:'),
                    ]
                )
            ),
            retriever=dict(type=ZeroRetriever),
            inferencer=dict(type=GenInferencer, max_out_len=50)
        )
    else:
        triviaqa_infer_cfg = dict(
            ice_template=dict(
                type=PromptTemplate,
                template=dict(
                    round=[
                        dict(role='HUMAN', prompt='Q: {question}'),
                        dict(role='BOT', prompt='A: {answer}.\n'),
                    ]
                ),
            ),
            prompt_template=dict(
                type=PromptTemplate,
                template=dict(
                    begin="</E>",
                    round=[
                        dict(role='HUMAN', prompt='Q: {question}'),
                        dict(role='BOT', prompt='A:'),
                    ]
                ),
                ice_token="</E>",
            ),
            retriever=dict(type=FixKRetriever, fix_id_list=list(range(k))),
            inferencer=dict(type=GenInferencer, max_out_len=50, stopping_criteria=["Q:", "\n"]),
        )

    triviaqa_eval_cfg = dict(evaluator=dict(type=TriviaQAEvaluator), pred_role="BOT")

    triviaqa_datasets.append(
        dict(
            type=TriviaQADataset_V2,
            abbr=f'triviaqa_wiki_{k}shot',
            path='./data/triviaqa',
            reader_cfg=triviaqa_reader_cfg,
            infer_cfg=triviaqa_infer_cfg,
            eval_cfg=triviaqa_eval_cfg)
    )
configs/datasets/winogrande/winogrande_5shot_gen_6447e6.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winograndeDataset_V3
from opencompass.utils.text_postprocessors import first_option_postprocess

winogrande_reader_cfg = dict(
    input_columns=["opt1", "opt2"],
    output_column="answer",
    train_split="train_xs",
    test_split="dev",
)

winogrande_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template=dict(
            begin="</E>",
            round=[
                dict(role="HUMAN", prompt="Which of the following is a good sentence:\nA. {opt1}\nB. {opt2}\nAnswer:"),
                dict(role="BOT", prompt="{answer}"),
            ]
        ),
        ice_token="</E>",
    ),
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=GenInferencer),
)

winogrande_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    pred_role="BOT",
    pred_postprocessor=dict(type=first_option_postprocess, options="AB"),
)

winogrande_datasets = [
    dict(
        abbr="winogrande",
        type=winograndeDataset_V3,
        path="./data/winogrande",
        reader_cfg=winogrande_reader_cfg,
        infer_cfg=winogrande_infer_cfg,
        eval_cfg=winogrande_eval_cfg,
    )
]
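
This gen-style variant asks the model to answer in free text, so accuracy depends on first_option_postprocess pulling the chosen letter out of the output before AccEvaluator compares it with the gold label. A simplified stand-in for that behaviour (a sketch, not the actual OpenCompass implementation):

import re

def first_option_postprocess_sketch(text: str, options: str = 'AB') -> str:
    # Return the first standalone option letter found in the model output.
    match = re.search(rf'\b([{options}])\b', text)
    return match.group(1) if match else ''

assert first_option_postprocess_sketch('The answer is B.') == 'B'
assert first_option_postprocess_sketch('A, because it reads naturally') == 'A'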
configs/datasets/winogrande/winogrande_5shot_ll_9d81d7.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import FixKRetriever
from opencompass.openicl.icl_inferencer import LLInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import winograndeDataset_V3

winogrande_reader_cfg = dict(
    input_columns=['opt1', 'opt2'],
    output_column='answer',
    train_split="train_xs",
    test_split="dev",
)

question_and_options = "Which of the following is a good sentence:\nA. {opt1}\nB. {opt2}"
winogrande_infer_cfg = dict(
    ice_template=dict(
        type=PromptTemplate,
        template={answer: f"{question_and_options}\nAnswer: {answer}\n" for answer in ["A", "B"]},
    ),
    prompt_template=dict(
        type=PromptTemplate,
        template={answer: f"</E>{question_and_options}\nAnswer: {answer}" for answer in ["A", "B"]},
        ice_token="</E>",
    ),
    retriever=dict(type=FixKRetriever, fix_id_list=[0, 2, 4, 6, 8]),
    inferencer=dict(type=LLInferencer),
)
winogrande_eval_cfg = dict(evaluator=dict(type=AccEvaluator))

winogrande_datasets = [
    dict(
        abbr='winogrande',
        type=winograndeDataset_V3,
        path='./data/winogrande',
        reader_cfg=winogrande_reader_cfg,
        infer_cfg=winogrande_infer_cfg,
        eval_cfg=winogrande_eval_cfg)
]
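
In contrast with the gen config above, this ll variant never decodes text: both candidate completions are written out in full, LLInferencer asks the model how likely each one is, and the higher-scoring label wins. Conceptually (a sketch; `loglikelihood` here is a hypothetical stand-in for the model's summed token log-probability, not an OpenCompass API):

# Conceptual sketch of loglikelihood-based choice scoring.
def pick_answer(candidates, loglikelihood):
    # candidates maps label -> fully rendered prompt ending in that answer
    return max(candidates, key=lambda label: loglikelihood(candidates[label]))

demo = {'A': 'The trophy fits in the case.',
        'B': 'The trophy fits in the spacious suitcase.'}
print(pick_answer(demo, loglikelihood=len))  # 'B', with `len` as a dummy scorer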
configs/models/gemma/hf_gemma_2b.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.models import HuggingFaceCausalLM

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='gemma-2b-hf',
        path="google/gemma-2b",
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,
        ),
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
configs/models/gemma/hf_gemma_2b_it.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.models import HuggingFaceCausalLM

_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='<start_of_turn>user\n', end='<end_of_turn>\n'),
        dict(role="BOT", begin="<start_of_turn>model\n", end='<end_of_turn>\n', generate=True),
    ],
    eos_token_id=151645,
)

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='gemma-2b-it-hf',
        path="google/gemma-2b-it",
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,
        ),
        meta_template=_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
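
For context, meta_template is what turns a chat turn into the raw string the tokenizer sees: each round's text is wrapped in its begin/end markers, and generation starts after the BOT begin. A rough illustration of how the template above serializes one HUMAN turn (an illustration, not OpenCompass output):

# Rough sketch of the serialization implied by _meta_template above.
begin_user, end_turn = '<start_of_turn>user\n', '<end_of_turn>\n'
begin_model = '<start_of_turn>model\n'
prompt = begin_user + 'What is 2 + 2?' + end_turn + begin_model
print(repr(prompt))
# '<start_of_turn>user\nWhat is 2 + 2?<end_of_turn>\n<start_of_turn>model\n'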
configs/models/gemma/hf_gemma_7b.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.models import HuggingFaceCausalLM

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='gemma-7b-hf',
        path="google/gemma-7b",
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,
        ),
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
configs/models/gemma/hf_gemma_7b_it.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.models import HuggingFaceCausalLM

_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='<start_of_turn>user\n', end='<end_of_turn>\n'),
        dict(role="BOT", begin="<start_of_turn>model\n", end='<end_of_turn>\n', generate=True),
    ],
    eos_token_id=151645,
)

models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='gemma-7b-it-hf',
        path="google/gemma-7b-it",
        model_kwargs=dict(device_map='auto', trust_remote_code=True),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
            use_fast=False,
        ),
        meta_template=_meta_template,
        min_out_len=1,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]
configs/models/openbmb/hf_minicpm_2b_dpo_fp32.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.models import HuggingFace

_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='<用户>'),
        dict(role="BOT", begin="<AI>", generate=True),
    ],
)

models = [
    dict(
        type=HuggingFace,
        abbr='minicpm-2b-dpo-hf',
        path='openbmb/MiniCPM-2B-dpo-fp32',
        tokenizer_path='openbmb/MiniCPM-2B-dpo-fp32',
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        meta_template=_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
        end_str='<用户>',
    )
]
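
Here end_str='<用户>' ('<user>') trims the generation at the point where the model begins to emit the next user turn, presumably because the MiniCPM template marks turns inline rather than with a dedicated end-of-sequence token. A rough equivalent of that trim (illustrative only):

# Illustrative equivalent of end_str handling: keep only the text produced
# before the model starts a new '<用户>' (user) turn.
def apply_end_str(generation: str, end_str: str = '<用户>') -> str:
    return generation.split(end_str, 1)[0]

print(apply_end_str('Paris is the capital of France.<用户>And Germany?'))
# Paris is the capital of France.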
configs/models/openbmb/hf_minicpm_2b_sft_fp32.py (new file, mode 100644 · view file @ b03d5dc5)

from opencompass.models import HuggingFace

_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='<用户>'),
        dict(role="BOT", begin="<AI>", generate=True),
    ],
)

models = [
    dict(
        type=HuggingFace,
        abbr='minicpm-2b-sft-hf',
        path='openbmb/MiniCPM-2B-sft-fp32',
        tokenizer_path='openbmb/MiniCPM-2B-sft-fp32',
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        meta_template=_meta_template,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=8,
        run_cfg=dict(num_gpus=1, num_procs=1),
        end_str='<用户>',
    )
]
configs/models/qwen/hf_qwen1_5_14b.py (view file @ b03d5dc5)

@@ -20,6 +20,6 @@ models = [
         max_out_len=100,
         max_seq_len=2048,
         batch_size=8,
-        run_cfg=dict(num_gpus=1, num_procs=1),
+        run_cfg=dict(num_gpus=2, num_procs=1),
     )
 ]
configs/models/qwen/vllm_qwen1_5_14b_chat.py (view file @ b03d5dc5)

@@ -4,8 +4,7 @@ from opencompass.models import VLLM

_meta_template = dict(
    round=[
        dict(role="HUMAN", begin='<|im_start|>user\n', end='<|im_end|>\n'),
        dict(role="BOT", begin="<|im_start|>assistant\n", end='<|im_end|>\n', generate=True),
    ],
    eos_token_id=151645,
)
...
configs/summarizers/agent_bench.py (view file @ b03d5dc5)

@@ -5,101 +5,27 @@ with read_base():
     from .groups.plugineval import plugineval_summary_groups

 agent_summary_groups = [
-    dict(name='math_acc_1_and_fill_in_blank-native', subsets=[['compassbench_v1_math-high-single_choice_cn-native', 'acc_1'], ['compassbench_v1_math-high-single_choice_en-native', 'acc_1'], ['compassbench_v1_math-middle-single_choice_cn-native', 'acc_1'], ['compassbench_v1_math-middle-single_choice_en-native', 'acc_1'], ['compassbench_v1_math-primary-cloze_cn-native', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-native', 'accuracy']]),
-    dict(name='math_perf_4_and_fill_in_blank-native', subsets=[['compassbench_v1_math-high-single_choice_cn-native', 'perf_4'], ['compassbench_v1_math-high-single_choice_en-native', 'perf_4'], ['compassbench_v1_math-middle-single_choice_cn-native', 'perf_4'], ['compassbench_v1_math-middle-single_choice_en-native', 'perf_4'], ['compassbench_v1_math-primary-cloze_cn-native', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-native', 'accuracy']]),
-    dict(name='math_acc_1_and_fill_in_blank-agent', subsets=[['compassbench_v1_math-high-single_choice_cn-agent', 'acc_1'], ['compassbench_v1_math-high-single_choice_en-agent', 'acc_1'], ['compassbench_v1_math-middle-single_choice_cn-agent', 'acc_1'], ['compassbench_v1_math-middle-single_choice_en-agent', 'acc_1'], ['compassbench_v1_math-primary-cloze_cn-agent', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-agent', 'accuracy']]),
-    dict(name='math_perf_4_and_fill_in_blank-agent', subsets=[['compassbench_v1_math-high-single_choice_cn-agent', 'perf_4'], ['compassbench_v1_math-high-single_choice_en-agent', 'perf_4'], ['compassbench_v1_math-middle-single_choice_cn-agent', 'perf_4'], ['compassbench_v1_math-middle-single_choice_en-agent', 'perf_4'], ['compassbench_v1_math-primary-cloze_cn-agent', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-agent', 'accuracy']]),
-    dict(
-        name='agent',
-        subsets=['math_perf_4_and_fill_in_blank-agent', 'cibench_template_wo_nltk:executable', 'cibench_template_wo_nltk:numeric_correct', 'cibench_template_wo_nltk:vis_sim', 'cibench_template_cn_wo_nltk:executable', 'cibench_template_cn_wo_nltk:numeric_correct', 'cibench_template_cn_wo_nltk:vis_sim', 'plugin_eval-p10', 'plugin_eval-p10_zh'],
-        weights={'math_perf_4_and_fill_in_blank-agent': 1, 'cibench_template_wo_nltk:executable': 0.5, 'cibench_template_wo_nltk:numeric_correct': 0.25, 'cibench_template_wo_nltk:vis_sim': 0.25, 'cibench_template_cn_wo_nltk:executable': 0.5, 'cibench_template_cn_wo_nltk:numeric_correct': 0.25, 'cibench_template_cn_wo_nltk:vis_sim': 0.25, 'plugin_eval-p10': 1, 'plugin_eval-p10_zh': 1}
-    )
+    # dict(name='math_acc_1_and_fill_in_blank-native', subsets=[['compassbench_v1_math-high-single_choice_cn-native', 'acc_1'], ['compassbench_v1_math-high-single_choice_en-native', 'acc_1'], ['compassbench_v1_math-middle-single_choice_cn-native', 'acc_1'], ['compassbench_v1_math-middle-single_choice_en-native', 'acc_1'], ['compassbench_v1_math-primary-cloze_cn-native', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-native', 'accuracy']]),
+    # dict(name='math_perf_4_and_fill_in_blank-native', subsets=[['compassbench_v1_math-high-single_choice_cn-native', 'perf_4'], ['compassbench_v1_math-high-single_choice_en-native', 'perf_4'], ['compassbench_v1_math-middle-single_choice_cn-native', 'perf_4'], ['compassbench_v1_math-middle-single_choice_en-native', 'perf_4'], ['compassbench_v1_math-primary-cloze_cn-native', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-native', 'accuracy']]),
+    # dict(name='math_acc_1_and_fill_in_blank-agent', subsets=[['compassbench_v1_math-high-single_choice_cn-agent', 'acc_1'], ['compassbench_v1_math-high-single_choice_en-agent', 'acc_1'], ['compassbench_v1_math-middle-single_choice_cn-agent', 'acc_1'], ['compassbench_v1_math-middle-single_choice_en-agent', 'acc_1'], ['compassbench_v1_math-primary-cloze_cn-agent', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-agent', 'accuracy']]),
+    # dict(name='math_perf_4_and_fill_in_blank-agent', subsets=[['compassbench_v1_math-high-single_choice_cn-agent', 'perf_4'], ['compassbench_v1_math-high-single_choice_en-agent', 'perf_4'], ['compassbench_v1_math-middle-single_choice_cn-agent', 'perf_4'], ['compassbench_v1_math-middle-single_choice_en-agent', 'perf_4'], ['compassbench_v1_math-primary-cloze_cn-agent', 'accuracy'], ['compassbench_v1_math-primary-cloze_en-agent', 'accuracy']]),
+    # dict(name='agent', subsets=['math_perf_4_and_fill_in_blank-agent', 'cibench_template_wo_nltk:executable', 'cibench_template_wo_nltk:numeric_correct', 'cibench_template_wo_nltk:vis_sim', 'cibench_template_cn_wo_nltk:executable', 'cibench_template_cn_wo_nltk:numeric_correct', 'cibench_template_cn_wo_nltk:vis_sim', 'plugin_eval-p10', 'plugin_eval-p10_zh'], weights={'math_perf_4_and_fill_in_blank-agent': 1, 'cibench_template_wo_nltk:executable': 0.5, 'cibench_template_wo_nltk:numeric_correct': 0.25, 'cibench_template_wo_nltk:vis_sim': 0.25, 'cibench_template_cn_wo_nltk:executable': 0.5, 'cibench_template_cn_wo_nltk:numeric_correct': 0.25, 'cibench_template_cn_wo_nltk:vis_sim': 0.25, 'plugin_eval-p10': 1, 'plugin_eval-p10_zh': 1}),
+    dict(name='cibench_template', subsets=['cibench_template:executable', 'cibench_template:numeric_correct', 'cibench_template:text_score', 'cibench_template:vis_sim']),
+    dict(name='cibench_template_cn', subsets=['cibench_template_cn:executable', 'cibench_template_cn:numeric_correct', 'cibench_template_cn:text_score', 'cibench_template_cn:vis_sim']),
+    dict(name='agent_cn', subsets=['cibench_template_cn', 'plugin_eval-mus-p10_one_review_zh']),
+    dict(name='agent_en', subsets=['cibench_template', 'plugin_eval-mus-p10_one_review']),
+    dict(name='agent', subsets=['agent_cn', 'agent_en']),
 ]

 summarizer = dict(
     dataset_abbrs=[
-        # 'agent',
-        # 'math_acc_1_and_fill_in_blank-native',
-        # 'math_perf_4_and_fill_in_blank-native',
-        # # '######## MathBench-Agent Accuracy ########', # category
-        # 'math_acc_1_and_fill_in_blank-agent',
-        # 'math_perf_4_and_fill_in_blank-agent',
-        # # '######## CIBench Template ########', # category
-        # 'cibench_template:executable',
-        # 'cibench_template:numeric_correct',
-        # 'cibench_template:text_score',
-        # 'cibench_template:vis_sim',
-        # # '######## CIBench Template Chinese ########', # category
-        # 'cibench_template_cn:executable',
-        # 'cibench_template_cn:numeric_correct',
-        # 'cibench_template_cn:text_score',
-        # 'cibench_template_cn:vis_sim',
-        # # '######## CIBench Template w/o NLTK ########', # category no text score becase it is only for nltk
-        # 'cibench_template_wo_nltk:executable',
-        # 'cibench_template_wo_nltk:numeric_correct',
-        # 'cibench_template_wo_nltk:vis_sim',
-        # # '######## CIBench Template Chinese w/o NLTK ########', # category
-        # 'cibench_template_cn_wo_nltk:executable',
-        # 'cibench_template_cn_wo_nltk:numeric_correct',
-        # 'cibench_template_cn_wo_nltk:vis_sim',
-        # '######## T-Eval ########', # category
-        ['plugin_eval-p10', 'naive_average'],
-        ['plugin_eval-p10-instruct_v1', 'format_metric'],
-        ['plugin_eval-p10-instruct_v1', 'args_em_metric'],
-        ['plugin_eval-p10-plan_str_v1', 'f1_score'],
-        ['plugin_eval-p10-plan_json_v1', 'f1_score'],
-        ['plugin_eval-p10-reason_str_v1', 'thought'],
-        ['plugin_eval-p10-reason_retrieve_understand_json_v1', 'thought'],
-        ['plugin_eval-p10-retrieve_str_v1', 'name'],
-        ['plugin_eval-p10-reason_retrieve_understand_json_v1', 'name'],
-        ['plugin_eval-p10-understand_str_v1', 'args'],
-        ['plugin_eval-p10-reason_retrieve_understand_json_v1', 'args'],
-        ['plugin_eval-p10-review_str_v1', 'review_quality'],
-        ['plugin_eval-p10_zh', 'naive_average'],
-        ['plugin_eval-p10-instruct_v1_zh', 'format_metric'],
-        ['plugin_eval-p10-instruct_v1_zh', 'args_em_metric'],
-        ['plugin_eval-p10-plan_str_v1_zh', 'f1_score'],
-        ['plugin_eval-p10-plan_json_v1_zh', 'f1_score'],
-        ['plugin_eval-p10-reason_str_v1_zh', 'thought'],
-        ['plugin_eval-p10-reason_retrieve_understand_json_v1_zh', 'thought'],
-        ['plugin_eval-p10-retrieve_str_v1_zh', 'name'],
-        ['plugin_eval-p10-reason_retrieve_understand_json_v1_zh', 'name'],
-        ['plugin_eval-p10-understand_str_v1_zh', 'args'],
-        ['plugin_eval-p10-reason_retrieve_understand_json_v1_zh', 'args'],
-        ['plugin_eval-p10-review_str_v1_zh', 'review_quality'],
-        # '######## MUS-T-Eval ########', # category
-        ['plugin_eval-mus-p10', 'naive_average'],
-        ['plugin_eval-mus-p10-instruct_v1', 'format_metric'],
-        ['plugin_eval-mus-p10-instruct_v1', 'args_em_metric'],
-        ['plugin_eval-mus-p10-plan_str_v1', 'f1_score'],
-        ['plugin_eval-mus-p10-plan_json_v1', 'f1_score'],
-        ['plugin_eval-mus-p10-reason_str_v1', 'thought'],
-        ['plugin_eval-mus-p10-reason_retrieve_understand_json_v1', 'thought'],
-        ['plugin_eval-mus-p10-retrieve_str_v1', 'name'],
-        ['plugin_eval-mus-p10-reason_retrieve_understand_json_v1', 'name'],
-        ['plugin_eval-mus-p10-understand_str_v1', 'args'],
-        ['plugin_eval-mus-p10-reason_retrieve_understand_json_v1', 'args'],
-        ['plugin_eval-mus-p10-review_str_v1', 'review_quality'],
-        ['plugin_eval-mus-p10_zh', 'naive_average'],
-        ['plugin_eval-mus-p10-instruct_v1_zh', 'format_metric'],
-        ['plugin_eval-mus-p10-instruct_v1_zh', 'args_em_metric'],
-        ['plugin_eval-mus-p10-plan_str_v1_zh', 'f1_score'],
-        ['plugin_eval-mus-p10-plan_json_v1_zh', 'f1_score'],
-        ['plugin_eval-mus-p10-reason_str_v1_zh', 'thought'],
-        ['plugin_eval-mus-p10-reason_retrieve_understand_json_v1_zh', 'thought'],
-        ['plugin_eval-mus-p10-retrieve_str_v1_zh', 'name'],
-        ['plugin_eval-mus-p10-reason_retrieve_understand_json_v1_zh', 'name'],
-        ['plugin_eval-mus-p10-understand_str_v1_zh', 'args'],
-        ['plugin_eval-mus-p10-reason_retrieve_understand_json_v1_zh', 'args'],
-        ['plugin_eval-mus-p10-review_str_v1_zh', 'review_quality'],
-        # ['plugin_eval-p10', 'naive_average'],
-        # ['plugin_eval-mus-p10', 'naive_average'],
-        # ['plugin_eval-p10_zh', 'naive_average'],
-        # ['plugin_eval-mus-p10_zh', 'naive_average'],
+        'agent',
+        'agent_cn',
+        'agent_en',
+        'cibench_template_cn',
+        'cibench_template',
+        'plugin_eval-mus-p10_one_review_zh',
+        'plugin_eval-mus-p10_one_review',
     ],
     summary_groups=sum(
         [v for k, v in locals().items() if k.endswith("_summary_groups")], [])
...
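
Both sides of this diff end with the same gathering idiom: summary_groups collects every module-level list whose name ends in _summary_groups (including those pulled in through read_base) into one flat list. A self-contained demonstration:

# Self-contained demo of the `locals()` gathering idiom in these summarizers.
foo_summary_groups = [dict(name='foo', subsets=['a', 'b'])]
bar_summary_groups = [dict(name='bar', subsets=['c'])]

summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
print([g['name'] for g in summary_groups])  # ['foo', 'bar']

The outermost iterable of a comprehension is evaluated in the enclosing (module) scope, which is why locals() sees the module namespace rather than the comprehension's own.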
configs/summarizers/code_passk.py (view file @ b03d5dc5)

@@ -21,30 +21,22 @@ code_passk_summary_groups = [
     {'name': 'sanitized_mbpp_pass@10', 'subsets': [['sanitized_mbpp_repeat10', 'pass@10']]},
     # real add
     {'name': 'humanevalx', 'subsets': ['humanevalx-python', 'humanevalx-cpp', 'humanevalx-go', 'humanevalx-java', 'humanevalx-js']},
-    {'name': 'code', 'subsets': ['humaneval_plus_pass@1(greedy)', 'sanitized_mbpp_pass@1(greedy)', 'humaneval_cn_pass@1(greedy)', 'mbpp_cn_pass@1(greedy)', 'humanevalx']}
+    # {'name': 'code', 'subsets': ['humaneval_plus_pass@1(greedy)', 'sanitized_mbpp_pass@1(greedy)', 'humaneval_cn_pass@1(greedy)', 'mbpp_cn_pass@1(greedy)', 'humanevalx']}
+    {'name': 'code_cn', 'subsets': ['humaneval_cn_pass@1(greedy)', 'mbpp_cn_pass@1(greedy)']},
+    {'name': 'code_en', 'subsets': ['humaneval_plus_pass@1(greedy)', 'sanitized_mbpp_pass@1(greedy)', 'humanevalx']},
+    {'name': 'code', 'subsets': ['humaneval_cn_pass@1(greedy)', 'mbpp_cn_pass@1(greedy)', 'humaneval_plus_pass@1(greedy)', 'sanitized_mbpp_pass@1(greedy)', 'humanevalx']},
 ]

 summarizer = dict(
     dataset_abbrs=[
         'code',
-        'humaneval_pass@1(greedy)',
-        'humaneval_pass@10',
+        'code_cn',
+        'code_en',
         'humaneval_cn_pass@1(greedy)',
-        'humaneval_cn_pass@10',
         'humaneval_plus_pass@1(greedy)',
-        'humaneval_plus_pass@10',
-        'mbpp_pass@1(greedy)',
-        'mbpp_pass@10',
         'mbpp_cn_pass@1(greedy)',
-        'mbpp_cn_pass@10',
         'sanitized_mbpp_pass@1(greedy)',
-        'sanitized_mbpp_pass@10',
         'humanevalx',
-        'humanevalx-python',
-        'humanevalx-cpp',
-        'humanevalx-go',
-        'humanevalx-java',
-        'humanevalx-js',
     ],
     summary_groups=sum(
         [v for k, v in locals().items() if k.endswith("_summary_groups")], [])
...
configs/summarizers/compass_knowledge.py (view file @ b03d5dc5)

@@ -15,21 +15,13 @@ compassbench_v1_knowledge_groups = [
     'compassbench_v1_knowledge-mixed-cloze_en'

 summarizer = dict(
     dataset_abbrs=[
-        'knowledge_acc_1_and_cloze',
-        ['knowledge_cn', 'acc_1'],
-        ['compassbench_v1_knowledge-common_knowledge-single_choice_cn_circular', 'acc_1'],
-        ['compassbench_v1_knowledge-humanity-single_choice_cn_circular', 'acc_1'],
-        ['compassbench_v1_knowledge-natural_science-single_choice_cn_circular', 'acc_1'],
-        ['compassbench_v1_knowledge-social_science-single_choice_cn_circular', 'acc_1'],
-        'compassbench_v1_knowledge-mixed-cloze_en',
         'knowledge_perf_4_and_cloze',
         ['knowledge_cn', 'perf_4'],
+        'compassbench_v1_knowledge-mixed-cloze_en',
         ['compassbench_v1_knowledge-common_knowledge-single_choice_cn_circular', 'perf_4'],
         ['compassbench_v1_knowledge-humanity-single_choice_cn_circular', 'perf_4'],
         ['compassbench_v1_knowledge-natural_science-single_choice_cn_circular', 'perf_4'],
         ['compassbench_v1_knowledge-social_science-single_choice_cn_circular', 'perf_4'],
-        'compassbench_v1_knowledge-mixed-cloze_en',
     ],
     summary_groups=compassbench_v1_knowledge_groups
 )
configs/summarizers/compass_math.py (view file @ b03d5dc5)

 # This summarizer is used for `./datasets/compassbench_v1_math/compassbench_v1_math_gen`
 compassbench_v1_math_groups = [
-    {'name': 'math_acc_1_and_fill_in_blank', 'subsets': [
-        ['compassbench_v1_math-high-single_choice_cn', 'acc_1'],
-        ['compassbench_v1_math-high-single_choice_en', 'acc_1'],
-        ['compassbench_v1_math-middle-single_choice_cn', 'acc_1'],
-        ['compassbench_v1_math-middle-single_choice_en', 'acc_1'],
-        ['compassbench_v1_math-primary-cloze_cn', 'accuracy'],
-        ['compassbench_v1_math-primary-cloze_en', 'accuracy'],
-    ]},
-    {'name': 'math_perf_4_and_fill_in_blank', 'subsets': [
-        ['compassbench_v1_math-high-single_choice_cn', 'perf_4'],
-        ['compassbench_v1_math-high-single_choice_en', 'perf_4'],
-        ['compassbench_v1_math-middle-single_choice_cn', 'perf_4'],
-        ['compassbench_v1_math-middle-single_choice_en', 'perf_4'],
-        ['compassbench_v1_math-primary-cloze_cn', 'accuracy'],
-        ['compassbench_v1_math-primary-cloze_en', 'accuracy'],
-    ]},
+    {'name': 'math_acc_1_and_fill_in_blank', 'subsets': [['compassbench_v1_math-high-single_choice_cn', 'acc_1'], ['compassbench_v1_math-high-single_choice_en', 'acc_1'], ['compassbench_v1_math-middle-single_choice_cn', 'acc_1'], ['compassbench_v1_math-middle-single_choice_en', 'acc_1'], ['compassbench_v1_math-primary-cloze_cn', 'accuracy'], ['compassbench_v1_math-primary-cloze_en', 'accuracy']]},
+    {'name': 'math_perf_4_and_fill_in_blank', 'subsets': [['compassbench_v1_math-high-single_choice_cn', 'perf_4'], ['compassbench_v1_math-high-single_choice_en', 'perf_4'], ['compassbench_v1_math-middle-single_choice_cn', 'perf_4'], ['compassbench_v1_math-middle-single_choice_en', 'perf_4'], ['compassbench_v1_math-primary-cloze_cn', 'accuracy'], ['compassbench_v1_math-primary-cloze_en', 'accuracy']]},
+    {'name': 'math_perf_4_and_fill_in_blank_cn', 'subsets': [['compassbench_v1_math-high-single_choice_cn', 'perf_4'], ['compassbench_v1_math-middle-single_choice_cn', 'perf_4'], ['compassbench_v1_math-primary-cloze_cn', 'accuracy']]},
+    {'name': 'math_perf_4_and_fill_in_blank_en', 'subsets': [['compassbench_v1_math-high-single_choice_en', 'perf_4'], ['compassbench_v1_math-middle-single_choice_en', 'perf_4'], ['compassbench_v1_math-primary-cloze_en', 'accuracy']]},
 ]

 summarizer = dict(
     dataset_abbrs=[
-        'math_acc_1_and_fill_in_blank',
-        ['compassbench_v1_math-high-single_choice_cn', 'acc_1'],
-        ['compassbench_v1_math-high-single_choice_en', 'acc_1'],
-        ['compassbench_v1_math-middle-single_choice_cn', 'acc_1'],
-        ['compassbench_v1_math-middle-single_choice_en', 'acc_1'],
-        ['compassbench_v1_math-primary-cloze_cn', 'accuracy'],
-        ['compassbench_v1_math-primary-cloze_en', 'accuracy'],
         'math_perf_4_and_fill_in_blank',
+        'math_perf_4_and_fill_in_blank_cn',
+        'math_perf_4_and_fill_in_blank_en',
         ['compassbench_v1_math-high-single_choice_cn', 'perf_4'],
         ['compassbench_v1_math-high-single_choice_en', 'perf_4'],
         ['compassbench_v1_math-middle-single_choice_cn', 'perf_4'],
...
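
A note on the metric names used throughout these summarizers: in OpenCompass's circular evaluation convention (as I read it from these _circular datasets), each multiple-choice question is re-asked with the options rotated; acc_1/acc_origin score only the original ordering, while perf_4 credits a question only when all four rotations are answered correctly. A toy illustration of the stricter aggregation (made-up data):

# Hedged sketch of perf_4-style aggregation over made-up per-rotation results.
results = {
    'q1': [True, True, True, True],   # correct under every rotation
    'q2': [True, False, True, True],  # breaks under one rotation
}
perf_4 = sum(all(r) for r in results.values()) / len(results)
acc_1 = sum(r[0] for r in results.values()) / len(results)
print(perf_4, acc_1)  # 0.5 1.0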
configs/summarizers/compassbench_v1_language.py (view file @ b03d5dc5)

@@ -34,37 +34,18 @@ compassbench_v1_language_groups = [

 summarizer = dict(
     dataset_abbrs=[
-        'language_acc_1_and_non_mcq',
-        'language_en_acc_1_and_non_mcq',
-        'language_zh_acc_1_and_non_mcq',
-        # ['information_retrieval_en', 'score'],
-        # ['information_retrieval_zh', 'score'],
-        ['intention_recognition_en_circular', 'acc_origin'],
-        ['intention_recognition_zh_circular', 'acc_origin'],
-        ['sentiment_analysis_en_circular', 'acc_origin'],
-        ['sentiment_analysis_zh_circular', 'acc_origin'],
-        ['translation', 'score'],
-        ['content_critic_en_circular', 'acc_origin'],
-        ['content_critic_zh_circular', 'acc_origin'],
-        ['content_summarization_en', 'rouge1'],
-        ['content_summarization_zh', 'rouge1'],
-        ['traditional_cultural_understanding_zh_circular', 'acc_origin'],
-        ['chinese_semantic_understanding_zh_circular', 'acc_origin'],
         'language_perf_4_and_non_mcq',
-        'language_en_perf_4_and_non_mcq',
         'language_zh_perf_4_and_non_mcq',
+        'language_en_perf_4_and_non_mcq',
-        # ['information_retrieval_en', 'score'],
-        # ['information_retrieval_zh', 'score'],
-        ['intention_recognition_en_circular', 'perf_circular'],
         ['intention_recognition_zh_circular', 'perf_circular'],
+        ['intention_recognition_en_circular', 'perf_circular'],
-        ['sentiment_analysis_en_circular', 'perf_circular'],
         ['sentiment_analysis_zh_circular', 'perf_circular'],
+        ['sentiment_analysis_en_circular', 'perf_circular'],
         ['translation', 'score'],
-        ['content_critic_en_circular', 'perf_circular'],
         ['content_critic_zh_circular', 'perf_circular'],
+        ['content_critic_en_circular', 'perf_circular'],
-        ['content_summarization_en', 'rouge1'],
         ['content_summarization_zh', 'rouge1'],
+        ['content_summarization_en', 'rouge1'],
         ['traditional_cultural_understanding_zh_circular', 'perf_circular'],
         ['chinese_semantic_understanding_zh_circular', 'perf_circular'],
     ],
...