Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
0665bb91
Unverified
Commit
0665bb91
authored
Mar 22, 2024
by
bittersweet1999
Committed by
GitHub
Mar 22, 2024
Browse files
[Fix] Quick fix (#995)
parent
1d319855
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
7 additions
and
178 deletions
+7
-178
configs/eval_subjective_alignbench.py
configs/eval_subjective_alignbench.py
+2
-14
configs/eval_subjective_alpacaeval.py
configs/eval_subjective_alpacaeval.py
+1
-10
configs/eval_subjective_compassarena.py
configs/eval_subjective_compassarena.py
+0
-11
configs/eval_subjective_corev2.py
configs/eval_subjective_corev2.py
+0
-115
configs/eval_subjective_creationbench.py
configs/eval_subjective_creationbench.py
+2
-14
configs/eval_subjective_mtbench.py
configs/eval_subjective_mtbench.py
+2
-14
No files found.
configs/eval_subjective_alignbench.py
View file @
0665bb91
...
...
@@ -3,7 +3,7 @@ from mmengine.config import read_base
with
read_base
():
from
.datasets.subjective.alignbench.alignbench_judgeby_critiquellm
import
subjective_datasets
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
,
OpenAI
from
opencompass.models.openai_api
import
OpenAIAllesAPIN
from
opencompass.partitioners
import
NaivePartitioner
,
SizePartitioner
from
opencompass.partitioners.sub_naive
import
SubjectiveNaivePartitioner
...
...
@@ -51,26 +51,14 @@ models = [
datasets
=
[
*
subjective_datasets
]
infer
=
dict
(
partitioner
=
dict
(
type
=
NaivePartitioner
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llmeval'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
OpenICLInferTask
),
),
)
# -------------Evaluation Stage ----------------------------------------
## ------------- JudgeLLM Configuration
judge_model
=
dict
(
abbr
=
'GPT4-Turbo'
,
type
=
OpenAI
AllesAPIN
,
type
=
OpenAI
,
path
=
'gpt-4-1106-preview'
,
key
=
'xxxx'
,
# The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url
=
'xxxx'
,
meta_template
=
api_meta_template
,
query_per_second
=
16
,
max_out_len
=
2048
,
...
...
configs/eval_subjective_alpacaeval.py
View file @
0665bb91
...
...
@@ -68,16 +68,7 @@ gpt4 = dict(
temperature
=
1
,
)
# Re-infer gpt4's predictions, or you can choose to use the pre-committed gpt4 predictions
infer
=
dict
(
partitioner
=
dict
(
type
=
NaivePartitioner
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llmeval'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
OpenICLInferTask
),
),
)
# -------------Evaluation Stage ----------------------------------------
...
...
configs/eval_subjective_compassarena.py
View file @
0665bb91
...
...
@@ -69,17 +69,6 @@ gpt4 = dict(
temperature
=
1
,
)
# Re-infer gpt4's predictions, or you can choose to use the pre-committed gpt4 predictions
infer
=
dict
(
partitioner
=
dict
(
type
=
SizePartitioner
,
strategy
=
'split'
,
max_task_size
=
10000
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llm_dev2'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
OpenICLInferTask
),
),
)
# -------------Evaluation Stage ----------------------------------------
## ------------- JudgeLLM Configuration
...
...
configs/eval_subjective_corev2.py
deleted
100644 → 0
View file @
1d319855
from
mmengine.config
import
read_base
with
read_base
():
from
.datasets.subjective.subjective_cmp.subjective_corev2
import
subjective_datasets
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
,
OpenAI
from
opencompass.partitioners
import
NaivePartitioner
,
SizePartitioner
from
opencompass.partitioners.sub_naive
import
SubjectiveNaivePartitioner
from
opencompass.partitioners.sub_size
import
SubjectiveSizePartitioner
from
opencompass.runners
import
LocalRunner
from
opencompass.runners
import
SlurmSequentialRunner
from
opencompass.tasks
import
OpenICLInferTask
from
opencompass.tasks.subjective_eval
import
SubjectiveEvalTask
from
opencompass.summarizers
import
Corev2Summarizer
api_meta_template
=
dict
(
round
=
[
dict
(
role
=
'HUMAN'
,
api_role
=
'HUMAN'
),
dict
(
role
=
'BOT'
,
api_role
=
'BOT'
,
generate
=
True
),
],
reserved_roles
=
[
dict
(
role
=
'SYSTEM'
,
api_role
=
'SYSTEM'
),
],
)
# -------------Inference Stage ----------------------------------------
# For subjective evaluation, we usually enable sampling (do_sample=True) for the models
models
=
[
dict
(
type
=
HuggingFaceChatGLM3
,
abbr
=
'chatglm3-6b-hf'
,
path
=
'THUDM/chatglm3-6b'
,
tokenizer_path
=
'THUDM/chatglm3-6b'
,
model_kwargs
=
dict
(
device_map
=
'auto'
,
trust_remote_code
=
True
,
),
tokenizer_kwargs
=
dict
(
padding_side
=
'left'
,
truncation_side
=
'left'
,
trust_remote_code
=
True
,
),
generation_kwargs
=
dict
(
do_sample
=
True
,
),
meta_template
=
api_meta_template
,
max_out_len
=
2048
,
max_seq_len
=
4096
,
batch_size
=
1
,
run_cfg
=
dict
(
num_gpus
=
1
,
num_procs
=
1
),
)
]
datasets
=
[
*
subjective_datasets
]
gpt4
=
dict
(
abbr
=
'gpt4-turbo'
,
type
=
OpenAI
,
path
=
'gpt-4-1106-preview'
,
key
=
''
,
# The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
meta_template
=
api_meta_template
,
query_per_second
=
1
,
max_out_len
=
2048
,
max_seq_len
=
4096
,
batch_size
=
4
,
retry
=
20
,
temperature
=
1
,
)
# Re-infer gpt4's predictions, or you can choose to use the pre-committed gpt4 predictions
infer
=
dict
(
partitioner
=
dict
(
type
=
SizePartitioner
,
max_task_size
=
500
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llm_dev2'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
OpenICLInferTask
),
),
)
# -------------Evaluation Stage ----------------------------------------
## ------------- JudgeLLM Configuration
judge_model
=
dict
(
abbr
=
'GPT4-Turbo'
,
type
=
OpenAI
,
path
=
'gpt-4-1106-preview'
,
key
=
''
,
# The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
meta_template
=
api_meta_template
,
query_per_second
=
1
,
max_out_len
=
1024
,
max_seq_len
=
4096
,
batch_size
=
2
,
retry
=
20
,
temperature
=
0
,
)
## ------------- Evaluation Configuration
eval
=
dict
(
partitioner
=
dict
(
type
=
SubjectiveSizePartitioner
,
mode
=
'm2n'
,
max_task_size
=
500
,
base_models
=
[
gpt4
],
compare_models
=
models
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llm_dev2'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
SubjectiveEvalTask
,
judge_cfg
=
judge_model
),
),
)
summarizer
=
dict
(
type
=
Corev2Summarizer
,
match_method
=
'smart'
)
work_dir
=
'outputs/corev2/'
configs/eval_subjective_creationbench.py
View file @
0665bb91
...
...
@@ -3,7 +3,7 @@ from mmengine.config import read_base
with
read_base
():
from
.datasets.subjective.creationbench.creationbench_judgeby_gpt4_withref
import
subjective_datasets
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
,
OpenAI
from
opencompass.models.openai_api
import
OpenAIAllesAPIN
from
opencompass.partitioners
import
NaivePartitioner
,
SizePartitioner
from
opencompass.partitioners.sub_naive
import
SubjectiveNaivePartitioner
...
...
@@ -51,26 +51,14 @@ models = [
datasets
=
[
*
subjective_datasets
]
infer
=
dict
(
partitioner
=
dict
(
type
=
NaivePartitioner
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llmeval'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
OpenICLInferTask
),
),
)
# -------------Evaluation Stage ----------------------------------------
## ------------- JudgeLLM Configuration
judge_model
=
dict
(
abbr
=
'GPT4-Turbo'
,
type
=
OpenAI
AllesAPIN
,
type
=
OpenAI
,
path
=
'gpt-4-1106-preview'
,
key
=
'xxxx'
,
# The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url
=
'xxxx'
,
meta_template
=
api_meta_template
,
query_per_second
=
16
,
max_out_len
=
2048
,
...
...
configs/eval_subjective_mtbench.py
View file @
0665bb91
...
...
@@ -4,7 +4,7 @@ with read_base():
from
.datasets.subjective.multiround.mtbench_single_judge_diff_temp
import
subjective_datasets
# from .datasets.subjective.multiround.mtbench_pair_judge import subjective_datasets
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
,
OpenAI
from
opencompass.models.openai_api
import
OpenAIAllesAPIN
from
opencompass.partitioners
import
NaivePartitioner
,
SizePartitioner
from
opencompass.partitioners.sub_naive
import
SubjectiveNaivePartitioner
...
...
@@ -59,26 +59,14 @@ models = [
datasets
=
[
*
subjective_datasets
]
infer
=
dict
(
partitioner
=
dict
(
type
=
SizePartitioner
,
strategy
=
'split'
,
max_task_size
=
10000
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llm_dev2'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
OpenICLInferTask
),
),
)
# -------------Evaluation Stage ----------------------------------------
## ------------- JudgeLLM Configuration
judge_model
=
dict
(
abbr
=
'GPT4-Turbo'
,
type
=
OpenAI
AllesAPIN
,
type
=
OpenAI
,
path
=
'gpt-4-0613'
,
# To compare with the official leaderboard, please use gpt4-0613
key
=
'xxxx'
,
# The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url
=
'xxxx'
,
meta_template
=
api_meta_template
,
query_per_second
=
16
,
max_out_len
=
2048
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment