Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
1c8e193d
Unverified
Commit
1c8e193d
authored
Feb 06, 2024
by
bittersweet1999
Committed by
GitHub
Feb 06, 2024
Browse files
[Fix] hotfix for mtbench (#877)
* hotfix for mtbench * hotfix
parent
d34ba111
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
23 additions
and
10 deletions
+23
-10
configs/eval_subjective_mtbench.py
configs/eval_subjective_mtbench.py
+6
-6
opencompass/summarizers/subjective/mtbench.py
opencompass/summarizers/subjective/mtbench.py
+17
-4
No files found.
configs/eval_subjective_mtbench.py
View file @
1c8e193d
...
...
@@ -2,7 +2,6 @@ from mmengine.config import read_base
with
read_base
():
from
.datasets.subjective.multiround.mtbench_single_judge
import
subjective_datasets
# from .datasets.subjective.multiround.mtbench_pair_judge import subjective_datasets
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
HuggingFaceChatGLM3
...
...
@@ -18,6 +17,7 @@ from opencompass.summarizers import MTBenchSummarizer
api_meta_template
=
dict
(
round
=
[
dict
(
role
=
'SYSTEM'
,
api_role
=
'SYSTEM'
),
dict
(
role
=
'HUMAN'
,
api_role
=
'HUMAN'
),
dict
(
role
=
'BOT'
,
api_role
=
'BOT'
,
generate
=
True
),
]
...
...
@@ -54,10 +54,10 @@ models = [
datasets
=
[
*
subjective_datasets
]
infer
=
dict
(
partitioner
=
dict
(
type
=
SizePartitioner
,
max_task_size
=
100
),
partitioner
=
dict
(
type
=
SizePartitioner
,
max_task_size
=
100
00
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llm
eval
'
,
partition
=
'llm
_dev2
'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
task
=
dict
(
type
=
OpenICLInferTask
),
...
...
@@ -70,12 +70,12 @@ infer = dict(
judge_model
=
dict
(
abbr
=
'GPT4-Turbo'
,
type
=
OpenAIAllesAPIN
,
path
=
'gpt-4-0613'
,
path
=
'gpt-4-0613'
,
# To compare with the official leaderboard, please use gpt4-0613
key
=
'xxxx'
,
# The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url
=
'xxxx'
,
meta_template
=
api_meta_template
,
query_per_second
=
16
,
max_out_len
=
1024
,
max_out_len
=
2048
,
max_seq_len
=
2048
,
batch_size
=
8
,
temperature
=
0
,
...
...
@@ -95,7 +95,7 @@ judge_model = dict(
## single evaluation
eval
=
dict
(
partitioner
=
dict
(
type
=
SubjectiveSizePartitioner
,
max_task_size
=
100
,
mode
=
'singlescore'
,
models
=
models
),
partitioner
=
dict
(
type
=
SubjectiveSizePartitioner
,
max_task_size
=
100
00
,
mode
=
'singlescore'
,
models
=
models
),
runner
=
dict
(
type
=
LocalRunner
,
max_num_workers
=
32
,
task
=
dict
(
type
=
SubjectiveEvalTask
,
judge_cfg
=
judge_model
)),
)
...
...
opencompass/summarizers/subjective/mtbench.py
View file @
1c8e193d
...
...
@@ -17,11 +17,10 @@ except ImportError:
from
opencompass.utils
import
model_abbr_from_cfg
from
.compass_arena
import
CompassArenaSummarizer
from
.subjective_post_process
import
post_process_autoj
from
.utils
import
get_judgeanswer_and_reference
,
get_outdir
def
post_process_mtbench
(
judgement
:
str
):
def
post_process_mtbench
_pair
(
judgement
:
str
):
"""Input a string like below:
xxx[[A]]xxx, and extract the judge
...
...
@@ -34,6 +33,20 @@ def post_process_mtbench(judgement: str):
return
None
def post_process_mtbench_single(judgement: str):
    """Extract the numeric score from a single-answer judgement.

    The judgement must contain the rating in the form
    ``Rating: [[<number>]]`` (whitespace after the colon is optional),
    for example ``xxx Rating: [[5]] xxx``.

    Args:
        judgement: Raw text of the judgement to parse.

    Returns:
        ``{'score': <float>}`` built from the first rating found, or
        ``None`` when no rating is present or the captured text is not a
        valid number.
    """
    pattern = r'Rating:\s*\[\[([\d.]+)\]\]'
    matched_result = re.findall(pattern, judgement)
    if not matched_result:
        return None
    try:
        score = float(matched_result[0])
    except ValueError:
        # `[\d.]+` also matches strings such as "..." or "1.2.3", which
        # float() rejects; treat them like any other unparseable reply
        # instead of raising.
        return None
    return {'score': score}
def
get_capability_results
(
judged_answers
,
references
,
...
...
@@ -87,8 +100,8 @@ class MTBenchSummarizer(CompassArenaSummarizer):
'compare_models'
]
self
.
judge_abbr
=
model_abbr_from_cfg
(
self
.
cfg
[
'judge_model'
])
self
.
judge_map
=
{
'single'
:
post_process_
autoj
,
'pair'
:
post_process_mtbench
'single'
:
post_process_
mtbench_single
,
'pair'
:
post_process_mtbench
_pair
}
self
.
judge_function
=
self
.
judge_map
[
self
.
judge_type
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment