Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
bfe4aa2a
Unverified
Commit
bfe4aa2a
authored
Dec 14, 2023
by
Songyang Zhang
Committed by
GitHub
Dec 14, 2023
Browse files
[Fix] Update alignmentbench (#704)
* update alignmentbench * update alignmentbench * update alignmentbench
parent
1fe152b3
Changes
11
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
158 additions
and
46 deletions
+158
-46
configs/datasets/subjective_cmp/alignment_bench.py
configs/datasets/subjective_cmp/alignment_bench.py
+3
-5
configs/eval_subjective_alignbench.py
configs/eval_subjective_alignbench.py
+24
-35
configs/eval_subjective_compare.py
configs/eval_subjective_compare.py
+0
-0
configs/eval_subjective_score.py
configs/eval_subjective_score.py
+0
-0
configs/models/hf_internlm/hf_internlm_chat_20b.py
configs/models/hf_internlm/hf_internlm_chat_20b.py
+1
-1
configs/models/hf_internlm/hf_internlm_chat_7b.py
configs/models/hf_internlm/hf_internlm_chat_7b.py
+1
-1
configs/models/hf_internlm/hf_internlm_chat_7b_8k.py
configs/models/hf_internlm/hf_internlm_chat_7b_8k.py
+1
-1
opencompass/models/__init__.py
opencompass/models/__init__.py
+1
-1
opencompass/models/openai_api.py
opencompass/models/openai_api.py
+118
-0
opencompass/runners/local_api.py
opencompass/runners/local_api.py
+2
-1
opencompass/summarizers/alignmentbench.py
opencompass/summarizers/alignmentbench.py
+7
-1
No files found.
configs/datasets/subjective_cmp/alignment_bench.py
View file @
bfe4aa2a
from
os
import
getenv
as
gv
from
opencompass.openicl.icl_prompt_template
import
PromptTemplate
from
opencompass.openicl.icl_retriever
import
ZeroRetriever
from
opencompass.openicl.icl_inferencer
import
GenInferencer
...
...
@@ -15,10 +13,10 @@ subjective_reader_cfg = dict(
subjective_all_sets
=
[
"alignment_bench"
,
]
data_path
=
gv
(
'WORKDIR'
)
+
"data/subjective/alignment_bench"
data_path
=
"data/subjective/alignment_bench"
alignment_bench_config_path
=
gv
(
'WORKDIR'
)
+
"data/subjective/alignment_bench/
config
"
alignment_bench_config_name
=
'multi-dimension'
alignment_bench_config_path
=
"data/subjective/alignment_bench/"
alignment_bench_config_name
=
'
config/
multi-dimension'
subjective_datasets
=
[]
...
...
configs/align
ment_
bench.py
→
configs/
eval_subjective_
alignbench.py
View file @
bfe4aa2a
...
...
@@ -11,7 +11,7 @@ with read_base():
datasets
=
[
*
subjective_datasets
]
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
OpenAI
,
HuggingFaceChatGLM3
from
opencompass.models
import
HuggingFaceCausalLM
,
HuggingFace
,
OpenAI
AllesAPIN
,
HuggingFaceChatGLM3
from
opencompass.partitioners
import
NaivePartitioner
from
opencompass.partitioners.sub_naive
import
SubjectiveNaivePartitioner
from
opencompass.runners
import
LocalRunner
...
...
@@ -19,17 +19,11 @@ from opencompass.runners import SlurmSequentialRunner
from
opencompass.tasks
import
OpenICLInferTask
from
opencompass.tasks.subjective_eval
import
SubjectiveEvalTask
from
opencompass.summarizers
import
AlignmentBenchSummarizer
models
=
[
*
hf_baichuan2_7b
]
#, *hf_chatglm3_6b, *hf_internlm_chat_20b, *hf_qwen_7b_chat, *hf_qwen_14b_chat]
api_meta_template
=
dict
(
round
=
[
dict
(
role
=
'HUMAN'
,
api_role
=
'HUMAN'
),
dict
(
role
=
'BOT'
,
api_role
=
'BOT'
,
generate
=
True
)
],
reserved_roles
=
[
dict
(
role
=
'SYSTEM'
,
api_role
=
'SYSTEM'
),
],
)
# -------------Inference Stage ----------------------------------------
models
=
[
*
hf_baichuan2_7b
]
#, *hf_chatglm3_6b, *hf_internlm_chat_20b, *hf_qwen_7b_chat, *hf_qwen_14b_chat]
infer
=
dict
(
partitioner
=
dict
(
type
=
NaivePartitioner
),
...
...
@@ -42,6 +36,10 @@ infer = dict(
)
# -------------Evaluation Stage ----------------------------------------
## ------------- JudgeLLM Configuration
api_meta_template
=
dict
(
round
=
[
dict
(
role
=
'HUMAN'
,
api_role
=
'HUMAN'
),
...
...
@@ -50,26 +48,18 @@ api_meta_template = dict(
)
judge_model
=
dict
(
type
=
HuggingFaceChatGLM3
,
abbr
=
'chatglm3-6b-hf'
,
path
=
'THUDM/chatglm3-6b'
,
tokenizer_path
=
'THUDM/chatglm3-6b'
,
model_kwargs
=
dict
(
device_map
=
'auto'
,
trust_remote_code
=
True
,
),
tokenizer_kwargs
=
dict
(
padding_side
=
'left'
,
truncation_side
=
'left'
,
trust_remote_code
=
True
,
),
abbr
=
'GPT4-Turbo'
,
type
=
OpenAIAllesAPIN
,
path
=
'gpt-4-1106-preview'
,
key
=
'xxxx'
,
# The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
url
=
'xxxx'
,
meta_template
=
api_meta_template
,
max_out_len
=
100
,
max_
seq
_len
=
4096
,
batch_size
=
1
,
run_cfg
=
dict
(
num_gpus
=
1
,
num_procs
=
1
)
)
query_per_second
=
16
,
max_
out
_len
=
2048
,
max_seq_len
=
2048
,
batch_size
=
8
)
## ------------- Evaluation Configuration
eval
=
dict
(
partitioner
=
dict
(
type
=
SubjectiveNaivePartitioner
,
...
...
@@ -77,17 +67,16 @@ eval = dict(
models
=
[
*
hf_baichuan2_7b
]
),
runner
=
dict
(
type
=
SlurmSequentialRunner
,
partition
=
'llmeval'
,
quotatype
=
'auto'
,
max_num_workers
=
256
,
type
=
LocalRunner
,
max_num_workers
=
2
,
task
=
dict
(
type
=
SubjectiveEvalTask
,
judge_cfg
=
judge_model
)),
)
work_dir
=
gv
(
'WORKDIR'
)
+
'alignment_bench/'
summarizer
=
dict
(
type
=
AlignmentBenchSummarizer
,
)
work_dir
=
'outputs/alignment_bench/'
configs/subjective_compare.py
→
configs/
eval_
subjective_compare.py
View file @
bfe4aa2a
File moved
configs/subjective_score.py
→
configs/
eval_
subjective_score.py
View file @
bfe4aa2a
File moved
configs/models/hf_internlm/hf_internlm_chat_20b.py
View file @
bfe4aa2a
...
...
@@ -3,7 +3,7 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template
=
dict
(
round
=
[
dict
(
role
=
'HUMAN'
,
begin
=
'<|User|>:'
,
end
=
'
<eoh>
\n
'
),
dict
(
role
=
'HUMAN'
,
begin
=
'<|User|>:'
,
end
=
'
\n
'
),
dict
(
role
=
'BOT'
,
begin
=
'<|Bot|>:'
,
end
=
'<eoa>
\n
'
,
generate
=
True
),
],
)
...
...
configs/models/hf_internlm/hf_internlm_chat_7b.py
View file @
bfe4aa2a
...
...
@@ -3,7 +3,7 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template
=
dict
(
round
=
[
dict
(
role
=
'HUMAN'
,
begin
=
'<|User|>:'
,
end
=
'
<eoh>
\n
'
),
dict
(
role
=
'HUMAN'
,
begin
=
'<|User|>:'
,
end
=
'
\n
'
),
dict
(
role
=
'BOT'
,
begin
=
'<|Bot|>:'
,
end
=
'<eoa>
\n
'
,
generate
=
True
),
],
)
...
...
configs/models/hf_internlm/hf_internlm_chat_7b_8k.py
View file @
bfe4aa2a
...
...
@@ -3,7 +3,7 @@ from opencompass.models import HuggingFaceCausalLM
_meta_template
=
dict
(
round
=
[
dict
(
role
=
'HUMAN'
,
begin
=
'<|User|>:'
,
end
=
'
<eoh>
\n
'
),
dict
(
role
=
'HUMAN'
,
begin
=
'<|User|>:'
,
end
=
'
\n
'
),
dict
(
role
=
'BOT'
,
begin
=
'<|Bot|>:'
,
end
=
'<eoa>
\n
'
,
generate
=
True
),
],
)
...
...
opencompass/models/__init__.py
View file @
bfe4aa2a
...
...
@@ -17,7 +17,7 @@ from .minimax_api import MiniMax # noqa: F401
from
.mixtral
import
Mixtral
# noqa: F401
from
.modelscope
import
ModelScope
,
ModelScopeCausalLM
# noqa: F401, F403
from
.moonshot_api
import
MoonShot
# noqa: F401
from
.openai_api
import
OpenAI
# noqa: F401
from
.openai_api
import
OpenAI
,
OpenAIAllesAPIN
# noqa: F401
from
.pangu_api
import
PanGu
# noqa: F401
from
.sensetime_api
import
SenseTime
# noqa: F401
from
.turbomind
import
TurboMindModel
# noqa: F401
...
...
opencompass/models/openai_api.py
View file @
bfe4aa2a
...
...
@@ -310,3 +310,121 @@ class OpenAI(BaseAPIModel):
elif
self
.
mode
==
'rear'
:
prompt
=
sep
.
join
(
words
[:
l
])
return
prompt
@MODELS.register_module(name=['OpenAIAllesAPIN'])
class OpenAIAllesAPIN(OpenAI):
    """Model wrapper around OpenAI-AllesAPIN.

    Args:
        path (str): The name of OpenAI's model.
        url (str): URL to AllesAPIN.
        key (str): AllesAPIN key. Sent as the ``alles-apin-token`` request
            header.
        query_per_second (int): The maximum queries allowed per second
            between two consecutive calls of the API. Defaults to 1.
        rpm_verbose (bool): Forwarded unchanged to the ``OpenAI`` base
            class. Defaults to False.
        max_seq_len (int): Forwarded to the base class; not otherwise used
            by this wrapper. Defaults to 2048.
        meta_template (Dict, optional): The model's meta prompt
            template if needed, in case the requirement of injecting or
            wrapping of any meta instructions.
        retry (int): Number of retries if the API call fails. Defaults to 2.
    """

    is_api: bool = True

    # OpenCompass prompt roles -> chat roles used in the request payload.
    _ROLE_MAP = {'HUMAN': 'user', 'BOT': 'assistant', 'SYSTEM': 'system'}

    def __init__(self,
                 path: str,
                 url: str,
                 key: str,
                 query_per_second: int = 1,
                 rpm_verbose: bool = False,
                 max_seq_len: int = 2048,
                 meta_template: Optional[Dict] = None,
                 retry: int = 2):
        super().__init__(path=path,
                         max_seq_len=max_seq_len,
                         query_per_second=query_per_second,
                         rpm_verbose=rpm_verbose,
                         meta_template=meta_template,
                         retry=retry)
        self.url = url
        self.headers = {
            'alles-apin-token': key,
            'content-type': 'application/json',
        }

    def _generate(self, input: str or PromptList, max_out_len: int,
                  temperature: float) -> str:
        """Generate results given an input.

        Args:
            input (str or PromptList): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.
                Accepted for interface compatibility; it is not included
                in the request payload built here.
            temperature (float): Sampling temperature. Accepted for
                interface compatibility; it is not included in the
                request payload built here.

        Returns:
            str: The generated string.

        Raises:
            RuntimeError: If no attempt out of ``self.retry`` produced a
                usable completion.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            for item in input:
                msg = {'content': item['prompt']}
                # As before, an unrecognised role leaves the message
                # without a 'role' key instead of failing here.
                role = self._ROLE_MAP.get(item['role'])
                if role is not None:
                    msg['role'] = role
                messages.append(msg)
            # model can be response with user and system
            # when it comes with agent involved.
            # Checked on the collected list rather than the loop variable
            # so an empty PromptList fails with a clear assertion instead
            # of an unbound-variable error.
            assert messages, 'PromptList must contain at least one message'
            assert messages[-1].get('role') in ('user', 'system')

        payload = {
            'model': self.path,
            'messages': messages,
        }
        # Serialise once: the request body is identical for every attempt.
        body = json.dumps(payload)

        for _ in range(self.retry):
            self.wait()
            raw_response = requests.post(self.url,
                                         headers=self.headers,
                                         data=body)
            try:
                response = raw_response.json()
            except requests.JSONDecodeError:
                # The extra argument needs a %s placeholder, otherwise the
                # logging call itself reports a formatting error.
                self.logger.error('JsonDecode error, got %s',
                                  str(raw_response.content))
                continue
            if (raw_response.status_code == 200
                    and response.get('msgCode') == '10000'):
                # Keep the response under its own name: rebinding the
                # request payload here would make the next retry POST the
                # previous response instead of the prompt.
                result = response['data']
                choices = result['choices']
                if choices is None:
                    self.logger.error(result)
                else:
                    return choices[0]['message']['content'].strip()
            # .get() so an error body without 'msg' does not abort the
            # remaining retries with a KeyError.
            self.logger.error(response.get('msg'))

        raise RuntimeError('API call failed.')

    def get_token_len(self, prompt: str) -> int:
        """Get the number of tokens in the prompt.

        The prompt is encoded with the encoding returned by
        ``self.tiktoken.encoding_for_model(self.path)`` and the number of
        resulting tokens is returned.
        NOTE(review): ``self.tiktoken`` is not set in this class;
        presumably the ``OpenAI`` base class provides it - confirm.

        Args:
            prompt (str): Input string.

        Returns:
            int: Length of the input tokens
        """
        enc = self.tiktoken.encoding_for_model(self.path)
        return len(enc.encode(prompt))
opencompass/runners/local_api.py
View file @
bfe4aa2a
...
...
@@ -172,7 +172,8 @@ class LocalAPIRunner(BaseRunner):
self
.
max_num_workers
=
max_num_workers
self
.
concurrent_users
=
concurrent_users
assert
task
[
'type'
]
in
[
'OpenICLInferTask'
,
'opencompass.tasks.OpenICLInferTask'
'OpenICLInferTask'
,
'opencompass.tasks.OpenICLInferTask'
,
],
'Only supported for api infer task.'
def
launch
(
self
,
tasks
:
List
[
Dict
[
str
,
Any
]])
->
List
[
Tuple
[
str
,
int
]]:
...
...
opencompass/summarizers/alignmentbench.py
View file @
bfe4aa2a
...
...
@@ -15,7 +15,7 @@ try:
except
ImportError
:
from_csv
=
None
from
opencompass.utils
import
dataset_abbr_from_cfg
from
opencompass.utils
import
dataset_abbr_from_cfg
,
model_abbr_from_cfg
CATEGORIES
=
{
'中文推理'
:
[
'数学计算'
,
'逻辑推理'
],
...
...
@@ -91,6 +91,10 @@ class AlignmentBenchSummarizer:
def __init__(self, config: ConfigDict) -> None:
    """Store the config and derive the abbreviations of evaluated models.

    Args:
        config (ConfigDict): Full evaluation config. The model configs
            are read from ``config['eval']['partitioner']['models']``.
    """
    self.cfg = config
    self.tasks = []
    judged_models = config['eval']['partitioner']['models']
    self.eval_model_cfgs = judged_models
    # One abbreviation per model config, in the same order.
    self.eval_model_abbrs = list(map(model_abbr_from_cfg, judged_models))
def
summarize
(
self
,
time_str
:
str
=
datetime
.
now
().
strftime
(
'%Y%m%d_%H%M%S'
)):
...
...
@@ -116,6 +120,8 @@ class AlignmentBenchSummarizer:
fout2
=
osp
.
join
(
output_dir
,
'capability.csv'
)
fout_flag
,
fout_flag2
=
0
,
0
for
subdir
in
os
.
listdir
(
results_folder
):
if
subdir
not
in
self
.
eval_model_abbrs
:
continue
subdir_path
=
os
.
path
.
join
(
results_folder
,
subdir
)
if
os
.
path
.
isdir
(
subdir_path
):
model
=
subdir
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment