OpenDAS / opencompass

Commit b39f5015 (unverified)
[Sync] update taco (#1030)

Authored Apr 09, 2024 by Fengzhe Zhou; committed by GitHub on Apr 09, 2024
Parent: 16f29b25

This commit changes 87 files in total; this page shows 20 changed files with 590 additions and 82 deletions (+590 / -82).
Files shown on this page:

    opencompass/models/qwen_api.py                          +27  -13
    opencompass/models/sensetime_api.py                     +14  -9
    opencompass/models/turbomind.py                         +36  -8
    opencompass/models/turbomind_api.py                     +4   -4
    opencompass/models/turbomind_tis.py                     +4   -4
    opencompass/models/unigpt_api.py                        +147 -0   (new file)
    opencompass/models/xunfei_api.py                        +4   -4
    opencompass/models/yayi_api.py                          +261 -0   (new file)
    opencompass/models/zhipuai_api.py                       +4   -4
    opencompass/models/zhipuai_v2_api.py                    +12  -6
    opencompass/openicl/icl_evaluator/lm_evaluator.py       +2   -2
    opencompass/openicl/icl_prompt_template.py              +4   -4
    opencompass/registry.py                                 +15  -1
    opencompass/runners/dlc.py                              +27  -10
    opencompass/summarizers/default.py                      +1   -1
    opencompass/summarizers/subjective/alignmentbench.py    +13  -6
    opencompass/summarizers/subjective/compass_arena.py     +1   -0
    opencompass/summarizers/subjective/mtbench.py           +4   -2
    opencompass/tasks/openicl_infer.py                      +6   -3
    opencompass/tasks/outer_eval/alpacaeval.py              +4   -1
opencompass/models/qwen_api.py (view file @ b39f5015)

@@ -48,13 +48,13 @@ class Qwen(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -71,13 +71,13 @@ class Qwen(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -103,16 +103,26 @@ class Qwen(BaseAPIModel):
             messages = [{'role': 'user', 'content': input}]
         else:
             messages = []
-            for item in input:
-                msg = {'content': item['prompt']}
-                if item['role'] == 'HUMAN':
-                    msg['role'] = 'user'
+            msg_buffer, last_role = [], None
+            for index, item in enumerate(input):
+                if index == 0 and item['role'] == 'SYSTEM':
+                    role = 'system'
                 elif item['role'] == 'BOT':
-                    msg['role'] = 'assistant'
-                elif item['role'] == 'SYSTEM':
-                    msg['role'] = 'system'
-                messages.append(msg)
+                    role = 'assistant'
+                else:
+                    role = 'user'
+                if role != last_role and last_role is not None:
+                    messages.append({
+                        'content': '\n'.join(msg_buffer),
+                        'role': last_role
+                    })
+                    msg_buffer = []
+                msg_buffer.append(item['prompt'])
+                last_role = role
+            messages.append({
+                'content': '\n'.join(msg_buffer),
+                'role': last_role
+            })
         data = {'messages': messages}
         data.update(self.generation_kwargs)
@@ -142,6 +152,8 @@ class Qwen(BaseAPIModel):
         if response.status_code == 200:
             try:
                 msg = response.output.text
+                print('=' * 128)
+                print(msg)
                 return msg
             except KeyError:
                 print(response)
@@ -153,6 +165,8 @@ class Qwen(BaseAPIModel):
                 time.sleep(2)
                 continue
             if response.status_code == 400:
+                print('=' * 128)
+                print(response)
                 msg = 'Output data may contain inappropriate content.'
                 return msg
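The new loop in the third hunk buffers consecutive prompts that resolve to the same role and joins them with newlines, presumably so the downstream chat API never sees two consecutive messages with the same role. A standalone sketch of that merging logic (the function name and sample items are illustrative, not part of the commit):

```python
def merge_by_role(items):
    """Join consecutive prompts that map to the same role with newlines."""
    messages, buffer, last_role = [], [], None
    for index, item in enumerate(items):
        if index == 0 and item['role'] == 'SYSTEM':
            role = 'system'
        elif item['role'] == 'BOT':
            role = 'assistant'
        else:
            role = 'user'
        if role != last_role and last_role is not None:
            # role changed: flush the buffered prompts as one message
            messages.append({'role': last_role, 'content': '\n'.join(buffer)})
            buffer = []
        buffer.append(item['prompt'])
        last_role = role
    messages.append({'role': last_role, 'content': '\n'.join(buffer)})
    return messages

items = [
    {'role': 'HUMAN', 'prompt': 'Question 1'},
    {'role': 'HUMAN', 'prompt': 'Question 1, continued'},
    {'role': 'BOT', 'prompt': 'Answer 1'},
    {'role': 'HUMAN', 'prompt': 'Question 2'},
]
print(merge_by_role(items))
# [{'role': 'user', 'content': 'Question 1\nQuestion 1, continued'},
#  {'role': 'assistant', 'content': 'Answer 1'},
#  {'role': 'user', 'content': 'Question 2'}]
```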
opencompass/models/sensetime_api.py (view file @ b39f5015)

@@ -61,13 +61,13 @@ class SenseTime(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -84,13 +84,13 @@ class SenseTime(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -114,7 +114,8 @@ class SenseTime(BaseAPIModel):
             messages.append(msg)

         data = {'messages': messages, 'model': self.model}
-        data.update(self.params)
+        if self.params is not None:
+            data.update(self.params)

         stream = data['stream']
@@ -123,10 +124,14 @@ class SenseTime(BaseAPIModel):
             self.acquire()
             max_num_retries += 1
-            raw_response = requests.request('POST',
-                                            url=self.url,
-                                            headers=self.headers,
-                                            json=data)
+            try:
+                raw_response = requests.request('POST',
+                                                url=self.url,
+                                                headers=self.headers,
+                                                json=data)
+            except Exception:
+                time.sleep(1)
+                continue
             requests_id = raw_response.headers['X-Request-Id']  # noqa
             self.release()
opencompass/models/turbomind.py (view file @ b39f5015)

+import copy
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union
@@ -70,11 +71,10 @@ class TurboMindModel(BaseModel):
         self.gen_config = gen_config
         self.end_str = end_str

-    def generate(
-        self,
-        inputs: List[str],
-        max_out_len: int = 512,
-    ) -> List[str]:
+    def generate(self,
+                 inputs: List[str],
+                 max_out_len: int = 512,
+                 **kwargs) -> List[str]:
         """Generate results given a list of inputs.

         Args:
@@ -93,6 +93,15 @@ class TurboMindModel(BaseModel):
             inputs[i:i + batch_size] for i in range(0, len(inputs), batch_size)
         ]

+        gen_config = copy.deepcopy(self.gen_config)
+        if 'do_sample' in kwargs:
+            if kwargs['do_sample']:
+                gen_config.top_k = 1000
+                gen_config.temperature = kwargs.get('temperature', 1)
+            else:
+                gen_config.top_k = 1
+                gen_config.temperature = 0.01
+
         results = []
         for batch_input in batch_inputs:
             with ThreadPoolExecutor() as executor:
@@ -103,7 +112,7 @@ class TurboMindModel(BaseModel):
                         self.generator_ids[:len(batch_input)],
                         batch_input,
                         [max_out_len] * len(batch_input),
-                        [self.gen_config] * len(batch_input),
+                        [gen_config] * len(batch_input),
                         [self.end_str] * len(batch_input),
                     ))
             results += _results
@@ -123,14 +132,14 @@ class TurboMindModel(BaseModel):
     def _generate(self,
                   generator,
                   session_id,
-                  prompt: str or PromptList,
+                  prompt: PromptType,
                   max_out_len: int,
                   gen_config=None,
                   end_str: Optional[str] = None) -> str:
         """Generate results given a list of inputs.

         Args:
-            prompt (str or PromptList): A string or PromptDict.
+            prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -189,3 +198,22 @@ class TurboMindModel(BaseModel):
             results.append(res)
         results = np.concatenate(results)
         return results
+
+    def get_loglikelihood(
+            self,
+            inputs: List[str],
+            conts: List[str],
+            mask_length: Optional[List[int]] = None) -> List[float]:
+        assert isinstance(
+            inputs, List), f'List(str) is expected, but got {type(inputs)}'
+        results = []
+        for text, cont in zip(inputs, conts):
+            input_ids = self.tokenizer.encode(text)
+            res = self.generators[0].get_ppl(input_ids)
+            logit_sum = res * len(input_ids)
+            input_ids = self.tokenizer.encode(text.replace(cont, ''))
+            res = self.generators[0].get_ppl(input_ids)
+            logit_part = res * len(input_ids)
+            results.append(-(logit_sum - logit_part))
+        results = np.concatenate(results)
+        return results
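The new get_loglikelihood leans on the backend's get_ppl, which (judging by the multiplication by len(input_ids)) returns a mean per-token negative log-likelihood. A worked sketch of the arithmetic, with made-up numbers:

```python
# Assumption: get_ppl returns the mean negative log-likelihood (NLL) per
# token, so multiplying by the token count recovers the total NLL.
mean_nll_full, n_full = 2.0, 12      # full text = prefix + continuation
mean_nll_prefix, n_prefix = 2.2, 8   # text with the continuation stripped

nll_full = mean_nll_full * n_full        # 24.0: total NLL of the full text
nll_prefix = mean_nll_prefix * n_prefix  # 17.6: total NLL of the prefix

# log P(continuation | prefix) = -(NLL(full) - NLL(prefix))
loglikelihood = -(nll_full - nll_prefix)
print(loglikelihood)  # -6.4
```

Note that the prefix is derived via text.replace(cont, ''), which implicitly assumes the continuation occurs exactly once, at the end of the text.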
opencompass/models/turbomind_api.py (view file @ b39f5015)

@@ -60,14 +60,14 @@ class TurboMindAPIModel(BaseModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
         temperature: float = 1.0,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -102,12 +102,12 @@ class TurboMindAPIModel(BaseModel):
         """
         return self.token_bucket.get_token()

-    def _generate(self, prompt: str or PromptList, max_out_len: int,
+    def _generate(self, prompt: PromptType, max_out_len: int,
                   temperature: float, end_str: str) -> str:
         """Generate results given a list of inputs.

         Args:
-            prompt (str or PromptList): A string or PromptDict.
+            prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
opencompass/models/turbomind_tis.py (view file @ b39f5015)

@@ -58,14 +58,14 @@ class TurboMindTisModel(BaseModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
         temperature: float = 1.0,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -96,12 +96,12 @@ class TurboMindTisModel(BaseModel):
         """
         return self.token_bucket.get_token()

-    def _generate(self, prompt: str or PromptList, max_out_len: int,
+    def _generate(self, prompt: PromptType, max_out_len: int,
                   temperature: float) -> str:
         """Generate results given a list of inputs.

         Args:
-            prompt (str or PromptList): A string or PromptDict.
+            prompt (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
opencompass/models/unigpt_api.py (new file, 0 → 100644; view file @ b39f5015)

import hashlib
import time
import uuid
from concurrent.futures import ThreadPoolExecutor
from typing import Dict, List, Optional, Union

import requests

from opencompass.utils.prompt import PromptList

from .base_api import BaseAPIModel

PromptType = Union[PromptList, str]


def get_sign(appkey, udid, timestamp, secret):
    original_str = f'{appkey}{udid}{timestamp}{secret}'
    sign = ''
    try:
        md = hashlib.sha256()
        md.update(original_str.encode('utf-8'))
        bytes_result = md.digest()
        for byte in bytes_result:
            hex_value = format(byte, '02X')
            sign += hex_value.upper()
    except Exception as e:
        print(e)
    return sign


class UniGPT(BaseAPIModel):

    def __init__(
            self,
            path: str,
            appkey: str,
            secret: str,
            url: str,
            query_per_second: int = 2,
            max_seq_len: int = 2048,
            meta_template: Optional[Dict] = None,
            retry: int = 2,
            temperature: float = 0.2,
    ):  # noqa E125
        super().__init__(
            path=path,
            max_seq_len=max_seq_len,
            query_per_second=query_per_second,
            meta_template=meta_template,
            retry=retry,
        )
        self.appkey = appkey
        self.secret = secret
        self.udid = str(uuid.uuid1())
        self.url = url
        self.model = path
        self.temperature = temperature

    def generate(self,
                 inputs: List[PromptType],
                 max_out_len: int = 512) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptDicts.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            List[str]: A list of generated strings.
        """
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [max_out_len] * len(inputs)))
        self.flush()
        return results

    def _generate(self, input: PromptType, max_out_len: int = 512) -> str:
        """Generate results given an input.

        Args:
            inputs (PromptType): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            str: The generated string.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            for item in input:
                msg = {'content': item['prompt']}
                if item['role'] == 'HUMAN':
                    msg['role'] = 'user'
                elif item['role'] == 'BOT':
                    msg['role'] = 'assistant'
                elif item['role'] == 'SYSTEM':
                    msg['role'] = 'system'
                messages.append(msg)

        data = {
            'model': self.path,
            'temperature': self.temperature,
            'messages': messages,
            'max_tokens': max_out_len,
        }
        timestamp = str(int(time.time()) * 1000)
        headers = {
            'appkey': self.appkey,
            'sign': get_sign(self.appkey, self.udid, timestamp, self.secret),
            'stream': 'false',
            'timestamp': timestamp,
            'udid': self.udid,
            'censor': 'none',
        }

        for _ in range(self.retry):
            try:
                response = requests.post(self.url, json=data, headers=headers)
            except Exception as e:
                print(e)
                continue
            if response is None or response.status_code != 200:
                code = response.status_code if response else -1
                print(f'request err, status_code: {code}')
                time.sleep(10)
                continue
            try:
                response = response.json()
            except Exception as e:
                print(e)
                continue
            print(response)
            if response.get('errorCode') == '8500502':
                return 'context_length_exceeded'
            return response['result']['choices'][0]['message']['content']
        raise RuntimeError(f'Failed to respond in {self.retry} retrys')
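The byte-by-byte formatting loop in get_sign reproduces what hexdigest() already provides: '02X' formats each digest byte as uppercase hex, which is the uppercased hex digest. A minimal equivalent sketch (not part of the commit):

```python
import hashlib


def get_sign_compact(appkey, udid, timestamp, secret):
    """Equivalent to get_sign above: SHA-256 over the concatenated fields,
    rendered as an uppercase hex string."""
    original = f'{appkey}{udid}{timestamp}{secret}'.encode('utf-8')
    return hashlib.sha256(original).hexdigest().upper()
```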
opencompass/models/xunfei_api.py (view file @ b39f5015)

@@ -98,13 +98,13 @@ class XunFei(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -121,13 +121,13 @@ class XunFei(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
opencompass/models/yayi_api.py (new file, 0 → 100644; view file @ b39f5015; docstrings and inline comments translated from Chinese, and the class docstring corrected: it was copy-pasted from the SenseTime wrapper)

import base64
import hashlib
import hmac
import random
import string
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from typing import Dict, List, Optional, Union

import requests

from opencompass.utils.prompt import PromptList

from .base_api import BaseAPIModel

PromptType = Union[PromptList, str]


def generate_random_string(length=16):
    """Generate a random string.

    :param length: length of the string, 16 by default
    :return: the random string
    """
    letters = string.ascii_letters + string.digits
    rand_str = ''.join(random.choice(letters) for i in range(length))
    return rand_str


def get_current_time(format='%Y-%m-%d %H:%M:%S'):
    """Get the current time.

    :param format: time format, '%Y-%m-%d %H:%M:%S' by default
    :return: the current time as a string
    """
    now = datetime.now()
    time_str = now.strftime(format)
    return time_str


def get_current_timestamp():
    """Get the current timestamp in milliseconds.

    :return: the timestamp as a string
    """
    timestamp_str = int(round(time.time() * 1000))
    return str(timestamp_str)


def encode_base64_string(s):
    """Base64-encode a value.

    :param s: the bytes to encode
    :return: the encoded string
    """
    encoded = base64.b64encode(s).decode()
    return encoded


def get_current_time_gmt_format():
    """Get the current time as a GMT-formatted string.

    :return: the formatted time string
    """
    GMT_FORMAT = '%a, %d %b %Y %H:%M:%SGMT+00:00'
    now = datetime.now()
    time_str = now.strftime(GMT_FORMAT)
    return time_str


class Yayi(BaseAPIModel):
    """Model wrapper around YaYi.

    Args:
        path (str): The name of the YaYi model.
        query_per_second (int): The maximum queries allowed per second
            between two consecutive calls of the API. Defaults to 1.
        max_seq_len (int): Unused here.
        meta_template (Dict, optional): The model's meta prompt
            template if needed, in case the requirement of injecting or
            wrapping of any meta instructions.
        retry (int): Number of retries if the API call fails. Defaults to 2.
    """

    def __init__(
        self,
        path: str,
        url: str,
        url_path: str,
        x_tilake_app_key: str,
        x_tilake_app_secret: str,
        x_tilake_ca_sginature_method: str,
        query_per_second: int = 2,
        max_seq_len: int = 2048,
        meta_template: Optional[Dict] = None,
        retry: int = 2,
        temperature: float = 0.4,
    ):
        super().__init__(
            path=path,
            max_seq_len=max_seq_len,
            query_per_second=query_per_second,
            meta_template=meta_template,
            retry=retry,
        )
        self.url = url
        self.url_path = url_path
        self.X_TILAKE_APP_KEY = x_tilake_app_key
        self.X_TILAKE_APP_SECRET = x_tilake_app_secret
        self.X_TILAKE_CA_SGINATURE_METHOD = x_tilake_ca_sginature_method
        self.temperature = temperature
        self.model = path

    def generate_signature(self, method, accept, content_type, date,
                           url_path):
        """Generate a request signature.

        :param method: HTTP method
        :param accept: Accept header value
        :param content_type: Content-Type header value
        :param date: request date string
        :param url_path: request URL path
        :return: the Base64-encoded HMAC-SHA256 signature
        """
        string_to_sign = (method + '\n' + accept + '\n' + content_type +
                          '\n' + date + '\n' + url_path)
        string_to_sign = string_to_sign.encode('utf-8')
        secret_key = self.X_TILAKE_APP_SECRET.encode('utf-8')
        signature = hmac.new(secret_key, string_to_sign,
                             hashlib.sha256).digest()
        return encode_base64_string(signature)

    def generate_header(self, content_type, accept, date, signature):
        """Generate request headers.

        :param content_type: Content-Type header value
        :param accept: Accept header value
        :param date: request date string
        :param signature: the request signature
        :return: a dict of request headers
        """
        headers = {
            'x-tilake-app-key': self.X_TILAKE_APP_KEY,
            'x-tilake-ca-signature-method': self.X_TILAKE_CA_SGINATURE_METHOD,
            'x-tilake-ca-timestamp': get_current_timestamp(),
            'x-tilake-ca-nonce': generate_random_string(),
            'x-tilake-ca-signature': signature,
            'Date': date,
            'Content-Type': content_type,
            'Accept': accept,
        }
        return headers

    def generate(
        self,
        inputs: List[PromptType],
        max_out_len: int = 512,
    ) -> List[str]:
        """Generate results given a list of inputs.

        Args:
            inputs (List[PromptType]): A list of strings or PromptDicts.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            List[str]: A list of generated strings.
        """
        with ThreadPoolExecutor() as executor:
            results = list(
                executor.map(self._generate, inputs,
                             [max_out_len] * len(inputs)))
        self.flush()
        return results

    def _generate(
        self,
        input: PromptType,
        max_out_len: int = 512,
    ) -> str:
        """Generate results given an input.

        Args:
            inputs (PromptType): A string or PromptDict.
                The PromptDict should be organized in OpenCompass'
                API format.
            max_out_len (int): The maximum length of the output.

        Returns:
            str: The generated string.
        """
        assert isinstance(input, (str, PromptList))

        if isinstance(input, str):
            messages = [{'role': 'user', 'content': input}]
        else:
            messages = []
            msg_buffer, last_role = [], None
            for item in input:
                item['role'] = 'yayi' if item['role'] == 'BOT' else 'user'
                if item['role'] != last_role and last_role is not None:
                    messages.append({
                        'content': '\n'.join(msg_buffer),
                        'role': last_role
                    })
                    msg_buffer = []
                msg_buffer.append(item['prompt'])
                last_role = item['role']
            messages.append({
                'content': '\n'.join(msg_buffer),
                'role': last_role
            })

        date = get_current_time_gmt_format()
        content_type = 'application/json'
        accept = '*/*'
        method = 'POST'
        data = {
            'id': '001',  # request id; no need to change it
            'model': self.model,
            'messages': messages,
            # max_new_tokens and the parameters below can be tuned per task
            'max_new_tokens': max_out_len,
            'temperature': self.temperature,
            'presence_penalty': 0.85,
            'frequency_penalty': 0.16,
            'do_sample': True,
            'top_p': 1.0,
            'top_k': -1,
        }

        for _ in range(self.retry):
            signature_str = self.generate_signature(method=method,
                                                    accept=accept,
                                                    content_type=content_type,
                                                    date=date,
                                                    url_path=self.url_path)
            headers = self.generate_header(content_type=content_type,
                                           accept=accept,
                                           date=date,
                                           signature=signature_str)
            try:
                response = requests.post(self.url, json=data, headers=headers)
            except Exception as e:
                print(e)
                continue
            try:
                response = response.json()
            except Exception as e:
                print(e)
                continue
            print(response)
            try:
                return response['data']['choices'][0]['message']['content']
            except Exception as e:
                print(e)
                continue
        raise RuntimeError(f'Failed to respond in {self.retry} retrys')
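For reference, the request signing in Yayi boils down to HMAC-SHA256 over five newline-joined request attributes, then Base64. A self-contained sketch with hypothetical values (the secret and url_path here are placeholders, not from the commit):

```python
import base64
import hashlib
import hmac

secret = b'app-secret'                      # hypothetical X_TILAKE_APP_SECRET
string_to_sign = '\n'.join([
    'POST',                                 # method
    '*/*',                                  # accept
    'application/json',                     # content type
    'Mon, 01 Jan 2024 00:00:00GMT+00:00',   # date, as formatted above
    '/v1/chat',                             # hypothetical url_path
]).encode('utf-8')

signature = base64.b64encode(
    hmac.new(secret, string_to_sign, hashlib.sha256).digest()).decode()
print(signature)  # the value sent as x-tilake-ca-signature
```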
opencompass/models/zhipuai_api.py (view file @ b39f5015)

@@ -44,13 +44,13 @@ class ZhiPuAI(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -67,13 +67,13 @@ class ZhiPuAI(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
opencompass/models/zhipuai_v2_api.py (view file @ b39f5015)

@@ -2,8 +2,6 @@ import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Dict, List, Optional, Union

-from httpx import ProxyError
-
 from opencompass.utils.prompt import PromptList

 from .base_api import BaseAPIModel
@@ -59,13 +57,13 @@ class ZhiPuV2AI(BaseAPIModel):
     def generate(
         self,
-        inputs: List[str or PromptList],
+        inputs: List[PromptType],
         max_out_len: int = 512,
     ) -> List[str]:
         """Generate results given a list of inputs.

         Args:
-            inputs (List[str or PromptList]): A list of strings or PromptDicts.
+            inputs (List[PromptType]): A list of strings or PromptDicts.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -82,13 +80,13 @@ class ZhiPuV2AI(BaseAPIModel):
     def _generate(
         self,
-        input: str or PromptList,
+        input: PromptType,
         max_out_len: int = 512,
     ) -> str:
         """Generate results given an input.

         Args:
-            inputs (str or PromptList): A string or PromptDict.
+            inputs (PromptType): A string or PromptDict.
                 The PromptDict should be organized in OpenCompass'
                 API format.
             max_out_len (int): The maximum length of the output.
@@ -103,6 +101,8 @@ class ZhiPuV2AI(BaseAPIModel):
         else:
             messages = []
             for item in input:
+                if not item['prompt']:
+                    continue
                 msg = {'content': item['prompt']}
                 if item['role'] == 'HUMAN':
                     msg['role'] = 'user'
@@ -115,11 +115,15 @@ class ZhiPuV2AI(BaseAPIModel):
         data = {'model': self.model, 'messages': messages}
         data.update(self.generation_kwargs)

+        from pprint import pprint
+        print('-' * 128)
+        pprint(data)
+
         max_num_retries = 0
         while max_num_retries < self.retry:
             self.acquire()
             response = None
+            from httpx import ProxyError
             try:
                 response = self.client.chat.completions.create(**data)
@@ -161,6 +165,8 @@ class ZhiPuV2AI(BaseAPIModel):
             # msg = response['data']['choices'][0]['content']
             else:
                 msg = response.choices[0].message.content
+                print('=' * 128)
+                print(msg)
                 return msg
         # sensitive content, prompt overlength, network error
         # or illegal prompt
opencompass/openicl/icl_evaluator/lm_evaluator.py (view file @ b39f5015)

@@ -120,7 +120,7 @@ class LMEvaluator:
               meta: Optional[bool] = False,
               infer_order: Optional[str] = 'random') -> Dict:
         dup_indices = []
-        if type(predictions) == list:
+        if isinstance(predictions, list):
             """Apply to multi-model comparison."""
             references = [{} for _ in range(len(predictions[0]['model_preds']))
                           ] if references is None else references
@@ -137,7 +137,7 @@ class LMEvaluator:
                 if len(set(check)) == 1:
                     dup_indices.append(i)

-        elif type(predictions) == dict:
+        elif isinstance(predictions, dict):
             """Apply to single-model scoring."""
             references = [{} for _ in range(len(predictions[0]['model_preds']))
                           ] if references is None else references
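The type() to isinstance() change is more than style: an exact-type comparison rejects subclasses, while isinstance accepts them. A minimal illustration (the subclass is hypothetical):

```python
class PredList(list):  # hypothetical list subclass
    pass

preds = PredList()
print(type(preds) == list)      # False: exact-type check rejects subclasses
print(isinstance(preds, list))  # True: isinstance accepts them
```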
opencompass/openicl/icl_prompt_template.py (view file @ b39f5015)

@@ -77,7 +77,7 @@ class PromptTemplate:
             label (:obj:`Hashable`): The value of the output field.

         Returns:
-            str or PromptList: The generated in-context example.
+            PromptType: The generated in-context example.
         """
         # Select the corresponding template
         if isinstance(self.template, str) or self.prompt_type == 'meta':
@@ -114,7 +114,7 @@ class PromptTemplate:
             entry (:obj:`Dict`): A piece of data containing the input field
                 content.
-            ice (str or PromptList): The generated in-context example.
+            ice (PromptType): The generated in-context example.
             label (:obj:`Hashable`): The value of the output field.
             remain_sep (:obj:`bool`): If remain sep_token
@@ -165,7 +165,7 @@ class PromptTemplate:
                 the :obj:`ice_token`. Defaults to ``''``.

         Returns:
-            str or PromptList: The generated item.
+            PromptType: The generated item.
         """
         template = None
         if isinstance(self.template, str):
@@ -220,7 +220,7 @@ class PromptTemplate:
                 examples.

         Returns:
-            str or PromptList: The encoded template.
+            PromptType: The encoded template.
         """
         if isinstance(prompt_template, str):
             return prompt_template
opencompass/registry.py (view file @ b39f5015)

+from typing import Callable, List, Optional, Type, Union
+
 from mmengine.registry import DATASETS as MMENGINE_DATASETS
 from mmengine.registry import METRICS as MMENGINE_METRICS
 from mmengine.registry import MODELS as MMENGINE_MODELS
-from mmengine.registry import Registry
+from mmengine.registry import Registry as OriginalRegistry
+
+
+class Registry(OriginalRegistry):
+
+    # override the default force behavior
+    def register_module(
+            self,
+            name: Optional[Union[str, List[str]]] = None,
+            force: bool = True,
+            module: Optional[Type] = None) -> Union[type, Callable]:
+        return super().register_module(name, force, module)
+

 PARTITIONERS = Registry('partitioner', locations=['opencompass.partitioners'])
 RUNNERS = Registry('runner', locations=['opencompass.runners'])
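What the force=True default buys: with a plain mmengine Registry, registering a second module under an existing name normally raises KeyError; this subclass silently replaces the earlier registration instead. A sketch (registry name and classes are hypothetical):

```python
from opencompass.registry import Registry

DEMO = Registry('demo')


@DEMO.register_module('tool')
class ToolV1:
    pass


@DEMO.register_module('tool')  # would raise KeyError under force=False
class ToolV2:
    pass


assert DEMO.get('tool') is ToolV2  # the later registration wins
```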
opencompass/runners/dlc.py (view file @ b39f5015)

@@ -118,6 +118,7 @@ class DLCRunner(BaseRunner):
             conda_env_name = self.aliyun_cfg['conda_env_name']
             shell_cmd = (f'source {bashrc_path}; '
                          f'conda activate {conda_env_name}; ')
+            shell_cmd += f'export PYTHONPATH={pwd}:$PYTHONPATH; '
         else:
             # using public conda env
             # users can also set `python_env_path` to their
@@ -151,6 +152,11 @@ class DLCRunner(BaseRunner):
         if hf_endpoint is not None:
             shell_cmd += f'export HF_ENDPOINT={hf_endpoint}; '

+        extra_envs = self.aliyun_cfg.get('extra_envs')
+        if extra_envs is not None:
+            for extra_env in extra_envs:
+                shell_cmd += f'export {extra_env}; '
+
         shell_cmd += f'cd {pwd}; '
         shell_cmd += '{task_cmd}'
@@ -161,9 +167,9 @@ class DLCRunner(BaseRunner):
                f" -c {self.aliyun_cfg['dlc_config_path']}"
                f" --workspace_id {self.aliyun_cfg['workspace_id']}"
                ' --worker_count 1'
-               f' --worker_cpu {max(num_gpus * 8, 32)}'
+               f' --worker_cpu {max(num_gpus * 8, 12)}'
                f' --worker_gpu {num_gpus}'
-               f' --worker_memory {max(num_gpus * 128, 256)}'
+               f' --worker_memory {max(num_gpus * 128, 192)}'
                f" --worker_image {self.aliyun_cfg['worker_image']}")
         get_cmd = partial(task.get_command,
                           cfg_path=param_file,
@@ -185,14 +191,25 @@ class DLCRunner(BaseRunner):
             time.sleep(random.randint(0, 10))

         def _run_within_retry():
-            output = subprocess.getoutput(cmd)
-            match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
-            if match is None:
-                raise RuntimeError(f'Failed to launch dlc job for {output}')
-            else:
-                job_id = match.group(1)
-                stdout.write(output)
+            num_retry_to_start = 5
+            index_to_start = 0
+            while index_to_start < num_retry_to_start:
+                index_to_start += 1
+                output = subprocess.getoutput(cmd)
+                match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
+                if match is None:
+                    stdout.write('Failed to get job id from output:')
+                    stdout.write(output)
+                    if index_to_start < num_retry_to_start:
+                        stdout.write(f'Retry #{index_to_start} starting')
+                        time.sleep(2)
+                    continue
+                else:
+                    job_id = match.group(1)
+                    stdout.write(output)
+                    break
+            else:
+                raise RuntimeError(f'Cannot get job id from {output}')

             pod_create_time = None
             pri_time = None
@@ -200,7 +217,7 @@ class DLCRunner(BaseRunner):
             while True:
                 # 1. Avoid to request dlc too frequently.
                 # 2. DLC job may not be ready immediately after creation.
-                for _ in range(5):
+                for _ in range(20):
                     time.sleep(2)
                     try:
                         job_info = json.loads(
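The launch loop above follows a common retry-until-match pattern: run the command, look for a job id in the output, and give up after a bounded number of attempts. A generic, self-contained sketch (the regex matches the diff; the function name is hypothetical):

```python
import re
import subprocess
import time


def launch_with_retry(cmd: str, attempts: int = 5) -> str:
    """Run `cmd` until its output contains a dlc job id, or give up."""
    for attempt in range(1, attempts + 1):
        output = subprocess.getoutput(cmd)
        match = re.search(r'\|\s+(dlc[0-9a-z]+)\s+\|', output)
        if match:
            return match.group(1)  # the job id
        print(f'Failed to get job id (attempt {attempt}); retrying')
        time.sleep(2)
    raise RuntimeError(f'Cannot get job id from {output}')
```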
opencompass/summarizers/default.py (view file @ b39f5015)

@@ -17,7 +17,7 @@ from opencompass.utils import (LarkReporter, dataset_abbr_from_cfg,
 from opencompass.utils.prompt import get_prompt_hash

 METRIC_WHITELIST = ['score', 'auc_score', 'accuracy', 'humaneval_pass@1',
                     'rouge1', 'avg_toxicity_score', 'bleurt_diff',
                     'matthews_correlation', 'truth', 'f1', 'exact_match']
-METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len']
+METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len', 'tool_rate']


 def model_abbr_from_cfg_used_in_summarizer(model):
     if model.get('summarizer_abbr', None):
opencompass/summarizers/subjective/alignmentbench.py (view file @ b39f5015)

@@ -218,8 +218,9 @@ def get_dimension_results(judged_answers, references, fout, fout_flag, model):
     dimension_avg_ratings = defaultdict(float)
     for dimension, total_score in dimension_ratings.items():
-        dimension_avg_ratings[
-            dimension] = total_score / dimension_counts[dimension]
+        s = total_score / dimension_counts[dimension]
+        s = round(s, 2)
+        dimension_avg_ratings[dimension] = s

     scores = {model: dimension_avg_ratings}
     rows = list(scores.keys())
@@ -249,8 +250,9 @@ def get_capability_results(judged_answers,
     capability_avg_ratings = defaultdict(float)
     for capability, total_score in capability_ratings.items():
-        capability_avg_ratings[
-            capability] = total_score / capability_counts[capability]
+        s = total_score / capability_counts[capability]
+        s = round(s, 2)
+        capability_avg_ratings[capability] = s

     temp_list = []
     total_column_num = 2
@@ -260,11 +262,14 @@ def get_capability_results(judged_answers,
             np.mean(capability_avg_ratings[cat])
             for cat in categories[category]
         ])
+        capability_avg_ratings[category + '总分'] = round(
+            capability_avg_ratings[category + '总分'], 2)
         temp_list.append(category + '总分')
     capability_avg_ratings['总分'] = 0
     for temp in temp_list:
         capability_avg_ratings['总分'] += capability_avg_ratings[temp]
     capability_avg_ratings['总分'] /= len(temp_list)
+    capability_avg_ratings['总分'] = round(capability_avg_ratings['总分'], 2)
     scores = {model: capability_avg_ratings}

     with open(fout, 'a+', newline='') as csvfile:
@@ -365,8 +370,10 @@ class AlignmentBenchSummarizer:
             print(subdir_path + ' is not exist! please check!')
         if self.judge_type == 'general':
             with open(fout, 'r') as f:
-                x = from_csv(f)
+                x = from_csv(f, delimiter=',')
             print(x)
+            print(fout)
         with open(fout2, 'r') as f:
-            x = from_csv(f)
+            x = from_csv(f, delimiter=',')
         print(x)
+        print(fout2)
opencompass/summarizers/subjective/compass_arena.py (view file @ b39f5015)

@@ -229,4 +229,5 @@ class CompassArenaSummarizer:
         for fout in fout_list:
             with open(fout, 'r') as f:
                 x = from_csv(f)
+            print(fout)
             print(x)
opencompass/summarizers/subjective/mtbench.py (view file @ b39f5015)

@@ -65,8 +65,9 @@ def get_capability_results(
     capability_avg_ratings = defaultdict(float)
     for capability, total_score in capability_ratings.items():
-        capability_avg_ratings[
-            capability] = total_score / capability_counts[capability]
+        s = total_score / capability_counts[capability]
+        s = round(s, 2)
+        capability_avg_ratings[capability] = s

     columns = list(capability_avg_ratings.keys())
     columns.insert(0, columns.pop(columns.index('total')))

     with open(fout, 'a+', newline='') as csvfile:
@@ -142,5 +143,6 @@ class MTBenchSummarizer(CompassArenaSummarizer):
             with open(fout, 'r') as f:
                 x = from_csv(f)
             print(x)
+            print(fout)
         elif self.judge_type == 'pair':
             super().summarize()
opencompass/tasks/openicl_infer.py (view file @ b39f5015)

@@ -43,9 +43,12 @@ class OpenICLInferTask(BaseTask):
             the command.
         """
         script_path = __file__
-        has_vllm = ('VLLM' in str(self.model_cfgs[0].get('type', ''))) or \
-            'VLLM' in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
-        if self.num_gpus > 0 and not has_vllm:
+        backend_keys = ['VLLM', 'Lmdeploy']
+        use_backend = any(
+            key in str(self.model_cfgs[0].get('type', ''))
+            or key in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
+            for key in backend_keys)
+        if self.num_gpus > 0 and not use_backend:
             port = random.randint(12000, 32000)
             command = (f'torchrun --master_port={port} '
                        f'--nproc_per_node {self.num_procs} '
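This generalizes the old VLLM-only test to a keyword scan over both the top-level type and a nested llm.type. In isolation (the config dict is hypothetical):

```python
backend_keys = ['VLLM', 'Lmdeploy']
model_cfg = {'type': 'opencompass.models.VLLM', 'llm': {}}  # hypothetical

use_backend = any(
    key in str(model_cfg.get('type', ''))
    or key in str(model_cfg.get('llm', {}).get('type', ''))
    for key in backend_keys)
print(use_backend)  # True: skip torchrun, the inference backend manages GPUs
```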
opencompass/tasks/outer_eval/alpacaeval.py (view file @ b39f5015)

@@ -120,7 +120,10 @@ class AlpacaEvalTask(BaseTask):
             filename = get_infer_output_path(m_cfg, dataset_cfg,
                                              osp.join(work_dir, 'predictions'))
             output_path = osp.join(work_dir, 'results', m_cfg['abbr'])
-            command = f'export OPENAI_API_KEY={api_key}; alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
+            command = ''
+            if api_key is not None:
+                command += f'export OPENAI_API_KEY={api_key}; '
+            command += f'alpaca_eval --model_outputs {filename} --annotators_config {alpaca_cfg} --output_path {output_path}'
         return template.format(task_cmd=command)

     def run(self):