OpenDAS / opencompass / Commits

Unverified commit 2b3d4150, authored May 21, 2024 by Fengzhe Zhou, committed via GitHub on May 21, 2024.
[Sync] update evaluator (#1175)
Parent: 296ea599
Changes: 24
Showing 4 changed files with 60 additions and 6 deletions.
opencompass/datasets/mbpp.py                              +4  -3
opencompass/models/huggingface_above_v4_33.py             +14 -1
opencompass/models/turbomind.py                           +16 -2
opencompass/openicl/icl_evaluator/icl_hf_evaluator.py     +26 -0
opencompass/datasets/mbpp.py

@@ -227,9 +227,10 @@ class MBPPEvaluator(BaseEvaluator):
             from tqdm import tqdm
             for future in tqdm(as_completed(futures), total=len(futures)):
-                index, key = future.result()
-                result[key] += 1
-                details[str(index)]['result'] = key
+                index, ret = future.result()
+                result[ret] += 1
+                details[str(index)]['result'] = ret
+                details[str(index)]['is_correct'] = (ret == 'pass')

         result['score'] = result['pass'] / len(predictions) * 100
         result['details'] = details
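The change renames the worker result from key to ret and, for each sample, now records an explicit is_correct boolean next to the raw result label. A minimal downstream sketch, assuming a details dict shaped like the one the evaluator emits (summarize_details is a hypothetical helper, not part of the commit):

# Summarise a details dict of the form {'0': {'result': 'pass', 'is_correct': True}, ...}
from collections import Counter


def summarize_details(details: dict) -> dict:
    # Count how often each result label ('pass', 'timeout', ...) occurs.
    label_counts = Counter(entry['result'] for entry in details.values())
    # Recompute accuracy from the per-sample is_correct flags.
    n_correct = sum(entry['is_correct'] for entry in details.values())
    return {
        'label_counts': dict(label_counts),
        'accuracy': 100.0 * n_correct / max(len(details), 1),
    }


# Example:
# summarize_details({'0': {'result': 'pass', 'is_correct': True},
#                    '1': {'result': 'timeout', 'is_correct': False}})
# -> {'label_counts': {'pass': 1, 'timeout': 1}, 'accuracy': 50.0}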
opencompass/models/huggingface_above_v4_33.py

@@ -59,7 +59,7 @@ def _get_possible_max_seq_len(max_seq_len, path):
     raise ValueError('max_seq_len is not provided and cannot be inferred from the model config.')


-def _convert_chat_messages(inputs):
+def _convert_chat_messages(inputs, merge_role=True):
     outputs = []
     for _input in inputs:
         messages = []
@@ -73,7 +73,18 @@ def _convert_chat_messages(inputs):
                     'SYSTEM': 'system',
                 }[item['role']]
                 messages.append({'role': role, 'content': item['prompt']})
+
+        if merge_role:
+            merged_messages = []
+            for item in messages:
+                if merged_messages and merged_messages[-1]['role'] == item['role']:
+                    merged_messages[-1]['content'] += '\n' + item['content']
+                else:
+                    merged_messages.append(item)
+            messages = merged_messages
+
         outputs.append(messages)
+        print(messages)
     return outputs
@@ -104,6 +115,8 @@ def _get_meta_template(meta_template):
     default_meta_template = dict(
         round=[
             dict(role='HUMAN', api_role='HUMAN'),
+            # XXX: all system roles are mapped to human in purpose
+            dict(role='SYSTEM', api_role='HUMAN'),
             dict(role='BOT', api_role='BOT', generate=True),
         ]
     )
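The new merge_role flag collapses consecutive messages that end up with the same role into a single message before the chat template is applied, joining their contents with a newline. A standalone sketch of that merging logic, mirroring the loop in the diff (merge_same_role is an illustrative name, not part of the module):

def merge_same_role(messages):
    """Collapse consecutive messages that share a role into one message."""
    merged = []
    for item in messages:
        if merged and merged[-1]['role'] == item['role']:
            # Same role as the previous message: append to its content.
            merged[-1]['content'] += '\n' + item['content']
        else:
            merged.append(dict(item))  # copy so the caller's list is untouched
    return merged


# Example: two consecutive 'user' turns become a single turn.
msgs = [
    {'role': 'user', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': 'What is 2 + 2?'},
    {'role': 'assistant', 'content': '4'},
]
assert merge_same_role(msgs) == [
    {'role': 'user', 'content': 'You are a helpful assistant.\nWhat is 2 + 2?'},
    {'role': 'assistant', 'content': '4'},
]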
opencompass/models/turbomind.py

@@ -37,6 +37,9 @@ class TurboMindModel(BaseModel):
             arguments like session_len, max_batch_size for TurboMind.
         gen_config (Dict, optional): Generation config to set
             arguments like top_k, top_p, temperature.
+        end_str (str, optional): Whether to trim generated strings with end_str
+            if the model has special ending strings that are not handled well.
+            Defaults to None.
     """

     def __init__(self,
@@ -45,7 +48,8 @@ class TurboMindModel(BaseModel):
                  max_seq_len: int = 2048,
                  meta_template: Optional[Dict] = None,
                  engine_config: Dict = {},
-                 gen_config: Dict = {}):
+                 gen_config: Dict = {},
+                 end_str: Optional[str] = None):
         super().__init__(path=path,
                          max_seq_len=max_seq_len,
                          meta_template=meta_template)
@@ -64,6 +68,7 @@ class TurboMindModel(BaseModel):
         self.generator_ids = [i + 1 for i in range(concurrency)]
         self.gen_config = gen_config
         self.major_version, self.minor_version, _ = version_info
+        self.end_str = end_str

     def generate(self,
                  inputs: List[str],
@@ -119,6 +124,7 @@ class TurboMindModel(BaseModel):
                         batch_input,
                         [max_out_len] * len(batch_input),
                         [gen_config] * len(batch_input),
+                        [self.end_str] * len(batch_input),
                     ))
             results += _results
         if stopping_criteria:
@@ -142,7 +148,8 @@ class TurboMindModel(BaseModel):
                   session_id,
                   prompt: PromptType,
                   max_out_len: int,
-                  gen_config=None) -> str:
+                  gen_config=None,
+                  end_str: Optional[str] = None) -> str:
         """Generate results given a list of inputs.

         Args:
@@ -152,6 +159,10 @@ class TurboMindModel(BaseModel):
             max_out_len (int): The maximum length of the output.
             gen_config (EngineGenerationConfig, optional): Generation
                 config to set arguments like top_k, top_p, temperature.
+            end_str (str, optional): Whether to trim generated strings
+                with end_str if the model has special ending strings
+                that are not handled well.
+                Defaults to None.
         Returns:
             str: The generated string.
         """
@@ -174,6 +185,9 @@ class TurboMindModel(BaseModel):
         _, output_ids, _ = outputs
         response = self.tokenizer.decode(output_ids)
         response = valid_str(response)
+        # used to trim
+        if end_str:
+            response = response.split(end_str)[0]
         return response

     def get_ppl(self,
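The new end_str option trims everything from the first occurrence of the ending string onward, for models whose special end tokens leak into the decoded text. A minimal sketch of that trimming step in isolation (trim_at_end_str and the '<|im_end|>' token are illustrative, not taken from the commit):

from typing import Optional


def trim_at_end_str(response: str, end_str: Optional[str]) -> str:
    """Truncate the response at end_str; return it unchanged if end_str is None or empty."""
    if end_str:
        return response.split(end_str)[0]
    return response


# Example with a hypothetical ending token the model fails to suppress:
print(trim_at_end_str('The answer is 42.<|im_end|>assistant', '<|im_end|>'))
# -> The answer is 42.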
opencompass/openicl/icl_evaluator/icl_hf_evaluator.py

@@ -342,3 +342,29 @@ class EDAccEvaluator(AccEvaluator):
             'predictions': preds,
             'references': golds,
         }
+
+
+@ICL_EVALUATORS.register_module()
+class AccwithDetailsEvaluator(BaseEvaluator):
+
+    def score(self, predictions, references, origin_prompt) -> dict:
+        if len(predictions) != len(references):
+            return {'error': 'preds and refrs have different length.'}
+
+        details = {}
+        correct, total = 0, 0
+        for index, (pred, ref) in enumerate(zip(predictions, references)):
+            is_correct = pred == ref
+            correct += is_correct
+            details[str(index)] = {
+                'prompt': origin_prompt[index],
+                'pred': pred,
+                'refr': ref,
+                'is_correct': is_correct,
+            }
+            total += 1
+
+        results = {'accuracy': correct / total * 100, 'details': details}
+        return results
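The new AccwithDetailsEvaluator reports exact-match accuracy and, unlike the plain accuracy evaluators, also returns a per-sample details dict keyed by index with the prompt, prediction, reference and correctness. A rough usage sketch, assuming the import path below (the file changed in this commit) resolves in your environment; the toy data is made up:

from opencompass.openicl.icl_evaluator.icl_hf_evaluator import AccwithDetailsEvaluator

evaluator = AccwithDetailsEvaluator()
output = evaluator.score(
    predictions=['A', 'B', 'C'],
    references=['A', 'B', 'D'],
    origin_prompt=['Q1', 'Q2', 'Q3'],
)
print(output['accuracy'])      # -> 66.66...
print(output['details']['2'])  # -> {'prompt': 'Q3', 'pred': 'C', 'refr': 'D', 'is_correct': False}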