Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
dbb20b82
Unverified
Commit
dbb20b82
authored
Oct 27, 2023
by
Fengzhe Zhou
Committed by
GitHub
Oct 27, 2023
Browse files
[Sync] update (#517)
parent
6f07af30
Changes
45
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
151 additions
and
57 deletions
+151
-57
opencompass/datasets/cmb.py
opencompass/datasets/cmb.py
+2
-2
opencompass/datasets/cmnli.py
opencompass/datasets/cmnli.py
+1
-1
opencompass/datasets/cmrc.py
opencompass/datasets/cmrc.py
+1
-1
opencompass/datasets/copa.py
opencompass/datasets/copa.py
+1
-1
opencompass/datasets/csl.py
opencompass/datasets/csl.py
+1
-1
opencompass/datasets/drcd.py
opencompass/datasets/drcd.py
+1
-1
opencompass/datasets/eprstmt.py
opencompass/datasets/eprstmt.py
+1
-1
opencompass/datasets/gsm8k.py
opencompass/datasets/gsm8k.py
+23
-0
opencompass/datasets/hellaswag.py
opencompass/datasets/hellaswag.py
+1
-1
opencompass/datasets/math.py
opencompass/datasets/math.py
+5
-1
opencompass/datasets/natural_question.py
opencompass/datasets/natural_question.py
+6
-1
opencompass/datasets/tnews.py
opencompass/datasets/tnews.py
+1
-1
opencompass/datasets/triviaqa.py
opencompass/datasets/triviaqa.py
+6
-1
opencompass/models/claude_api/postprocessors.py
opencompass/models/claude_api/postprocessors.py
+14
-0
opencompass/openicl/icl_evaluator/icl_em_evaluator.py
opencompass/openicl/icl_evaluator/icl_em_evaluator.py
+8
-1
opencompass/openicl/icl_inferencer/icl_base_inferencer.py
opencompass/openicl/icl_inferencer/icl_base_inferencer.py
+1
-2
opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py
opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py
+6
-0
opencompass/partitioners/base.py
opencompass/partitioners/base.py
+17
-8
opencompass/partitioners/naive.py
opencompass/partitioners/naive.py
+16
-4
opencompass/partitioners/size.py
opencompass/partitioners/size.py
+39
-29
No files found.
opencompass/datasets/cmb.py
View file @
dbb20b82
...
...
@@ -13,9 +13,9 @@ class CMBDataset(BaseDataset):
@
staticmethod
def
load
(
path
:
str
):
with
open
(
osp
.
join
(
path
,
'test.json'
),
'r'
)
as
f
:
with
open
(
osp
.
join
(
path
,
'test.json'
),
'r'
,
encoding
=
'utf-8'
)
as
f
:
test_data
=
json
.
load
(
f
)
with
open
(
osp
.
join
(
path
,
'val.json'
),
'r'
)
as
f
:
with
open
(
osp
.
join
(
path
,
'val.json'
),
'r'
,
encoding
=
'utf-8'
)
as
f
:
val_data
=
json
.
load
(
f
)
for
da
in
test_data
:
...
...
opencompass/datasets/cmnli.py
View file @
dbb20b82
...
...
@@ -13,7 +13,7 @@ class cmnliDataset_V2(BaseDataset):
@
staticmethod
def
load
(
path
):
data
=
[]
with
open
(
path
,
'r'
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
for
line
in
f
:
line
=
json
.
loads
(
line
)
if
line
[
'label'
]
==
'-'
:
...
...
opencompass/datasets/cmrc.py
View file @
dbb20b82
...
...
@@ -12,7 +12,7 @@ class CMRCDataset(BaseDataset):
@
staticmethod
def
load
(
path
:
str
):
with
open
(
path
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
data
=
json
.
load
(
f
)
# 将原始数据转换为所需的格式
rows
=
[]
...
...
opencompass/datasets/copa.py
View file @
dbb20b82
...
...
@@ -13,7 +13,7 @@ class COPADataset_V2(BaseDataset):
@
staticmethod
def
load
(
path
):
dataset
=
[]
with
open
(
path
,
'r'
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
for
line
in
f
:
line
=
json
.
loads
(
line
)
line
[
'label'
]
=
'AB'
[
line
[
'label'
]]
...
...
opencompass/datasets/csl.py
View file @
dbb20b82
...
...
@@ -31,7 +31,7 @@ class CslDataset_V2(BaseDataset):
@
staticmethod
def
load
(
path
):
data
=
[]
with
open
(
path
,
'r'
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
for
line
in
f
:
line
=
json
.
loads
(
line
)
item
=
{
...
...
opencompass/datasets/drcd.py
View file @
dbb20b82
...
...
@@ -12,7 +12,7 @@ class DRCDDataset(BaseDataset):
@
staticmethod
def
load
(
path
:
str
):
with
open
(
path
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
data
=
json
.
load
(
f
)
# 将原始数据转换为所需的格式
rows
=
[]
...
...
opencompass/datasets/eprstmt.py
View file @
dbb20b82
...
...
@@ -13,7 +13,7 @@ class eprstmtDataset_V2(BaseDataset):
@
staticmethod
def
load
(
path
):
data
=
[]
with
open
(
path
,
'r'
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
for
line
in
f
:
line
=
json
.
loads
(
line
)
item
=
{
...
...
opencompass/datasets/gsm8k.py
View file @
dbb20b82
from
opencompass.openicl
import
BaseEvaluator
from
opencompass.registry
import
TEXT_POSTPROCESSORS
...
...
@@ -26,3 +27,25 @@ def gsm8k_postprocess(text: str) -> str:
if
ret
[
i
].
isdigit
():
ret1
+=
ret
[
i
]
return
ret1
class
Gsm8kEvaluator
(
BaseEvaluator
):
def
score
(
self
,
predictions
,
references
):
if
len
(
predictions
)
!=
len
(
references
):
return
{
'error'
:
'predictions and references have different '
'length'
}
correct
=
0
count
=
0
details
=
[]
for
i
,
j
in
zip
(
predictions
,
references
):
detail
=
{
'pred'
:
i
,
'answers'
:
j
,
'correct'
:
False
}
count
+=
1
if
i
==
j
:
correct
+=
1
detail
[
'correct'
]
=
True
details
.
append
(
detail
)
result
=
{
'accuracy'
:
100
*
correct
/
count
,
'details'
:
details
}
return
result
opencompass/datasets/hellaswag.py
View file @
dbb20b82
...
...
@@ -49,7 +49,7 @@ class hellaswagDataset_V3(BaseDataset):
@
staticmethod
def
load
(
path
):
dataset
=
[]
with
open
(
path
,
'r'
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
for
line
in
f
:
data
=
json
.
loads
(
line
)
dataset
.
append
({
...
...
opencompass/datasets/math.py
View file @
dbb20b82
...
...
@@ -148,11 +148,15 @@ class MATHEvaluator(BaseEvaluator):
}
correct
=
0
count
=
0
details
=
[]
for
i
,
j
in
zip
(
predictions
,
references
):
detail
=
{
'pred'
:
i
,
'answer'
:
j
,
'correct'
:
False
}
count
+=
1
if
self
.
is_equiv
(
i
,
j
):
correct
+=
1
result
=
{
'accuracy'
:
100
*
correct
/
count
}
detail
[
'correct'
]
=
True
details
.
append
(
detail
)
result
=
{
'accuracy'
:
100
*
correct
/
count
,
'details'
:
details
}
return
result
def
_fix_fracs
(
self
,
string
):
...
...
opencompass/datasets/natural_question.py
View file @
dbb20b82
...
...
@@ -52,9 +52,14 @@ class NQEvaluator(BaseEvaluator):
processed_answers
=
[[
general_postprocess
(
j
).
lower
()
for
j
in
i
]
for
i
in
references
]
details
=
[]
cnt
=
0
for
pred
,
cand_ans
in
zip
(
processed_predictions
,
processed_answers
):
detail
=
{
'pred'
:
pred
,
'answer'
:
cand_ans
,
'correct'
:
False
}
cnt
+=
int
(
any
([
cand
==
pred
for
cand
in
cand_ans
]))
if
int
(
any
([
cand
==
pred
for
cand
in
cand_ans
])):
detail
[
'correct'
]
=
True
details
.
append
(
detail
)
score
=
cnt
/
len
(
predictions
)
*
100
return
{
'score'
:
score
}
return
{
'score'
:
score
,
'details'
:
details
}
opencompass/datasets/tnews.py
View file @
dbb20b82
...
...
@@ -67,7 +67,7 @@ class TNewsDataset_V2(BaseDataset):
}
data
=
[]
with
open
(
path
,
'r'
)
as
f
:
with
open
(
path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
for
line
in
f
:
line
=
json
.
loads
(
line
)
item
=
{
...
...
opencompass/datasets/triviaqa.py
View file @
dbb20b82
...
...
@@ -51,9 +51,14 @@ class TriviaQAEvaluator(BaseEvaluator):
processed_answers
=
[[
general_postprocess
(
j
).
lower
()
for
j
in
i
]
for
i
in
references
]
details
=
[]
cnt
=
0
for
pred
,
cand_ans
in
zip
(
processed_predictions
,
processed_answers
):
detail
=
{
'pred'
:
pred
,
'answer'
:
cand_ans
,
'correct'
:
False
}
cnt
+=
int
(
any
([
cand
==
pred
for
cand
in
cand_ans
]))
if
int
(
any
([
cand
==
pred
for
cand
in
cand_ans
])):
detail
[
'correct'
]
=
True
details
.
append
(
detail
)
score
=
cnt
/
len
(
predictions
)
*
100
return
{
'score'
:
score
}
return
{
'score'
:
score
,
'details'
:
details
}
opencompass/models/claude_api/postprocessors.py
View file @
dbb20b82
...
...
@@ -82,6 +82,20 @@ def strategyqa_pred_postprocess(text: str) -> str:
return
''
def
flores_postprocess
(
text
:
str
)
->
str
:
text
=
text
.
strip
().
split
(
'
\n
'
)[
-
1
].
strip
()
return
text
def
flores_postprocess_chinese
(
text
:
str
)
->
str
:
text
=
text
.
strip
().
split
(
'
\n
'
)[
-
1
].
strip
()
import
jieba
truncated_text
=
text
.
strip
().
split
(
'
\n
'
)[
0
]
cleaned_text
=
re
.
sub
(
r
'\s+'
,
' '
,
truncated_text
).
strip
()
cleaned_text
=
' '
.
join
(
jieba
.
cut
(
cleaned_text
))
return
cleaned_text
def
record_postprocess
(
text
:
str
)
->
str
:
match
=
re
.
search
(
r
'(?<=refers to )[^.]+'
,
text
)
...
...
opencompass/openicl/icl_evaluator/icl_em_evaluator.py
View file @
dbb20b82
...
...
@@ -24,11 +24,18 @@ class EMEvaluator(BaseEvaluator):
for
i
in
references
]
cnt
=
0
details
=
[]
for
pred
,
ans
,
origin_ans
in
zip
(
predictions
,
processed_answers
,
references
):
answers
=
list
(
set
(
ans
+
origin_ans
))
detail
=
{
'pred'
:
pred
,
'answer'
:
answers
}
if
pred
in
ans
or
pred
in
origin_ans
:
cnt
+=
1
detail
[
'correct'
]
=
True
else
:
detail
[
'correct'
]
=
False
details
.
append
(
detail
)
score
=
cnt
/
len
(
predictions
)
*
100
return
{
'score'
:
score
}
return
{
'score'
:
score
,
'details'
:
details
}
opencompass/openicl/icl_inferencer/icl_base_inferencer.py
View file @
dbb20b82
...
...
@@ -51,8 +51,7 @@ class BaseInferencer:
self
.
output_json_filepath
=
output_json_filepath
self
.
output_json_filename
=
output_json_filename
self
.
is_main_process
=
is_main_process
()
if
not
os
.
path
.
exists
(
self
.
output_json_filepath
):
os
.
makedirs
(
self
.
output_json_filepath
)
os
.
makedirs
(
self
.
output_json_filepath
,
exist_ok
=
True
)
def
inference
(
self
,
retriever
:
BaseRetriever
,
...
...
opencompass/openicl/icl_inferencer/icl_ppl_inferencer.py
View file @
dbb20b82
...
...
@@ -94,6 +94,7 @@ class PPLInferencer(BaseInferencer):
index
=
0
prompt_list
=
[]
sub_ppl_list
=
[]
token_num_list
=
[]
normalizing_prompt_list
=
[]
context_length_list
=
[]
...
...
@@ -144,6 +145,7 @@ class PPLInferencer(BaseInferencer):
mode
=
'ppl'
))
normalizing_prompt_list
.
append
(
normalizing_prompt
)
prompt_list
.
append
(
prompt
)
token_num_list
.
append
(
prompt_token_num
)
if
normalizing_str
is
not
None
:
normalizing_str_len
=
self
.
model
.
get_token_len_from_template
(
...
...
@@ -186,6 +188,10 @@ class PPLInferencer(BaseInferencer):
ice_str
=
self
.
model
.
parse_template
(
ice
[
idx
],
mode
=
'ppl'
)
output_handler
.
save_prompt_and_ppl
(
label
,
prompt
.
replace
(
ice_str
,
''
),
prompt
,
res
,
index
)
output_handler
.
results_dict
[
str
(
index
)][
f
'label:
{
str
(
label
)
}
'
][
'BPB'
]
=
res
*
token_num_list
[
idx
]
/
len
(
prompt
.
replace
(
ice_str
,
''
).
encode
())
index
=
index
+
1
ppl
.
append
(
sub_ppl_list
)
...
...
opencompass/partitioners/base.py
View file @
dbb20b82
from
abc
import
abstractmethod
from
copy
import
deepcopy
from
typing
import
Dict
,
List
from
typing
import
Dict
,
List
,
Optional
from
mmengine.config
import
ConfigDict
...
...
@@ -13,16 +13,24 @@ class BasePartitioner:
Args:
out_dir (str): The output directory of tasks.
keep_keys (List[str]): The keys to be kept from the experiment config
to the task config.
keep_keys (Optional[List[str]], optional): The keys to be kept from the
experiment config to the task config. Defaults to None. If None,
the following keys will be kept:
- eval.runner.task.judge_cfg
- eval.runner.task.dump_details
"""
def
__init__
(
self
,
out_dir
:
str
,
keep_keys
:
List
[
str
]
=
[
'eval.runner.task.judge_cfg'
]):
def
__init__
(
self
,
out_dir
:
str
,
keep_keys
:
Optional
[
List
[
str
]]
=
None
):
self
.
logger
=
get_logger
()
self
.
out_dir
=
out_dir
self
.
keep_keys
=
keep_keys
if
keep_keys
is
None
:
self
.
keep_keys
=
[
'eval.runner.task.judge_cfg'
,
'eval.runner.task.dump_details'
,
]
else
:
self
.
keep_keys
=
keep_keys
def
__call__
(
self
,
cfg
:
ConfigDict
)
->
List
[
Dict
]:
"""Generate tasks from config. Each task is defined as a
...
...
@@ -63,7 +71,8 @@ class BasePartitioner:
tgt_ptr
=
tgt_ptr
[
key
]
tgt_ptr
[
key_chain
[
-
1
]]
=
ori_ptr
[
key_chain
[
-
1
]]
except
Exception
:
self
.
logger
.
warning
(
f
'Key
{
k
}
not found in config, ignored.'
)
self
.
logger
.
debug
(
f
'Key
{
k
}
not found in config, ignored.'
)
self
.
logger
.
debug
(
f
'Additional config:
{
add_cfg
}
'
)
tasks
=
self
.
partition
(
models
,
datasets
,
...
...
opencompass/partitioners/naive.py
View file @
dbb20b82
import
os.path
as
osp
from
typing
import
Dict
,
List
from
typing
import
Dict
,
List
,
Optional
from
mmengine.config
import
Config
,
ConfigDict
...
...
@@ -11,15 +11,23 @@ from .base import BasePartitioner
@
PARTITIONERS
.
register_module
()
class
NaivePartitioner
(
BasePartitioner
):
"""Naive task partitioner. This partitioner will generate a task for each
model-dataset pair.
"""Naive task partitioner. This partitioner will generate a task for each
n
model-dataset pair
s
.
Args:
out_dir (str): The output directory of tasks.
n (int): The number of model-dataset pairs in each task.
keep_keys (List[str]): The keys to be kept from the experiment config
to the task config.
"""
def
__init__
(
self
,
out_dir
:
str
,
n
:
int
=
1
,
keep_keys
:
Optional
[
List
[
str
]]
=
None
):
super
().
__init__
(
out_dir
=
out_dir
,
keep_keys
=
keep_keys
)
self
.
n
=
n
def
partition
(
self
,
models
:
List
[
ConfigDict
],
datasets
:
List
[
ConfigDict
],
...
...
@@ -53,13 +61,17 @@ class NaivePartitioner(BasePartitioner):
tasks
=
[]
for
model
in
models
:
chunks
=
[]
for
dataset
in
datasets
:
filename
=
get_infer_output_path
(
model
,
dataset
,
out_dir
)
if
osp
.
exists
(
filename
):
continue
chunks
.
append
(
dataset
)
for
i
in
range
(
0
,
len
(
chunks
),
self
.
n
):
task
=
Config
({
'models'
:
[
model
],
'datasets'
:
[
[
dataset
]],
'datasets'
:
[
chunks
[
i
:
i
+
self
.
n
]],
'work_dir'
:
work_dir
,
**
add_cfg
})
...
...
opencompass/partitioners/size.py
View file @
dbb20b82
...
...
@@ -2,7 +2,7 @@ import copy
import
math
import
os.path
as
osp
from
fnmatch
import
fnmatch
from
typing
import
Dict
,
List
,
Tuple
,
Union
from
typing
import
Dict
,
List
,
Optional
,
Tuple
,
Union
import
mmengine
from
mmengine.config
import
Config
,
ConfigDict
...
...
@@ -24,6 +24,11 @@ class SizePartitioner(BasePartitioner):
max_task_size (int): The maximum size of a task.
gen_task_coef (int): The dataset cost measurement coefficient for
generation tasks.
strategy (str): The partition strategy. Supported strategies are:
'heuristic' and 'split'. Defaults to 'heuristic'.
heuristic: split large datasets into several tasks, merge small
datasets into one task.
split: split large datasets into several tasks only.
dataset_size_path (str): The path to the dataset size cache file.
keep_keys (list[str]): The keys to be kept from the experiment config
to the task config.
...
...
@@ -33,12 +38,17 @@ class SizePartitioner(BasePartitioner):
out_dir
:
str
,
max_task_size
:
int
=
40000
,
gen_task_coef
:
int
=
20
,
strategy
:
str
=
'heuristic'
,
dataset_size_path
:
str
=
'.cache/dataset_size.json'
,
keep_keys
:
List
[
str
]
=
[
'eval.runner.task.judge_cfg'
]
):
keep_keys
:
Optional
[
List
[
str
]
]
=
None
):
super
().
__init__
(
out_dir
=
out_dir
,
keep_keys
=
keep_keys
)
self
.
max_task_size
=
max_task_size
self
.
gen_task_coef
=
gen_task_coef
self
.
dataset_size_path
=
dataset_size_path
assert
strategy
in
(
'heuristic'
,
'split'
),
\
f
'Unsupported partition strategy:
{
strategy
}
. '
\
'Supported strategies are: `heuristic`, `split` .'
self
.
strategy
=
strategy
def
partition
(
self
,
models
:
List
[
ConfigDict
],
...
...
@@ -79,47 +89,47 @@ class SizePartitioner(BasePartitioner):
reverse
=
True
)
tasks
=
[]
for
model
in
models
:
task
=
Config
({
'models'
:
[
model
],
'datasets'
:
[[]],
'work_dir'
:
work_dir
,
**
add_cfg
})
num_data
=
0
chunks
=
[]
# elements: tuple(size, dataset_chunk)
for
dataset
in
datasets
:
filename
=
get_infer_output_path
(
model
,
dataset
,
out_dir
)
root
,
ext
=
osp
.
splitext
(
filename
)
# skip the task if the task output exists
if
osp
.
exists
(
filename
):
continue
dataset_size
=
self
.
get_cost
(
dataset
)
if
dataset_size
>
self
.
max_task_size
:
root
,
ext
=
osp
.
splitext
(
filename
)
dataset_splits
=
self
.
split_dataset
(
dataset
)
for
i
,
dataset_split
in
enumerate
(
dataset_splits
):
# skip the task it the task output exists
if
not
osp
.
exists
(
f
'
{
root
}
_
{
i
}{
ext
}
'
):
tasks
.
append
(
Config
({
'models'
:
[
model
],
'datasets'
:
[[
dataset_split
]],
'work_dir'
:
work_dir
,
**
add_cfg
}))
chunks
.
append
((
self
.
max_task_size
,
dataset_split
))
else
:
if
num_data
+
dataset_size
>
self
.
max_task_size
:
tasks
.
append
(
task
)
task
=
Config
({
chunks
.
append
((
dataset_size
,
dataset
))
if
self
.
strategy
==
'heuristic'
:
chunks
=
sorted
(
chunks
,
key
=
lambda
x
:
x
[
0
],
reverse
=
True
)
current_size
,
current_chunks
=
0
,
[]
for
index
in
range
(
len
(
chunks
)):
current_size
+=
chunks
[
index
][
0
]
current_chunks
.
append
(
chunks
[
index
][
1
])
if
index
==
len
(
chunks
)
-
1
or
current_size
+
chunks
[
index
+
1
][
0
]
>
self
.
max_task_size
:
tasks
.
append
(
Config
({
'models'
:
[
model
],
'datasets'
:
[
current_chunks
],
'work_dir'
:
work_dir
,
**
add_cfg
}))
current_size
,
current_chunks
=
0
,
[]
elif
self
.
strategy
==
'split'
:
for
_
,
dataset
in
chunks
:
tasks
.
append
(
Config
({
'models'
:
[
model
],
'datasets'
:
[[]],
'datasets'
:
[[
dataset
]],
'work_dir'
:
work_dir
,
**
add_cfg
})
num_data
=
0
task
[
'datasets'
][
0
].
append
(
dataset
)
num_data
=
num_data
+
dataset_size
if
task
[
'datasets'
][
0
]:
tasks
.
append
(
task
)
}))
return
tasks
@
property
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment