OpenDAS / opencompass · Commits · 3a68083e

[Sync] update configs (#734)

Unverified commit 3a68083e, authored Dec 25, 2023 by Fengzhe Zhou, committed by GitHub on Dec 25, 2023.
Parent: ad96f215
Showing 5 changed files with 86 additions and 26 deletions (+86 -26).
opencompass/summarizers/default.py   +29 -12
opencompass/tasks/openicl_eval.py    +9  -2
opencompass/utils/build.py           +1  -0
opencompass/utils/run.py             +32 -12
run.py                               +15 -0
opencompass/summarizers/default.py

@@ -19,6 +19,13 @@ from opencompass.utils.prompt import get_prompt_hash

 METRIC_WHITELIST = ['score', 'auc_score', 'accuracy', 'humaneval_pass@1', 'rouge1', 'avg_toxicity_score', 'bleurt_diff', 'matthews_correlation', 'truth']
 METRIC_BLACKLIST = ['bp', 'sys_len', 'ref_len']

+def model_abbr_from_cfg_used_in_summarizer(model):
+    if model.get('summarizer_abbr', None):
+        return model['summarizer_abbr']
+    else:
+        return model_abbr_from_cfg(model)
+
 class DefaultSummarizer:
     """Default summarizer in OpenCompass.
@@ -49,7 +56,13 @@ class DefaultSummarizer:

         self.model_cfgs = self.cfg['models']
         self.dataset_cfgs = self.cfg['datasets']
         self.work_dir = self.cfg['work_dir']
-        self.model_abbrs = [model_abbr_from_cfg(model) for model in self.model_cfgs]
+        model_abbrs = []
+        for model in self.model_cfgs:
+            model_abbr = model_abbr_from_cfg_used_in_summarizer(model)
+            if model_abbr in model_abbrs:
+                continue
+            model_abbrs.append(model_abbr)
+        self.model_abbrs = model_abbrs

     def _pick_up_results(self):
         """The function reads the numerical results of evaluations from the
@@ -71,9 +84,9 @@ class DefaultSummarizer:

         dataset_metrics: Dict[str, List[str]] = {}

         for model in self.model_cfgs:
-            model_abbr = model_abbr_from_cfg(model)
-            parsed_results[model_abbr] = {}
-            raw_results[model_abbr] = {}
+            model_abbr = model_abbr_from_cfg_used_in_summarizer(model)
+            parsed_results.setdefault(model_abbr, {})
+            raw_results.setdefault(model_abbr, {})
             for dataset in self.dataset_cfgs:
                 dataset_abbr = dataset_abbr_from_cfg(dataset)
                 filepath = get_infer_output_path(model, dataset, osp.join(self.work_dir, 'results'))
@@ -165,23 +178,23 @@ class DefaultSummarizer:

             if all(isinstance(dataset_abbr, (list, tuple)) for dataset_abbr in sg['subsets']):
                 group_metrics = [default_metric]
                 for dataset_abbr, metric in sg['subsets']:
-                    scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
+                    scores.setdefault(default_metric, {})[dataset_abbr + '@' + metric] = parsed_results[model_abbr][dataset_abbr][metric]
                     eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))
             else:
                 group_metrics = list(functools.reduce(lambda a, b: a & b, [set(dataset_metrics[dataset_abbr]) for dataset_abbr in sg['subsets']]))
                 if need_smart_metric and len(group_metrics) > 1:
                     for metric in group_metrics:
                         for dataset_abbr in sg['subsets']:
-                            scores.setdefault(metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
+                            scores.setdefault(metric, {})[dataset_abbr + '@' + metric] = parsed_results[model_abbr][dataset_abbr][metric]
                             eval_modes.append(dataset_eval_mode.get(sg['subsets'][0], 'unknown'))
                 else:
                     group_metrics = [default_metric]
                     for dataset_abbr in sg['subsets']:
                         metric = dataset_metrics[dataset_abbr][0]
-                        scores.setdefault(default_metric, {})[dataset_abbr] = parsed_results[model_abbr][dataset_abbr][metric]
+                        scores.setdefault(default_metric, {})[dataset_abbr + '@' + metric] = parsed_results[model_abbr][dataset_abbr][metric]
                         eval_modes.append(dataset_eval_mode.get(dataset_abbr, 'unknown'))

-            result = parsed_results[model_abbr].get(sg['name'], {})
+            result = {}
             for metric in scores:
                 if default_metric == 'standard_deviation':
                     avg = sum(scores[metric].values()) / len(scores[metric])
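With this change, subset scores inside a summary group are keyed as dataset_abbr + '@' + metric instead of just dataset_abbr, so one dataset can contribute several metrics without the entries colliding. A hedged sketch of the resulting structure (dataset names, metric names and values are illustrative only):

# Example shape of 'scores' after the loop above (values are made up):
scores = {
    'naive_average': {
        'gsm8k@accuracy': 62.3,
        'math@accuracy': 31.8,
    }
}
# eval_modes collects one entry per subset, e.g. ['gen', 'gen'].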
@@ -190,7 +203,11 @@ class DefaultSummarizer:

                 else:
                     if sg.get('weights', []):
                         # check sg['weights'][k] != 0 in case of scores[metric][k] is NaN
-                        numerator = sum(scores[metric][k] * sg['weights'][k] for k in sg['weights'] if sg['weights'][k] != 0)
+                        try:
+                            numerator = sum(scores[metric][k] * sg['weights'][k] for k in sg['weights'] if sg['weights'][k] != 0)
+                        except KeyError:
+                            tmp_scores = {metric: {k.split('@')[0]: v for k, v in scores[metric].items()} for metric in scores}
+                            numerator = sum(tmp_scores[metric][k] * sg['weights'][k] for k in sg['weights'] if sg['weights'][k] != 0)
                         denominator = sum(sg['weights'].values())
                     else:
                         numerator = sum(scores[metric].values())
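Because group weights are still keyed by the plain dataset abbreviation while scores are now keyed by 'dataset@metric', the weighted average falls back to stripping the '@metric' suffix when a direct lookup fails. A minimal standalone sketch of that fallback, with made-up names and numbers:

# Hypothetical inputs: weights keyed by dataset, scores keyed by 'dataset@metric'.
weights = {'gsm8k': 2, 'math': 1}
scores_for_metric = {'gsm8k@accuracy': 60.0, 'math@accuracy': 30.0}

try:
    numerator = sum(scores_for_metric[k] * weights[k] for k in weights if weights[k] != 0)
except KeyError:
    # Strip the '@metric' suffix so keys line up with the weight table again.
    plain = {k.split('@')[0]: v for k, v in scores_for_metric.items()}
    numerator = sum(plain[k] * weights[k] for k in weights if weights[k] != 0)

average = numerator / sum(weights.values())  # (2*60 + 1*30) / 3 = 50.0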
@@ -200,9 +217,9 @@ class DefaultSummarizer:

             eval_mode = eval_modes[0] if len(eval_modes) == 1 else 'mixed'

             # add to global results
-            raw_results[model_abbr][sg['name']] = scores
-            parsed_results[model_abbr][sg['name']] = result
-            dataset_metrics[sg['name']] = group_metrics
+            raw_results[model_abbr].setdefault(sg['name'], {}).update(scores)
+            parsed_results[model_abbr].setdefault(sg['name'], {}).update(result)
+            dataset_metrics.setdefault(sg['name'], []).extend(group_metrics)
             dataset_eval_mode[sg['name']] = eval_mode

         return raw_results, parsed_results, dataset_metrics, dataset_eval_mode
opencompass/tasks/openicl_eval.py

@@ -198,7 +198,8 @@ class OpenICLEvalTask(BaseTask):

                     'incorrect_bpb'] = self.calculate_bpb(pred_dicts)
                 else:
                     result['incorrect_bpb'] = result['correct_bpb'] = -1
-            except Exception:
+            except Exception as e:
+                self.logger.warning(f'Skip dumping details due to: {e}.')
                 result['incorrect_bpb'] = result['correct_bpb'] = -1
         else:
             result.pop('details', None)
@@ -288,13 +289,19 @@ class OpenICLEvalTask(BaseTask):

                 result['predictions'] = str(predictions[i])
                 result['references'] = str(references[i])
                 result['correct'] = str(predictions[i]) == str(references[i])
-            else:
+            elif details is not None:
                 results['type'] = 'GEN'
                 result['prompt'] = origin_prediction['origin_prompt']
                 result['origin_prediction'] = pred_dicts[i]['prediction']
                 result['predictions'] = details[i]['pred']
                 result['references'] = details[i]['answer']
                 result['correct'] = details[i]['correct']
+            else:
+                results['type'] = 'GEN'
+                result['prompt'] = origin_prediction['origin_prompt']
+                result['origin_prediction'] = pred_dicts[i]['prediction']
+                result['predictions'] = str(predictions[i])
+                result['references'] = str(references[i])
             results[str(i)] = result
         return results
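For GEN-style tasks the per-sample record is now written even when the evaluator returns no details, falling back to the raw prediction/reference strings. A hedged sketch of what one dumped entry might look like in that fallback branch (all field values are illustrative):

# Example of the dumped structure (values are made up):
results = {
    'type': 'GEN',
    '0': {
        'prompt': 'Question: 2 + 2 = ?\nAnswer:',
        'origin_prediction': ' 4',
        'predictions': '4',
        'references': '4',
    },
}
# Note the fallback branch does not set 'correct', since no evaluator details exist.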
opencompass/utils/build.py

@@ -19,5 +19,6 @@ def build_model_from_cfg(model_cfg: ConfigDict):

     model_cfg.pop('max_out_len', None)
     model_cfg.pop('batch_size', None)
     model_cfg.pop('abbr', None)
+    model_cfg.pop('summarizer_abbr', None)
     model_cfg.pop('pred_postprocessor', None)
     return MODELS.build(model_cfg)
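summarizer_abbr is a summarizer-only field, so it is stripped here alongside abbr and the other run-level keys before the config reaches the model registry. A rough illustration of the failure this avoids, assuming the registry forwards leftover config keys as constructor kwargs (model class name is an example, not from this commit):

# Illustrative only:
cfg = dict(type='HuggingFaceCausalLM', path='some/model', summarizer_abbr='my-model')
# MODELS.build(cfg) would effectively call
#     HuggingFaceCausalLM(path='some/model', summarizer_abbr='my-model')
# and likely raise TypeError: unexpected keyword argument 'summarizer_abbr'
# unless the key is popped first, as done above.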
opencompass/utils/run.py

@@ -4,6 +4,7 @@ from typing import List, Union

 import tabulate
 from mmengine.config import Config

+from opencompass.datasets.custom import make_custom_dataset_config
 from opencompass.partitioners import NaivePartitioner, SizePartitioner
 from opencompass.runners import DLCRunner, LocalRunner, SlurmRunner
 from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
@@ -56,18 +57,37 @@ def get_config_from_arg(args) -> Config:

     3. Huggingface parameter groups and args.datasets
     """
     if args.config:
-        return Config.fromfile(args.config, format_python_code=False)
-    if args.datasets is None:
-        raise ValueError('You must specify "--datasets" if you do not specify '
-                         'a config file path.')
+        config = Config.fromfile(args.config, format_python_code=False)
+        for i, dataset in enumerate(config['datasets']):
+            if 'type' not in dataset:
+                config['datasets'][i] = make_custom_dataset_config(dataset)
+        return config
+
+    # parse dataset args
+    if not args.datasets and not args.custom_dataset_path:
+        raise ValueError('You must specify "--datasets" or '
+                         '"--custom-dataset-path" if you do not specify a '
+                         'config file path.')
     datasets = []
-    datasets_dir = os.path.join(args.config_dir, 'datasets')
-    for dataset in match_cfg_file(datasets_dir, args.datasets):
-        get_logger().info(f'Loading {dataset[0]}: {dataset[1]}')
-        cfg = Config.fromfile(dataset[1])
-        for k in cfg.keys():
-            if k.endswith('_datasets'):
-                datasets += cfg[k]
+    if args.datasets:
+        datasets_dir = os.path.join(args.config_dir, 'datasets')
+        for dataset in match_cfg_file(datasets_dir, args.datasets):
+            get_logger().info(f'Loading {dataset[0]}: {dataset[1]}')
+            cfg = Config.fromfile(dataset[1])
+            for k in cfg.keys():
+                if k.endswith('_datasets'):
+                    datasets += cfg[k]
+    else:
+        dataset = {'path': args.custom_dataset_path}
+        if args.custom_dataset_infer_method is not None:
+            dataset['infer_method'] = args.custom_dataset_infer_method
+        if args.custom_dataset_data_type is not None:
+            dataset['data_type'] = args.custom_dataset_data_type
+        if args.custom_dataset_meta_path is not None:
+            dataset['meta_path'] = args.custom_dataset_meta_path
+        dataset = make_custom_dataset_config(dataset)
+        datasets.append(dataset)

+    # parse model args
     if not args.models and not args.hf_path:
         raise ValueError('You must specify a config file path, '
                          'or specify --models and --datasets, or '
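make_custom_dataset_config expands a bare dataset stub (any entry without a type key) into a full OpenCompass dataset config, both for entries read from a config file and for the new --custom-dataset-* CLI path. A hedged sketch of the stub it receives, mirroring the code above (the file path and values are made up):

from opencompass.datasets.custom import make_custom_dataset_config

# Stub built from CLI args before expansion (illustrative):
dataset = {
    'path': 'data/my_qa.jsonl',   # from --custom-dataset-path
    'data_type': 'qa',            # from --custom-dataset-data-type, optional
    'infer_method': 'gen',        # from --custom-dataset-infer-method, optional
}
dataset = make_custom_dataset_config(dataset)  # expands the stub into a full dataset config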
@@ -98,7 +118,7 @@ def get_config_from_arg(args) -> Config:

                           pad_token_id=args.pad_token_id,
                           run_cfg=dict(num_gpus=args.num_gpus))
         models.append(model)

+    # parse summarizer args
     summarizer = args.summarizer if args.summarizer is not None else 'example'
     summarizers_dir = os.path.join(args.config_dir, 'summarizers')
     s = match_cfg_file(summarizers_dir, [summarizer])[0]
run.py

@@ -138,6 +138,9 @@ def parse_args():

     # set hf args
     hf_parser = parser.add_argument_group('hf_args')
     parse_hf_args(hf_parser)
+    # set custom dataset args
+    custom_dataset_parser = parser.add_argument_group('custom_dataset_args')
+    parse_custom_dataset_args(custom_dataset_parser)

     args = parser.parse_args()
     if args.slurm:
         assert args.partition is not None, (
@@ -199,6 +202,18 @@ def parse_hf_args(hf_parser):

     hf_parser.add_argument('--pad-token-id', type=int)

+
+def parse_custom_dataset_args(custom_dataset_parser):
+    """These args are all for the quick construction of custom datasets."""
+    custom_dataset_parser.add_argument('--custom-dataset-path', type=str)
+    custom_dataset_parser.add_argument('--custom-dataset-meta-path', type=str)
+    custom_dataset_parser.add_argument('--custom-dataset-data-type',
+                                       type=str,
+                                       choices=['mcq', 'qa'])
+    custom_dataset_parser.add_argument('--custom-dataset-infer-method',
+                                       type=str,
+                                       choices=['gen', 'ppl'])
+
+
 def main():
     args = parse_args()
     if args.dry_run:
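Together with the get_config_from_arg changes in opencompass/utils/run.py, these flags allow evaluating a local dataset without writing a config file. A hedged example invocation (model path and dataset file are placeholders, not from this commit):

# Illustrative only; paths and model names are placeholders.
python run.py \
    --hf-path meta-llama/Llama-2-7b-chat-hf \
    --custom-dataset-path data/my_mcq.csv \
    --custom-dataset-data-type mcq \
    --custom-dataset-infer-method ppl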