OpenDAS / opencompass · Commit 32f40a8f (unverified)

[Sync] Sync with internal codes 2023.01.08 (#777)

Authored Jan 08, 2024 by Fengzhe Zhou, committed via GitHub on Jan 08, 2024.
Parent commit: 8194199d

Showing 18 changed files with 418 additions and 44 deletions (+418, -44).
opencompass/openicl/icl_evaluator/__init__.py                        +1    -0
opencompass/openicl/icl_evaluator/icl_hf_evaluator.py                +14   -0
opencompass/openicl/icl_evaluator/icl_misc_evaluator.py              +8    -0
opencompass/openicl/icl_inferencer/__init__.py                       +2    -1
opencompass/openicl/icl_inferencer/icl_ll_inferencer.py              +11   -7
opencompass/openicl/icl_inferencer/icl_mink_percent_inferencer.py    +189  -0
opencompass/partitioners/__init__.py                                 +1    -0
opencompass/partitioners/num_worker.py                               +128  -0
opencompass/runners/slurm.py                                         +1    -1
opencompass/runners/slurm_sequential.py                              +18   -12
opencompass/summarizers/default.py                                   +2    -0
opencompass/summarizers/summarizer_pretrain.py                       +8    -7
opencompass/tasks/openicl_eval.py                                    +3    -6
opencompass/tasks/openicl_infer.py                                   +3    -1
opencompass/utils/text_postprocessors.py                             +12   -7
requirements/agent.txt                                               +6    -1
requirements/runtime.txt                                             +1    -0
tools/update_dataset_suffix.py                                       +10   -1
opencompass/openicl/icl_evaluator/__init__.py  (+1, -0)

@@ -5,6 +5,7 @@ from .icl_circular_evaluator import CircularEvaluator  # noqa
 from .icl_em_evaluator import EMEvaluator  # noqa
 from .icl_hf_evaluator import *  # noqa
 from .icl_jieba_rouge_evaluator import JiebaRougeEvaluator  # noqa
+from .icl_misc_evaluator import AverageMinKEvaluator  # noqa
 from .icl_misc_evaluator import AveragePPLEvaluator  # noqa
 from .icl_toxic_evaluator import ToxicEvaluator  # noqa
 from .lm_evaluator import LMEvaluator  # noqa
opencompass/openicl/icl_evaluator/icl_hf_evaluator.py  (+14, -0)

@@ -210,6 +210,20 @@ class BleuEvaluator(HuggingfaceEvaluator):
         super().__init__(metric='sacrebleu')


+class BleuFloresEvaluator(HuggingfaceEvaluator):
+    """Bleu evaluator using flores200 tokenize."""
+
+    def __init__(self) -> None:
+        super().__init__(metric='sacrebleu')
+
+    def _preprocess(self, predictions: List, references: List) -> dict:
+        return {
+            'predictions': predictions,
+            'references': references,
+            'tokenize': 'flores200',
+        }
+
+
 @ICL_EVALUATORS.register_module()
 class MccEvaluator(AccEvaluator):
     """Matthews correlation evaluator."""
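For context, `HuggingfaceEvaluator` passes the dict returned by `_preprocess` through to the metric's `compute()` call, so the only substantive change here is the extra `tokenize` key. A minimal sketch of the effect, assuming the HuggingFace `evaluate` hub metric for sacrebleu and a sacrebleu version that ships the FLORES-200 SentencePiece tokenizer:

    import evaluate

    sacrebleu = evaluate.load('sacrebleu')
    result = sacrebleu.compute(
        predictions=['the cat sat on the mat'],
        references=[['the cat sat on the mat']],
        tokenize='flores200',  # SentencePiece tokenizer covering 200 languages
    )
    print(result['score'])  # BLEU computed on flores200 tokens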
opencompass/openicl/icl_evaluator/icl_misc_evaluator.py  (+8, -0)

@@ -9,3 +9,11 @@ class AveragePPLEvaluator(BaseEvaluator):
     def score(self, ppl):
         average_ppl = sum(ppl) / len(ppl)
         return {'average_ppl': average_ppl}
+
+
+@ICL_EVALUATORS.register_module()
+class AverageMinKEvaluator(BaseEvaluator):
+
+    def score(self, mink):
+        average_mink = sum(mink) / len(mink)
+        return {'average_mink': average_mink}
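Illustrative use, mirroring `AveragePPLEvaluator` just above: the new evaluator simply averages the per-sample Min-K% scores produced by the `MinKPercentInferencer` added below.

    evaluator = AverageMinKEvaluator()
    print(evaluator.score(mink=[-2.4, -1.8, -3.0]))  # {'average_mink': -2.4}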
opencompass/openicl/icl_inferencer/__init__.py  (+2, -1)

@@ -4,7 +4,8 @@ from .icl_base_inferencer import BaseInferencer  # noqa
 from .icl_chat_inferencer import ChatInferencer  # noqa
 from .icl_clp_inferencer import CLPInferencer  # noqa
 from .icl_gen_inferencer import GenInferencer  # noqa
-from .icl_loglikelihood_inferencer import LoglikelihoodInferencer  # noqa
+from .icl_ll_inferencer import LLInferencer  # noqa
+from .icl_mink_percent_inferencer import MinKPercentInferencer  # noqa
 from .icl_ppl_inferencer import PPLInferencer  # noqa
 from .icl_ppl_only_inferencer import PPLOnlyInferencer  # noqa
 from .icl_sc_inferencer import SCInferencer  # noqa
opencompass/openicl/icl_inferencer/icl_loglikelihood_inferencer.py → opencompass/openicl/icl_inferencer/icl_ll_inferencer.py  (renamed, +11, -7)

@@ -18,7 +18,7 @@ logger = get_logger(__name__)


 @ICL_INFERENCERS.register_module()
-class LoglikelihoodInferencer(BaseInferencer):
+class LLInferencer(BaseInferencer):
     """Loglikelihood Inferencer class to evaluate by loglikelihood.

     Attributes:
@@ -60,7 +60,7 @@ class LoglikelihoodInferencer(BaseInferencer):
                   output_json_filepath: Optional[str] = None,
                   output_json_filename: Optional[str] = None) -> List:
         # 1. Preparation for output logs
-        output_handler = LoglikelihoodInferencerOutputHandler()
+        output_handler = LLInferencerOutputHandler()

         sub_predictions = []
         ppl = []
@@ -126,8 +126,10 @@ class LoglikelihoodInferencer(BaseInferencer):
                 token_num_list.append(prompt_token_num)
                 cont_list.append(retriever.test_ds[idx]['cont'])

-            # 5.2 Get PPL
-            logger.info(f"Calculating PPL for prompts labeled '{label}'")
+            # 5.2 Get loglikelihood
+            logger.info(
+                f"Calculating Loglikelihood for prompts labeled '{label}'"
+            )  # noqa
             for idx in trange(0,
                               len(prompt_list),
                               self.batch_size,
@@ -137,8 +139,10 @@ class LoglikelihoodInferencer(BaseInferencer):
                 with torch.no_grad():
                     # mainly modify compared to PPLInferencer
-                    sub_res = self.model.get_loglikelihood_from_template(
-                        sub_prompt_list,
-                        sub_cont_list).tolist()
+                    sub_inputs = self.model.parse_template(
+                        sub_prompt_list,
+                        mode='ppl')
+                    sub_res = self.model.get_loglikelihood(
+                        sub_inputs, sub_cont_list).tolist()
                 for res, prompt in zip(
                         sub_res,
                         self.model.parse_template(sub_prompt_list,
@@ -174,7 +178,7 @@ class LoglikelihoodInferencer(BaseInferencer):
     ]


-class LoglikelihoodInferencerOutputHandler:
+class LLInferencerOutputHandler:
     results_dict = {}

     def __init__(self) -> None:
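Downstream, dataset configs select the inferencer by type name, and `DefaultSummarizer` (changed below) maps such datasets to the new 'll' eval mode. A hypothetical fragment of a loglikelihood-mode dataset config after the rename (the retriever choice is a placeholder):

    winogrande_infer_cfg = dict(
        retriever=dict(type='ZeroRetriever'),
        inferencer=dict(type='LLInferencer'),  # formerly LoglikelihoodInferencer
    )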
opencompass/openicl/icl_inferencer/icl_mink_percent_inferencer.py  (new file, mode 100644, +189, -0)

"""PPL Inferencer."""

import os
from typing import List, Optional

import mmengine
import torch
from tqdm import tqdm

from opencompass.models.base import BaseModel
from opencompass.registry import ICL_INFERENCERS

from ..icl_prompt_template import PromptTemplate
from ..icl_retriever import BaseRetriever
from ..utils import get_logger
from .icl_base_inferencer import BaseInferencer, dump_results_dict

logger = get_logger(__name__)


@ICL_INFERENCERS.register_module()
class MinKPercentInferencer(BaseInferencer):
    """PPLOnlyInferencer class to calculate PPL and PPL only, no choice is
    made. This Inferencer is usually used along with AveragePPLEvaluator.

    Attributes:
        model (:obj:`BaseModel`, optional): The module to inference.
        max_seq_len (:obj:`int`): Maximum number of tokenized words allowed by
            the LM.
        batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`
        output_json_filepath (:obj:`str`, optional): File path for output
            `JSON` file.
        output_json_filename (:obj:`str`, optional): File name for output
            `JSON` file.
        save_every (:obj:`int`, optional): Save intermediate results every
            `save_every` iters.
    """

    def __init__(
            self,
            model: BaseModel,
            max_seq_len: Optional[int] = None,
            batch_size: Optional[int] = 1,
            output_json_filepath: Optional[str] = './icl_inference_output',
            output_json_filename: Optional[str] = 'predictions',
            save_every: Optional[int] = 1,
            **kwargs) -> None:
        super().__init__(
            model=model,
            max_seq_len=max_seq_len,
            batch_size=batch_size,
            output_json_filename=output_json_filename,
            output_json_filepath=output_json_filepath,
            **kwargs,
        )
        self.save_every = save_every

    def inference(self,
                  retriever: BaseRetriever,
                  ice_template: Optional[PromptTemplate] = None,
                  prompt_template: Optional[PromptTemplate] = None,
                  output_json_filepath: Optional[str] = None,
                  output_json_filename: Optional[str] = None) -> List:
        # 1. Preparation for output logs
        output_handler = PPLOnlyInferencerOutputHandler()

        if output_json_filepath is None:
            output_json_filepath = self.output_json_filepath
        if output_json_filename is None:
            output_json_filename = self.output_json_filename

        # 2. Get results of retrieval process
        ice_idx_list = retriever.retrieve()

        # 3. Generate prompts for testing input
        prompt_list = self.get_generation_prompt_list_from_retriever_indices(
            ice_idx_list,
            retriever,
            max_seq_len=self.max_seq_len,
            ice_template=ice_template,
            prompt_template=prompt_template)

        # 3.1 Fetch and zip prompt & gold answer if output column exists
        ds_reader = retriever.dataset_reader

        assert ds_reader.output_column is None, (
            'PPLOnlyInferencer supports `output_column=None` only.')

        # Create tmp json file for saving intermediate results and future
        # resuming
        index = 0
        tmp_json_filepath = os.path.join(output_json_filepath,
                                         'tmp_' + output_json_filename)
        if os.path.exists(tmp_json_filepath):
            # TODO: move resume to output handler
            try:
                tmp_result_dict = mmengine.load(tmp_json_filepath)
            except Exception:
                pass
            else:
                output_handler.results_dict = tmp_result_dict
                index = len(tmp_result_dict)

        # 4. Wrap prompts with Dataloader
        dataloader = self.get_dataloader(prompt_list[index:], self.batch_size)

        # 5. Inference for prompts in each batch
        logger.info('Starting inference process...')
        for datum in tqdm(dataloader, disable=not self.is_main_process):
            entry = datum
            # 5-1. Inference with local model
            with torch.no_grad():
                sub_inputs = self.model.parse_template(entry, mode='ppl')
                minks = self.model.get_mink_percent(sub_inputs).tolist()

            parsed_entries = self.model.parse_template(entry, mode='gen')
            # 5-3. Save current output
            for prompt, mink, in zip(parsed_entries, minks):
                output_handler.save_results(prompt, mink, index)
                index = index + 1

            # 5-4. Save intermediate results
            if (self.save_every is not None and index % self.save_every == 0
                    and self.is_main_process):
                output_handler.write_to_json(output_json_filepath,
                                             'tmp_' + output_json_filename)

        # 6. Output
        if self.is_main_process:
            os.makedirs(output_json_filepath, exist_ok=True)
            output_handler.write_to_json(output_json_filepath,
                                         output_json_filename)
            if os.path.exists(tmp_json_filepath):
                os.remove(tmp_json_filepath)

        return [
            sample['mink'] for sample in output_handler.results_dict.values()
        ]

    def get_generation_prompt_list_from_retriever_indices(
            self,
            ice_idx_list: List[List[int]],
            retriever: BaseRetriever,
            max_seq_len: Optional[int] = None,
            ice_template: Optional[PromptTemplate] = None,
            prompt_template: Optional[PromptTemplate] = None):
        prompt_list = []
        for idx, ice_idx in enumerate(ice_idx_list):
            ice = retriever.generate_ice(ice_idx, ice_template=ice_template)
            prompt = retriever.generate_prompt_for_generate_task(
                idx,
                ice,
                ice_template=ice_template,
                prompt_template=prompt_template)
            if max_seq_len is not None:
                prompt_token_num = self.model.get_token_len_from_template(
                    prompt, mode='gen')
                while len(ice_idx) > 0 and prompt_token_num > max_seq_len:
                    ice_idx = ice_idx[:-1]
                    ice = retriever.generate_ice(ice_idx,
                                                 ice_template=ice_template)
                    prompt = retriever.generate_prompt_for_generate_task(
                        idx,
                        ice,
                        ice_template=ice_template,
                        prompt_template=prompt_template)
                    prompt_token_num = self.model.get_token_len_from_template(
                        prompt, mode='gen')
            prompt_list.append(prompt)
        return prompt_list


class PPLOnlyInferencerOutputHandler:
    origin_prompt_dict = {}
    output_dict = {}
    results_dict = {}

    def __init__(self) -> None:
        self.results_dict = {}

    def write_to_json(self, save_dir: str, filename: str):
        """Dump the result to a json file."""
        dump_results_dict(self.results_dict, os.path.join(save_dir, filename))

    def save_results(self, origin_prompt, mink, idx):
        self.results_dict[str(idx)] = {
            'origin_prompt': origin_prompt,
            'mink': mink,
        }
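The scoring itself lives in the model interface (`self.model.get_mink_percent`), not in this file. As background, the Min-K% Prob statistic (Shi et al., "Detecting Pretraining Data from Large Language Models") averages the log-probabilities of the k% least likely tokens in a sequence, on the intuition that sequences seen during pretraining contain fewer surprising tokens. A standalone sketch of the statistic, assuming per-token log-probs are already available:

    import torch


    def mink_percent(token_logprobs: torch.Tensor, k: float = 0.2) -> float:
        """Mean log-prob of the k% lowest-probability tokens."""
        num = max(1, int(len(token_logprobs) * k))
        lowest, _ = torch.topk(token_logprobs, num, largest=False)
        return lowest.mean().item()


    # a sequence containing very unlikely tokens scores low (here: -5.2)
    print(mink_percent(torch.tensor([-0.1, -5.2, -0.3, -4.8, -0.2])))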
opencompass/partitioners/__init__.py  (+1, -0)

 from .mm_naive import *  # noqa: F401, F403
 from .naive import *  # noqa: F401, F403
+from .num_worker import *  # noqa: F401, F403
 from .size import *  # noqa: F401, F403
opencompass/partitioners/num_worker.py  (new file, mode 100644, +128, -0)

import copy
import math
import os.path as osp
from typing import Dict, List, Optional

import mmengine
from mmengine.config import Config, ConfigDict

from opencompass.registry import PARTITIONERS
from opencompass.utils import (build_dataset_from_cfg, dataset_abbr_from_cfg,
                               get_infer_output_path)

from .base import BasePartitioner


@PARTITIONERS.register_module()
class NumWorkerPartitioner(BasePartitioner):
    """Task partitioner based on the pre-defined number of workers.

    Args:
        out_dir (str): The output directory of tasks.
        num_worker (int): The number of workers. default: 8.
        min_task_size (int): The minimum size of a task. default: 16.
        dataset_size_path (str): The path to the dataset size cache file.
        keep_keys (list[str]): The keys to be kept from the experiment config
            to the task config.
    """

    def __init__(self,
                 out_dir: str,
                 num_worker: int = 8,
                 min_task_size: int = 16,
                 dataset_size_path: str = '.cache/dataset_size.json',
                 keep_keys: Optional[List[str]] = None):
        super().__init__(out_dir=out_dir, keep_keys=keep_keys)
        self.num_worker = num_worker
        self.min_task_size = min_task_size
        self.dataset_size_path = dataset_size_path

    def partition(self,
                  model_dataset_combinations: List[Dict[str, List]],
                  work_dir: str,
                  out_dir: str,
                  add_cfg: Dict = {}) -> List[ConfigDict]:
        # intentionally avoid any sort here,
        # for user's ability to manipulate the order
        tasks = []
        for comb in model_dataset_combinations:
            for model in comb['models']:
                chunks = []
                for dataset in comb['datasets']:
                    filename = get_infer_output_path(model, dataset, out_dir)
                    # skip the task if the task output exists
                    if osp.exists(filename):
                        continue
                    dataset_size = self.get_size(dataset)
                    if dataset_size > self.min_task_size:
                        root, ext = osp.splitext(filename)
                        dataset_splits = self.split_dataset(dataset)
                        for i, dataset_split in enumerate(dataset_splits):
                            if not osp.exists(f'{root}_{i}{ext}'):
                                chunks.append(dataset_split)
                    else:
                        chunks.append(dataset)

                buckets = [[] for _ in range(self.num_worker)]
                for i, chunk in enumerate(chunks):
                    buckets[i % self.num_worker].append(chunk)

                for bucket in buckets:
                    if len(bucket) > 0:
                        tasks.append(
                            Config({
                                'models': [model],
                                'datasets': [bucket],
                                'work_dir': work_dir,
                                **add_cfg
                            }))
        return tasks

    @property
    def dataset_size(self):
        if not hasattr(self, '_dataset_size'):
            if osp.exists(self.dataset_size_path):
                self._dataset_size = mmengine.load(self.dataset_size_path)
            else:
                self._dataset_size = {}
        return self._dataset_size

    def split_dataset(self, dataset_cfg: ConfigDict) -> List[ConfigDict]:
        """Split dataset into several parts."""
        dataset_size = self.get_size(dataset_cfg)
        split_configs = []
        abbr = dataset_abbr_from_cfg(dataset_cfg)
        # evenly distribute the task
        num_split = self.num_worker
        step = max(math.ceil(dataset_size / num_split), self.min_task_size)
        for part, i in enumerate(range(0, dataset_size, step)):
            cfg = copy.deepcopy(dataset_cfg)
            cfg['abbr'] = abbr + f'_{part}'
            test_range = cfg['reader_cfg'].get('test_range', '')
            cfg['reader_cfg']['test_range'] = f'{test_range}[{i}:{i + step}]'
            split_configs.append(cfg)
        return split_configs

    def get_size(self, dataset: ConfigDict) -> int:
        dataset_abbr = dataset_abbr_from_cfg(dataset)

        test_range = dataset.reader_cfg.get('test_range', '')

        if dataset_abbr in self.dataset_size:
            actual_size = eval('len(range(self.dataset_size[dataset_abbr])'
                               f'{test_range})')
            return actual_size

        dataset = build_dataset_from_cfg(dataset)
        self.dataset_size[dataset_abbr] = len(dataset.test)

        mmengine.mkdir_or_exist('.cache/')
        mmengine.dump(self.dataset_size,
                      self.dataset_size_path,
                      indent=4,
                      ensure_ascii=False)

        actual_size = eval('len(range(self.dataset_size[dataset_abbr])'
                           f'{test_range})')
        return actual_size
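A hedged sketch of selecting the new partitioner in an eval config; the runner and task entries are the usual OpenCompass ones, shown with placeholder values:

    infer = dict(
        partitioner=dict(
            type='NumWorkerPartitioner',
            num_worker=8,      # chunks are dealt round-robin into 8 buckets
            min_task_size=16,  # datasets at or below this size are never split
        ),
        runner=dict(
            type='LocalRunner',
            max_num_workers=8,
            task=dict(type='OpenICLInferTask'),
        ),
    )

Each split rewrites `reader_cfg['test_range']`: a 100-row dataset with `num_worker=8` uses `step = max(ceil(100 / 8), 16) = 16`, producing test ranges `[0:16]`, `[16:32]`, and so on.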
opencompass/runners/slurm.py  (+1, -1)

@@ -110,7 +110,7 @@ class SlurmRunner(BaseRunner):
             tmpl += f' --gres=gpu:{num_gpus}'
         for extra_cmd in self.extra_command:
             tmpl += f' {extra_cmd}'
-        tmpl += f" -N1 -J '{task_name[:512]}'" + ' {task_cmd}'
+        tmpl += f" -N1 -u -J '{task_name[:512]}'" + ' {task_cmd}'
         get_cmd = partial(task.get_command,
                           cfg_path=param_file,
                           template=tmpl)

(`srun -u` disables output buffering so task logs stream in real time; the same flag is added to the sequential runner below.)
opencompass/runners/slurm_sequential.py  (+18, -12)

@@ -140,17 +140,23 @@ class SlurmSequentialRunner(BaseRunner):
         tbar = tqdm(total=len(job_ids), desc='clear sruns')
         for batched_job_ids in batched(job_ids, 4):
-            ps = []
-            for job_id in batched_job_ids:
-                tbar.update()
-                if job_id is None:
-                    continue
-                cmd = f'scancel {job_id}'
-                p = subprocess.Popen(cmd,
-                                     shell=True,
-                                     stdout=subprocess.PIPE,
-                                     stderr=subprocess.STDOUT)
-                ps.append(p)
+            while True:
+                ps = []
+                try:
+                    for job_id in batched_job_ids:
+                        tbar.update()
+                        if job_id is None:
+                            continue
+                        cmd = f'scancel {job_id}'
+                        p = subprocess.Popen(cmd,
+                                             shell=True,
+                                             stdout=subprocess.PIPE,
+                                             stderr=subprocess.STDOUT)
+                        ps.append(p)
+                    break
+                except KeyboardInterrupt:
+                    logger = get_logger()
+                    logger.error('Ignoring KeyboardInterrupt...')
             for p in ps:
                 p.wait()
         tbar.close()
@@ -182,7 +188,7 @@ class SlurmSequentialRunner(BaseRunner):
             tmpl += f' --gres=gpu:{num_gpus}'
         for extra_cmd in self.extra_command:
             tmpl += f' {extra_cmd}'
-        tmpl += f" -N1 -J '{task_name[:512]}'" + ' {task_cmd}'
+        tmpl += f" -N1 -u -J '{task_name[:512]}'" + ' {task_cmd}'
         get_cmd = partial(task.get_command,
                           cfg_path=param_file,
                           template=tmpl)
opencompass/summarizers/default.py  (+2, -0)

@@ -127,6 +127,8 @@ class DefaultSummarizer:
                 dataset_eval_mode[dataset_abbr] = 'gen'
             elif 'PPLInferencer' in inferencer:
                 dataset_eval_mode[dataset_abbr] = 'ppl'
+            elif 'LLInferencer' in inferencer:
+                dataset_eval_mode[dataset_abbr] = 'll'
             else:
                 dataset_eval_mode[dataset_abbr] = 'unknown'
                 self.logger.warning(f'unknown inferencer: {inferencer} - {dataset_abbr}')
opencompass/summarizers/summarizer_pretrain.py  (+8, -7)

@@ -164,8 +164,11 @@ class PretrainSummarizer:
         time = now.strftime('%m/%d %H:%M')
         times = [time] * len(model_abbrs)
         table.append(header)
-        table.append(['dataset', 'version', 'metric', 'mode'] + times)
-        table.append(['dataset', 'version', 'metric', 'mode'] + checkpoints)
+        table.append(['time', 'version', 'metric', 'mode'] + times)
+        table.append(['checkpoint', 'version', 'metric', 'mode'] + checkpoints)
+        # check long bench
+        max_seq_lens = [str(model_cfg.max_seq_len) for model_cfg in model_cfgs]
+        table.append(['max_seq_len', 'version', 'metric', 'mode'] + max_seq_lens)
         dataset_score = [0] * len(model_abbrs)
         dataset_num = [0] * len(model_abbrs)
@@ -187,11 +190,9 @@ class PretrainSummarizer:
             row = [dataset_abbr, prompt_version.get(dataset_abbr, '-'), metric, dataset_eval_mode.get(dataset_abbr, '-')]
             for i, model_abbr in enumerate(model_abbrs):
                 if dataset_abbr in parsed_results[model_abbr]:
-                    if index == 0:
-                        row.append('{:.02f}'.format(parsed_results[model_abbr][dataset_abbr][index]))
-                        dataset_score[i] += parsed_results[model_abbr][dataset_abbr][index]
-                        dataset_num[i] += 1
-                    # row.append('{:.02f}'.format(parsed_results[model_abbr][dataset_abbr][index]))
+                    row.append('{:.02f}'.format(parsed_results[model_abbr][dataset_abbr][index]))
+                    dataset_score[i] += parsed_results[model_abbr][dataset_abbr][index]
+                    dataset_num[i] += 1
                 else:
                     if dataset_abbr.startswith('---') and dataset_num[i] != 0:
                         row.append('{:.02f}'.format(dataset_score[i] / dataset_num[i]))
opencompass/tasks/openicl_eval.py  (+3, -6)

@@ -216,8 +216,8 @@ class OpenICLEvalTask(BaseTask):
         result = icl_evaluator.score(**preds)

         if self.dump_details:
-            details = result.get('details', None)
             try:
+                details = result.pop('details', None)
                 result['details'] = self.format_details(
                     pred_strs, test_set[self.output_column], details,
                     pred_dicts)
@@ -225,13 +225,10 @@ class OpenICLEvalTask(BaseTask):

                 if 'PPL' in str(
                         self.dataset_cfg.infer_cfg.inferencer.type):
-                    result['correct_bpb'], result[
-                        'incorrect_bpb'] = \
-                        self.calculate_bpb(pred_dicts)
-                else:
-                    result['incorrect_bpb'] = result['correct_bpb'] = -1
+                    result['correct_bpb'], result[
+                        'incorrect_bpb'] = self.calculate_bpb(pred_dicts)
             except Exception as e:
                 self.logger.warning(f'Skip dumping details due to: {e}.')
                 result['incorrect_bpb'] = result['correct_bpb'] = -1
             else:
                 result.pop('details', None)
opencompass/tasks/openicl_infer.py  (+3, -1)

@@ -43,7 +43,9 @@ class OpenICLInferTask(BaseTask):
         the command.
         """
         script_path = __file__
-        if self.num_gpus > 0:
+        has_vllm = ('VLLM' in str(self.model_cfgs[0].get('type', ''))) or \
+            'VLLM' in str(self.model_cfgs[0].get('llm', {}).get('type', ''))
+        if self.num_gpus > 0 and not has_vllm:
             port = random.randint(12000, 32000)
             command = (f'torchrun --master_port={port} '
                        f'--nproc_per_node {self.num_procs} '
opencompass/utils/text_postprocessors.py  (+12, -7)

@@ -57,7 +57,7 @@ def last_capital_postprocess(text: str) -> str:
     return ''


-def first_option_postprocess(text: str, options: str) -> str:
+def first_option_postprocess(text: str, options: str, cushion=True) -> str:
     """Find first valid option for text."""

     # yapf: disable
@@ -91,26 +91,31 @@ def first_option_postprocess(text: str, options: str) -> str:
         f'[是为。]\s?([{options}])[。\.]?$',
         f'因此\s?([{options}])[。\.]?$',
         f'显然\s?([{options}])[。\.]?$',
-        f'1.\s?(.*?)$',
         f'答案是\s?(\S+)(?:。|$)',
         f'答案应该是\s?(\S+)(?:。|$)',
         f'答案为\s?(\S+)(?:。|$)',
-        f'(\s|^)[{options}][\s。,,::\.$]',
         f'[Tt]he answer is ([{options}])',
         f'[Tt]he answer is option ([{options}])',
         f'[Tt]he correct answer is ([{options}])',
         f'[Tt]he correct answer is option ([{options}])',
         f'[Tt]he answer to the question is ([{options}])',
+        f'^选项\s?([{options}])',
+        f'^([{options}])\s?选?项',
+        f'(\s|^)[{options}][\s。,,::\.$]',
+        f'(\s|^)[{options}](\s|$)',
+        f'1.\s?(.*?)$',
+    ]
+    cushion_patterns = [
         f'([{options}]):',
-        f'(^|\s)[{options}](\s|$)',
         f'[{options}]',
     ]
     # flake8: noqa
     # yapf: enable

-    regexes = [re.compile(pattern) for pattern in patterns]
-    for regex in regexes:
-        match = regex.search(text)
+    if cushion:
+        patterns.extend(cushion_patterns)
+    for pattern in patterns:
+        match = re.search(pattern, text)
         if match:
             outputs = match.group(0)
             for i in options:
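The practical effect of the new `cushion` switch: the strict patterns always run first, while the loose catch-all patterns (a bare option letter anywhere in the text) only participate when `cushion=True`. An illustrative session, with results annotated according to the pattern order above:

    from opencompass.utils.text_postprocessors import first_option_postprocess

    first_option_postprocess('The answer is B.', options='ABCD')  # -> 'B'
    # here 'B' is only reachable through the loose cushion patterns:
    first_option_postprocess('Option (B) seems right', options='ABCD')                 # -> 'B'
    first_option_postprocess('Option (B) seems right', options='ABCD', cushion=False)  # -> ''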
requirements/agent.txt  (+6, -1)

+antlr4-python3-runtime==4.11
+git+ssh://git@gitlab.pjlab.org.cn:1122/openmmlab/bigmodel/ilagent.git@czh/eval_gen
+ipykernel
+ipython
 json5
 jupyter
 jupyter_client
 jupytext
 lagent
+networkx
 scikit-image
-sympy
+sympy==1.12
requirements/runtime.txt  (+1, -0)

@@ -7,6 +7,7 @@
 einops==0.5.0
 evaluate>=0.3.0
 fairscale
+func_timeout
 fuzzywuzzy
 jieba
 ltp
tools/update_dataset_suffix.py  (+10, -1)

@@ -30,6 +30,14 @@ def get_prompt_hash(dataset_cfg: Union[ConfigDict, List[ConfigDict]]) -> str:
         hashes = ','.join([get_prompt_hash(cfg) for cfg in dataset_cfg])
         hash_object = hashlib.sha256(hashes.encode())
         return hash_object.hexdigest()
+    # for custom datasets
+    if 'infer_cfg' not in dataset_cfg:
+        dataset_cfg.pop('abbr', '')
+        dataset_cfg.pop('path', '')
+        d_json = json.dumps(dataset_cfg.to_dict(), sort_keys=True)
+        hash_object = hashlib.sha256(d_json.encode())
+        return hash_object.hexdigest()
+    # for regular datasets
     if 'reader_cfg' in dataset_cfg.infer_cfg:
         # new config
         reader_cfg = dict(type='DatasetReader',
@@ -67,7 +75,7 @@ def get_hash(path):
 def check_and_rename(filepath):
     base_name = os.path.basename(filepath)
-    match = re.match(r'(.*)_(gen|ppl)_(.*).py', base_name)
+    match = re.match(r'(.*)_(gen|ppl|ll)_(.*).py', base_name)
     if match:
         dataset, mode, old_hash = match.groups()
         new_hash = get_hash(filepath)
@@ -119,6 +127,7 @@ def main():
         return
     with Pool(16) as p:
         p.starmap(os.rename, name_pairs)
+    root_folder = 'configs'
     python_files = glob.glob(f'{root_folder}/**/*.py', recursive=True)
     update_data = [(python_file, name_pairs) for python_file in python_files]
     with Pool(16) as p:
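Since this regex drives the renaming, a quick illustration with a hypothetical config filename (the hash is made up); `ll`-mode configs produced for the new `LLInferencer` are now recognized:

    import re

    m = re.match(r'(.*)_(gen|ppl|ll)_(.*).py', 'winogrande_ll_c5cf57.py')
    print(m.groups())  # ('winogrande', 'll', 'c5cf57')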