Commit e78857ac, OpenDAS/opencompass

[Sync] minor test (#683)

Unverified commit, authored Dec 11, 2023 by Hubert; committed via GitHub on Dec 11, 2023.
Parent: dd4318f6

Showing 17 changed files with 596 additions and 97 deletions (+596, -97).
Changed files:

opencompass/models/openai_api.py (+2, -0)
opencompass/openicl/icl_evaluator/__init__.py (+1, -0)
opencompass/openicl/icl_evaluator/icl_misc_evaluator.py (+11, -0)
opencompass/openicl/icl_inferencer/__init__.py (+2, -0)
opencompass/openicl/icl_inferencer/icl_agent_inferencer.py (+6, -5)
opencompass/openicl/icl_inferencer/icl_chat_inferencer.py (+11, -7)
opencompass/openicl/icl_inferencer/icl_gen_inferencer.py (+8, -1)
opencompass/openicl/icl_inferencer/icl_loglikelihood_inferencer.py (+215, -0)
opencompass/openicl/icl_inferencer/icl_ppl_only_inferencer.py (+188, -0)
opencompass/partitioners/base.py (+44, -7)
opencompass/partitioners/naive.py (+21, -19)
opencompass/partitioners/size.py (+46, -43)
opencompass/runners/slurm_sequential.py (+17, -10)
opencompass/tasks/openicl_eval.py (+9, -2)
opencompass/utils/text_postprocessors.py (+8, -0)
requirements/agent.txt (+7, -0)
requirements/extra.txt (+0, -3)
opencompass/models/openai_api.py

@@ -58,6 +58,7 @@ class OpenAI(BaseAPIModel):
                  path: str = 'gpt-3.5-turbo',
                  max_seq_len: int = 4096,
                  query_per_second: int = 1,
+                 rpm_verbose: bool = False,
                  retry: int = 2,
                  key: Union[str, List[str]] = 'ENV',
                  org: Optional[Union[str, List[str]]] = None,
@@ -70,6 +71,7 @@ class OpenAI(BaseAPIModel):
                          max_seq_len=max_seq_len,
                          meta_template=meta_template,
                          query_per_second=query_per_second,
+                         rpm_verbose=rpm_verbose,
                          retry=retry)
         import tiktoken
         self.tiktoken = tiktoken
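The only functional change here is the new `rpm_verbose` flag, threaded through to `BaseAPIModel` alongside `query_per_second`; going by the name, it toggles logging of the request-rate limiter. A minimal sketch of a model config that sets it, where everything except `rpm_verbose` simply mirrors the signature above:

```python
# Sketch of an OpenCompass model entry using the new flag. The dict shape is
# the usual OpenCompass models-list convention; `rpm_verbose=True` is assumed
# to make the rate limiter log its pacing, based on the parameter name.
from opencompass.models import OpenAI

models = [
    dict(
        type=OpenAI,
        path='gpt-3.5-turbo',
        max_seq_len=4096,
        query_per_second=1,
        rpm_verbose=True,  # new in this commit
        retry=2,
        key='ENV',  # read the API key from the environment
    )
]
```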
opencompass/openicl/icl_evaluator/__init__.py

@@ -5,5 +5,6 @@ from .icl_circular_evaluator import CircularEvaluator  # noqa
 from .icl_em_evaluator import EMEvaluator  # noqa
 from .icl_hf_evaluator import *  # noqa
 from .icl_jieba_rouge_evaluator import JiebaRougeEvaluator  # noqa
+from .icl_misc_evaluator import AveragePPLEvaluator  # noqa
 from .icl_toxic_evaluator import ToxicEvaluator  # noqa
 from .lm_evaluator import LMEvaluator  # noqa
opencompass/openicl/icl_evaluator/icl_misc_evaluator.py (new file, mode 100644)

from opencompass.registry import ICL_EVALUATORS

from .icl_base_evaluator import BaseEvaluator


@ICL_EVALUATORS.register_module()
class AveragePPLEvaluator(BaseEvaluator):

    def score(self, ppl):
        average_ppl = sum(ppl) / len(ppl)
        return {'average_ppl': average_ppl}
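The evaluator is a plain mean over per-sample perplexities. A quick standalone check of the arithmetic, assuming `BaseEvaluator` needs no constructor arguments:

```python
from opencompass.openicl.icl_evaluator import AveragePPLEvaluator

evaluator = AveragePPLEvaluator()
# `ppl` is the list of per-sample perplexities that PPLOnlyInferencer
# (added later in this commit) returns as its predictions.
print(evaluator.score(ppl=[2.0, 4.0, 6.0]))  # {'average_ppl': 4.0}
```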
opencompass/openicl/icl_inferencer/__init__.py

@@ -4,6 +4,8 @@ from .icl_base_inferencer import BaseInferencer  # noqa
 from .icl_chat_inferencer import ChatInferencer  # noqa
 from .icl_clp_inferencer import CLPInferencer  # noqa
 from .icl_gen_inferencer import GenInferencer  # noqa
+from .icl_loglikelihood_inferencer import LoglikelihoodInferencer  # noqa
 from .icl_ppl_inferencer import PPLInferencer  # noqa
+from .icl_ppl_only_inferencer import PPLOnlyInferencer  # noqa
 from .icl_sc_inferencer import SCInferencer  # noqa
 from .icl_tot_inferencer import ToTInferencer  # noqa
opencompass/openicl/icl_inferencer/icl_agent_inferencer.py

@@ -89,7 +89,7 @@ class AgentInferencer(ChatInferencer):
         user_idx = assistant_indices[-1] - 1
         self.model.set_history(chat[:user_idx])
-        answer, steps = self.model.chat(chat[user_idx]['content'])
+        answer, steps, _ = self.model.chat(chat[user_idx]['content'])
         output_handler.save_results(
             origin_prompt=chat[user_idx]['content'],
             prediction=answer,
@@ -104,10 +104,11 @@ class AgentInferencer(ChatInferencer):
             i for i, item in enumerate(chat) if item['role'] == 'assistant'
         ]
-        self.model.set_history(chat[:assistant_indices[0] - 1])
+        history = chat[:assistant_indices[0] - 1]
         for i in assistant_indices:
-            answer, steps = self.model.chat(chat[i - 1]['content'])
+            answer, steps, inner_steps = self.model.chat(
+                chat[i - 1]['content'], history)
+            history += inner_steps
             output_handler.save_multiround_results(
                 origin_prompt=chat[i - 1]['content'],
                 prediction=answer,
@@ -125,7 +125,7 @@ class AgentInferencer(ChatInferencer):
         for i in assistant_indices:
             self.model.set_history(chat[:i - 1])
-            answer, steps = self.model.chat(chat[i - 1]['content'])
+            answer, steps, _ = self.model.chat(chat[i - 1]['content'])
             output_handler.save_multiround_results(
                 origin_prompt=chat[i - 1]['content'],
                 prediction=answer,
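The substantive change is in the every-round path: instead of calling `set_history` before each turn, the inferencer keeps a running `history` list and extends it with the `inner_steps` now returned by `model.chat`, so each round sees the accumulated agent trajectory. A self-contained sketch of that accumulation pattern with a stub `chat` function (the stub is illustrative, not the Lagent API):

```python
# Stub with the same shape as the updated model.chat: it returns
# (answer, steps, inner_steps), and the caller grows `history` itself
# instead of resetting it on the model each round.
def chat(prompt, history):
    answer = f'answer to: {prompt}'
    steps = [{'role': 'assistant', 'content': answer}]
    inner_steps = [{'role': 'user', 'content': prompt}] + steps
    return answer, steps, inner_steps

history = []
for prompt in ['step 1', 'step 2']:
    answer, steps, inner_steps = chat(prompt, history)
    history += inner_steps  # mirrors `history += inner_steps` above

print(len(history))  # 4: two user turns and two assistant turns
```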
opencompass/openicl/icl_inferencer/icl_chat_inferencer.py

@@ -68,11 +68,11 @@ class LMTemplateParser:
         prompt = ''
         if self.roles:
             for dialog in chat:
-                role_cfg = self.roles.get(dialog['role'])
-                prompt += role_cfg['begin']
+                role_cfg = self.roles.get(dialog['role'], {})
+                prompt += (role_cfg.get('begin') or '')
                 prompt += (dialog.get('content') or '')
-                prompt += role_cfg['end']
-            prompt += self.roles['assistant']['begin']
+                prompt += (role_cfg.get('end') or '')
+            prompt += (self.roles['assistant'].get('begin') or '')
         else:
             # in case the model does not have any meta template
             last_sep = ''
@@ -227,7 +227,11 @@ class ChatInferencer(BaseInferencer):
                                         'tmp_' + output_json_filename)
         if osp.exists(tmp_json_filepath):
             # TODO: move resume to output handler
-            tmp_result_dict = mmengine.load(tmp_json_filepath)
-            output_handler.results_dict = tmp_result_dict
-            index = len(tmp_result_dict)
+            try:
+                tmp_result_dict = mmengine.load(tmp_json_filepath)
+            except Exception:
+                pass
+            else:
+                output_handler.results_dict = tmp_result_dict
+                index = len(tmp_result_dict)
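The template-parser change is defensive: `self.roles.get(dialog['role'], {})` together with `role_cfg.get('begin') or ''` means an unknown role, or a role config whose `begin`/`end` is missing or `None`, now contributes an empty string instead of raising `KeyError`. The resume change in the second hunk is similar in spirit: a corrupt temporary JSON no longer aborts the run, it just restarts from index 0. A standalone sketch of the tolerant lookup:

```python
# Tolerant lookup as now used by LMTemplateParser: missing roles and
# missing/None 'begin'/'end' fields degrade to '' instead of raising.
roles = {'user': {'begin': '<user>: ', 'end': '\n'}}  # no 'tool' entry

prompt = ''
for dialog in [{'role': 'user', 'content': 'hi'},
               {'role': 'tool', 'content': 'tool output'}]:
    role_cfg = roles.get(dialog['role'], {})
    prompt += (role_cfg.get('begin') or '')
    prompt += (dialog.get('content') or '')
    prompt += (role_cfg.get('end') or '')

print(repr(prompt))  # '<user>: hi\ntool output'
```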
opencompass/openicl/icl_inferencer/icl_gen_inferencer.py

 """Direct Generation Inferencer."""

+import inspect
 import os
 import os.path as osp
 from typing import List, Optional

@@ -46,6 +47,7 @@ class GenInferencer(BaseInferencer):
             self,
             model: BaseModel,
             max_out_len: int,
+            stopping_criteria: List[str] = [],
             max_seq_len: Optional[int] = None,
             batch_size: Optional[int] = 1,
             gen_field_replace_token: Optional[str] = '',
@@ -64,6 +66,7 @@ class GenInferencer(BaseInferencer):
         self.gen_field_replace_token = gen_field_replace_token
         self.max_out_len = max_out_len
+        self.stopping_criteria = stopping_criteria
         if self.model.is_api and save_every is None:
             save_every = 1
@@ -128,10 +131,14 @@ class GenInferencer(BaseInferencer):
             entry = datum
             golds = [None for _ in range(len(entry))]
             # 5-1. Inference with local model
+            extra_gen_kwargs = {}
+            sig = inspect.signature(self.model.generate)
+            if 'stopping_criteria' in sig.parameters:
+                extra_gen_kwargs['stopping_criteria'] = self.stopping_criteria
             with torch.no_grad():
                 parsed_entries = self.model.parse_template(entry, mode='gen')
                 results = self.model.generate_from_template(
-                    entry, max_out_len=self.max_out_len)
+                    entry, max_out_len=self.max_out_len, **extra_gen_kwargs)
                 generated = results
             num_return_sequences = getattr(self.model, 'generation_kwargs',
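`stopping_criteria` is only forwarded when the wrapped model's `generate` actually declares such a parameter, probed with `inspect.signature`, so older model wrappers keep working unchanged. A self-contained sketch of that capability check:

```python
import inspect

def generate_old(inputs, max_out_len):  # legacy signature
    return inputs

def generate_new(inputs, max_out_len, stopping_criteria=()):  # new signature
    return inputs

for generate in (generate_old, generate_new):
    extra_gen_kwargs = {}
    if 'stopping_criteria' in inspect.signature(generate).parameters:
        extra_gen_kwargs['stopping_criteria'] = ['\n\n']
    # Only generate_new receives the extra kwarg.
    print(generate.__name__, extra_gen_kwargs)
```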
opencompass/openicl/icl_inferencer/icl_loglikelihood_inferencer.py (new file, mode 100644)

"""PPL Inferencer."""

import os
from typing import List, Optional

import torch
from tqdm import trange

from opencompass.models.base import BaseModel
from opencompass.registry import ICL_INFERENCERS

from ..icl_prompt_template import PromptTemplate
from ..icl_retriever import BaseRetriever
from ..utils import get_logger
from .icl_base_inferencer import BaseInferencer, dump_results_dict

logger = get_logger(__name__)


@ICL_INFERENCERS.register_module()
class LoglikelihoodInferencer(BaseInferencer):
    """Loglikelihood Inferencer class to evaluate by loglikelihood.

    Attributes:
        model (:obj:`BaseModel`, optional): The module to inference.
        max_seq_len (:obj:`int`): Maximum number of tokenized words allowed by
            the LM.
        batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`
        output_json_filepath (:obj:`str`, optional): File path for output
            `JSON` file.
        output_json_filename (:obj:`str`, optional): File name for output
            `JSON` file.
        labels (:obj:`List`, optional): A list of labels for all classes.
    """

    def __init__(
            self,
            model: BaseModel,
            max_seq_len: Optional[int] = None,
            batch_size: Optional[int] = 1,
            output_json_filepath: Optional[str] = './icl_inference_output',
            output_json_filename: Optional[str] = 'predictions',
            labels: Optional[List] = None,
            **kwargs) -> None:
        super().__init__(
            model=model,
            max_seq_len=max_seq_len,
            batch_size=batch_size,
            output_json_filename=output_json_filename,
            output_json_filepath=output_json_filepath,
            **kwargs,
        )
        self.labels = labels

    def inference(self,
                  retriever: BaseRetriever,
                  ice_template: Optional[PromptTemplate] = None,
                  prompt_template: Optional[PromptTemplate] = None,
                  output_json_filepath: Optional[str] = None,
                  output_json_filename: Optional[str] = None) -> List:
        # 1. Preparation for output logs
        output_handler = LoglikelihoodInferencerOutputHandler()

        sub_predictions = []
        ppl = []
        ice = []

        if output_json_filepath is None:
            output_json_filepath = self.output_json_filepath
        if output_json_filename is None:
            output_json_filename = self.output_json_filename

        # 2. Get results of retrieval process
        ice_idx_list = retriever.retrieve()

        # 3. Get labels of all the classes
        if self.labels is None:
            labels = retriever.get_labels(ice_template=ice_template,
                                          prompt_template=prompt_template)
        else:
            labels = self.labels

        # 4. Generate in-context examples for testing inputs
        for idx in range(len(ice_idx_list)):
            ice.append(
                retriever.generate_ice(ice_idx_list[idx],
                                       ice_template=ice_template))
        output_handler.save_ice(self.model.parse_template(ice, mode='ppl'))

        # 5. Calculating loglikelihood for prompts in each label's class
        for label in labels:
            index = 0
            prompt_list = []
            sub_ppl_list = []
            token_num_list = []
            cont_list = []

            # 5.1 Generate prompts of current label and truncate
            # TODO: Refactor
            for idx in range(len(ice_idx_list)):
                prompt = retriever.generate_label_prompt(
                    idx,
                    ice[idx],
                    label,
                    ice_template=ice_template,
                    prompt_template=prompt_template)
                if self.max_seq_len is not None:
                    prompt_token_num = self.model.get_token_len_from_template(
                        prompt, mode='ppl')
                    while len(ice_idx_list[idx]
                              ) > 0 and prompt_token_num > self.max_seq_len:
                        ice_idx_list[idx] = ice_idx_list[idx][:-1]
                        ice[idx] = retriever.generate_ice(
                            ice_idx_list[idx], ice_template=ice_template)
                        prompt = retriever.generate_label_prompt(
                            idx,
                            ice[idx],
                            label,
                            ice_template=ice_template,
                            prompt_template=prompt_template)
                        prompt_token_num = self.model.get_token_len_from_template(  # noqa
                            prompt, mode='ppl')  # noqa

                prompt_list.append(prompt)
                token_num_list.append(prompt_token_num)
                cont_list.append(retriever.test_ds[idx]['cont'])

            # 5.2 Get PPL
            logger.info(f"Calculating PPL for prompts labeled '{label}'")
            for idx in trange(0,
                              len(prompt_list),
                              self.batch_size,
                              disable=not self.is_main_process):
                sub_prompt_list = prompt_list[idx:idx + self.batch_size]
                sub_cont_list = cont_list[idx:idx + self.batch_size]

                with torch.no_grad():
                    # mainly modify compared to PPLInferencer
                    sub_res = self.model.get_loglikelihood_from_template(
                        sub_prompt_list, sub_cont_list).tolist()

                for res, prompt in zip(
                        sub_res,
                        self.model.parse_template(sub_prompt_list,
                                                  mode='ppl')):
                    sub_ppl_list.append(res)
                    ice_str = self.model.parse_template(ice[idx], mode='ppl')
                    output_handler.save_prompt_and_loglikelihood(
                        label, prompt.replace(ice_str, ''), prompt, res,
                        index)
                    index = index + 1
            ppl.append(sub_ppl_list)

        # 6. Get lowest PPL class as predictions
        ppl = list(zip(*ppl))
        for single_ppl in ppl:
            sub_predictions.append(labels[single_ppl.index(max(single_ppl))])
        output_handler.save_predictions(sub_predictions)

        # 7. Fetch gold answers if exist
        ds_reader = retriever.dataset_reader
        if ds_reader.output_column:
            golds = ds_reader.dataset['test'][ds_reader.output_column]
            output_handler.save_golds(golds)

        # 8. Output
        if self.is_main_process:
            os.makedirs(output_json_filepath, exist_ok=True)
            output_handler.write_to_json(output_json_filepath,
                                         output_json_filename)

        return [
            sample['prediction']
            for sample in output_handler.results_dict.values()
        ]


class LoglikelihoodInferencerOutputHandler:
    results_dict = {}

    def __init__(self) -> None:
        self.results_dict = {}

    def write_to_json(self, save_dir: str, filename: str):
        """Dump the result to a json file."""
        dump_results_dict(self.results_dict, os.path.join(save_dir, filename))

    def save_ice(self, ice):
        for idx, example in enumerate(ice):
            if str(idx) not in self.results_dict.keys():
                self.results_dict[str(idx)] = {}
            self.results_dict[str(idx)]['in-context examples'] = example

    def save_predictions(self, predictions):
        for idx, prediction in enumerate(predictions):
            if str(idx) not in self.results_dict.keys():
                self.results_dict[str(idx)] = {}
            self.results_dict[str(idx)]['prediction'] = prediction

    def save_prompt_and_loglikelihood(self, label, input, prompt,
                                      loglikelihood, idx):
        if str(idx) not in self.results_dict.keys():
            self.results_dict[str(idx)] = {}
        if 'label: ' + str(label) not in self.results_dict[str(idx)].keys():
            self.results_dict[str(idx)]['label: ' + str(label)] = {}
        self.results_dict[str(idx)]['label: ' + str(label)][
            'testing input'] = input
        self.results_dict[str(idx)]['label: ' + str(label)]['prompt'] = prompt
        self.results_dict[str(idx)]['label: ' + str(label)][
            'Loglikelihood'] = loglikelihood

    def save_golds(self, golds):
        for idx, gold in enumerate(golds):
            if str(idx) not in self.results_dict.keys():
                self.results_dict[str(idx)] = {}
            self.results_dict[str(idx)]['gold'] = gold
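Despite the `ppl` variable names inherited from `PPLInferencer`, step 6 picks the label whose continuation scores the highest loglikelihood (`max`, since loglikelihoods are negative and closer to zero is better). A toy version of that selection:

```python
# Toy version of step 6: one loglikelihood list per label, one entry per
# sample; zip(*...) regroups them per sample and the best label wins.
labels = ['A', 'B']
ppl = [[-1.2, -7.5], [-3.0, -0.4]]

predictions = [labels[sample.index(max(sample))] for sample in zip(*ppl)]
print(predictions)  # ['A', 'B']
```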
opencompass/openicl/icl_inferencer/icl_ppl_only_inferencer.py (new file, mode 100644)

"""PPL Inferencer."""

import os
from typing import List, Optional

import mmengine
import torch
from tqdm import tqdm

from opencompass.models.base import BaseModel
from opencompass.registry import ICL_INFERENCERS

from ..icl_prompt_template import PromptTemplate
from ..icl_retriever import BaseRetriever
from ..utils import get_logger
from .icl_base_inferencer import BaseInferencer, dump_results_dict

logger = get_logger(__name__)


@ICL_INFERENCERS.register_module()
class PPLOnlyInferencer(BaseInferencer):
    """PPLOnlyInferencer class to calculate PPL and PPL only, no choice is
    made. This Inferencer is usually used along with AveragePPLEvaluator.

    Attributes:
        model (:obj:`BaseModel`, optional): The module to inference.
        max_seq_len (:obj:`int`): Maximum number of tokenized words allowed by
            the LM.
        batch_size (:obj:`int`, optional): Batch size for the :obj:`DataLoader`
        output_json_filepath (:obj:`str`, optional): File path for output
            `JSON` file.
        output_json_filename (:obj:`str`, optional): File name for output
            `JSON` file.
        save_every (:obj:`int`, optional): Save intermediate results every
    """

    def __init__(
            self,
            model: BaseModel,
            max_seq_len: Optional[int] = None,
            batch_size: Optional[int] = 1,
            output_json_filepath: Optional[str] = './icl_inference_output',
            output_json_filename: Optional[str] = 'predictions',
            save_every: Optional[int] = 1,
            **kwargs) -> None:
        super().__init__(
            model=model,
            max_seq_len=max_seq_len,
            batch_size=batch_size,
            output_json_filename=output_json_filename,
            output_json_filepath=output_json_filepath,
            **kwargs,
        )
        self.save_every = save_every

    def inference(self,
                  retriever: BaseRetriever,
                  ice_template: Optional[PromptTemplate] = None,
                  prompt_template: Optional[PromptTemplate] = None,
                  output_json_filepath: Optional[str] = None,
                  output_json_filename: Optional[str] = None) -> List:
        # 1. Preparation for output logs
        output_handler = PPLOnlyInferencerOutputHandler()

        if output_json_filepath is None:
            output_json_filepath = self.output_json_filepath
        if output_json_filename is None:
            output_json_filename = self.output_json_filename

        # 2. Get results of retrieval process
        ice_idx_list = retriever.retrieve()

        # 3. Generate prompts for testing input
        prompt_list = self.get_generation_prompt_list_from_retriever_indices(
            ice_idx_list,
            retriever,
            max_seq_len=self.max_seq_len,
            ice_template=ice_template,
            prompt_template=prompt_template)

        # 3.1 Fetch and zip prompt & gold answer if output column exists
        ds_reader = retriever.dataset_reader

        assert ds_reader.output_column is None, (
            'PPLOnlyInferencer supports `output_column=None` only.')

        # Create tmp json file for saving intermediate results and future
        # resuming
        index = 0
        tmp_json_filepath = os.path.join(output_json_filepath,
                                         'tmp_' + output_json_filename)
        if os.path.exists(tmp_json_filepath):
            # TODO: move resume to output handler
            try:
                tmp_result_dict = mmengine.load(tmp_json_filepath)
            except Exception:
                pass
            else:
                output_handler.results_dict = tmp_result_dict
                index = len(tmp_result_dict)

        # 4. Wrap prompts with Dataloader
        dataloader = self.get_dataloader(prompt_list[index:], self.batch_size)

        # 5. Inference for prompts in each batch
        logger.info('Starting inference process...')
        for datum in tqdm(dataloader, disable=not self.is_main_process):
            entry = datum
            # 5-1. Inference with local model
            with torch.no_grad():
                ppls = self.model.get_ppl_from_template(entry).tolist()

            parsed_entries = self.model.parse_template(entry, mode='gen')
            # 5-3. Save current output
            for prompt, ppl in zip(parsed_entries, ppls):
                output_handler.save_results(prompt, ppl, index)
                index = index + 1

            # 5-4. Save intermediate results
            if (self.save_every is not None and index % self.save_every == 0
                    and self.is_main_process):
                output_handler.write_to_json(output_json_filepath,
                                             'tmp_' + output_json_filename)

        # 6. Output
        if self.is_main_process:
            os.makedirs(output_json_filepath, exist_ok=True)
            output_handler.write_to_json(output_json_filepath,
                                         output_json_filename)
            if os.path.exists(tmp_json_filepath):
                os.remove(tmp_json_filepath)

        return [
            sample['ppl'] for sample in output_handler.results_dict.values()
        ]

    def get_generation_prompt_list_from_retriever_indices(
            self,
            ice_idx_list: List[List[int]],
            retriever: BaseRetriever,
            max_seq_len: Optional[int] = None,
            ice_template: Optional[PromptTemplate] = None,
            prompt_template: Optional[PromptTemplate] = None):
        prompt_list = []
        for idx, ice_idx in enumerate(ice_idx_list):
            ice = retriever.generate_ice(ice_idx, ice_template=ice_template)
            prompt = retriever.generate_prompt_for_generate_task(
                idx,
                ice,
                ice_template=ice_template,
                prompt_template=prompt_template)
            if max_seq_len is not None:
                prompt_token_num = self.model.get_token_len_from_template(
                    prompt, mode='gen')
                while len(ice_idx) > 0 and prompt_token_num > max_seq_len:
                    ice_idx = ice_idx[:-1]
                    ice = retriever.generate_ice(ice_idx,
                                                 ice_template=ice_template)
                    prompt = retriever.generate_prompt_for_generate_task(
                        idx,
                        ice,
                        ice_template=ice_template,
                        prompt_template=prompt_template)
                    prompt_token_num = self.model.get_token_len_from_template(
                        prompt, mode='gen')
            prompt_list.append(prompt)
        return prompt_list


class PPLOnlyInferencerOutputHandler:
    origin_prompt_dict = {}
    output_dict = {}
    results_dict = {}

    def __init__(self) -> None:
        self.results_dict = {}

    def write_to_json(self, save_dir: str, filename: str):
        """Dump the result to a json file."""
        dump_results_dict(self.results_dict, os.path.join(save_dir, filename))

    def save_results(self, origin_prompt, ppl, idx):
        self.results_dict[str(idx)] = {
            'origin_prompt': origin_prompt,
            'ppl': ppl,
        }
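The two new modules are designed to be used together: `PPLOnlyInferencer` emits raw per-sample perplexities as its predictions, and `AveragePPLEvaluator` reduces them to a single number. A hedged sketch of the relevant dataset-config fragments; only the `type=` values come from this commit, the surrounding keys follow the usual OpenCompass config shape, and the omitted fields (prompt templates, retriever, and so on) are left out:

```python
from opencompass.openicl.icl_evaluator import AveragePPLEvaluator
from opencompass.openicl.icl_inferencer import PPLOnlyInferencer

# Hypothetical fragments of a dataset config pairing the two new pieces.
infer_cfg = dict(inferencer=dict(type=PPLOnlyInferencer))
eval_cfg = dict(evaluator=dict(type=AveragePPLEvaluator))
```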
opencompass/partitioners/base.py

+import inspect
 from abc import abstractmethod
 from copy import deepcopy
 from typing import Dict, List, Optional

 from mmengine.config import ConfigDict

-from opencompass.utils import get_logger, task_abbr_from_cfg
+from opencompass.utils import (dataset_abbr_from_cfg, get_logger,
+                               model_abbr_from_cfg, task_abbr_from_cfg)


 class BasePartitioner:

@@ -54,8 +56,7 @@ class BasePartitioner:
             List[Dict]: A list of tasks.
         """
         cfg = deepcopy(cfg)
-        models = cfg['models']
-        datasets = cfg['datasets']
         work_dir = cfg['work_dir']

         add_cfg = {}
@@ -74,10 +75,11 @@ class BasePartitioner:
                 self.logger.debug(f'Key {k} not found in config, ignored.')
         self.logger.debug(f'Additional config: {add_cfg}')

-        tasks = self.partition(models,
-                               datasets,
-                               work_dir,
-                               self.out_dir,
-                               add_cfg=add_cfg)
+        model_and_dataset_args = self.parse_model_dataset_args(cfg)
+        tasks = self.partition(**model_and_dataset_args,
+                               work_dir=work_dir,
+                               out_dir=self.out_dir,
+                               add_cfg=add_cfg)

         self.logger.info(f'Partitioned into {len(tasks)} tasks.')
@@ -86,6 +88,41 @@ class BasePartitioner:
         return tasks

+    def parse_model_dataset_args(self, cfg: ConfigDict):
+        models = cfg['models']
+        datasets = cfg['datasets']
+
+        sig = inspect.signature(self.partition)
+        if 'model_dataset_combinations' in sig.parameters:
+            combs = cfg.get('model_dataset_combinations', None)
+            if combs is None:
+                combs = [{'models': models, 'datasets': datasets}]
+            else:
+                # sanity check
+                model_abbrs = [model_abbr_from_cfg(model) for model in models]
+                dataset_abbrs = [
+                    dataset_abbr_from_cfg(dataset) for dataset in datasets
+                ]
+                for comb in combs:
+                    for model in comb['models']:
+                        if model_abbr_from_cfg(model) not in model_abbrs:
+                            raise ValueError(
+                                f'Model {model_abbr_from_cfg(model)} '
+                                'not found in config.')
+                    for dataset in comb['datasets']:
+                        if dataset_abbr_from_cfg(dataset) not in dataset_abbrs:
+                            raise ValueError(
+                                f'Dataset {dataset_abbr_from_cfg(dataset)} '
+                                'not found in config.')
+            used_kwargs = {'model_dataset_combinations': combs}
+        else:
+            if cfg.get('model_dataset_combinations', None) is not None:
+                self.logger.warning(
+                    'model_dataset_combinations is not supported by '
+                    f'{self.__class__.__name__}. Ignored.')
+            used_kwargs = {'models': models, 'datasets': datasets}
+        return used_kwargs
+
     @abstractmethod
     def partition(self,
                   models: List[ConfigDict],
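`parse_model_dataset_args` gives partitioners an opt-in upgrade path: if a subclass's `partition` declares a `model_dataset_combinations` parameter, the config may restrict which (model, dataset) pairs are evaluated; otherwise the key is ignored with a warning and the old `models`/`datasets` arguments are passed through. A sketch of what such a config entry might look like (the abbreviated configs are illustrative stand-ins):

```python
# Illustrative config fragment: evaluate model_a on both datasets, but
# model_b only on dataset_x. Every referenced config must also appear in
# the top-level `models` / `datasets` lists, or the sanity check above
# raises ValueError.
model_a = dict(abbr='model_a')  # stand-ins for full model configs
model_b = dict(abbr='model_b')
dataset_x = dict(abbr='dataset_x')  # stand-ins for full dataset configs
dataset_y = dict(abbr='dataset_y')

model_dataset_combinations = [
    dict(models=[model_a], datasets=[dataset_x, dataset_y]),
    dict(models=[model_b], datasets=[dataset_x]),
]
```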
opencompass/partitioners/naive.py

@@ -29,8 +29,8 @@ class NaivePartitioner(BasePartitioner):
         self.n = n

     def partition(self,
-                  models: List[ConfigDict],
-                  datasets: List[ConfigDict],
+                  model_dataset_combinations: List[Dict[str,
+                                                        List[ConfigDict]]],
                   work_dir: str,
                   out_dir: str,
                   add_cfg: Dict = {}) -> List[Dict]:
@@ -48,8 +48,9 @@ class NaivePartitioner(BasePartitioner):
         }

         Args:
-            models (List[ConfigDict]): A list of model configs.
-            datasets (List[ConfigDict]): A list of dataset configs.
+            model_dataset_combinations (List[Dict]): List of
+                `{models: [...], datasets: [...]}` dicts. Each dict contains
+                a list of model configs and a list of dataset configs.
             work_dir (str): The work dir for the task.
             out_dir (str): The full output path for the task, intended for
                 Partitioners to check whether the task is finished via the
@@ -60,9 +61,10 @@ class NaivePartitioner(BasePartitioner):
         """
         tasks = []
-        for model in models:
-            chunks = []
-            for dataset in datasets:
+        for comb in model_dataset_combinations:
+            for model in comb['models']:
+                chunks = []
+                for dataset in comb['datasets']:
                     filename = get_infer_output_path(model, dataset, out_dir)
                     if osp.exists(filename):
                         continue
opencompass/partitioners/size.py

@@ -51,8 +51,8 @@ class SizePartitioner(BasePartitioner):
         self.strategy = strategy

     def partition(self,
-                  models: List[ConfigDict],
-                  datasets: List[ConfigDict],
+                  model_dataset_combinations: List[Dict[str,
+                                                        List[ConfigDict]]],
                   work_dir: str,
                   out_dir: str,
                   add_cfg: Dict = {}) -> List[ConfigDict]:
@@ -71,8 +71,9 @@ class SizePartitioner(BasePartitioner):
         }

         Args:
-            models (List[ConfigDict]): A list of model configs.
-            datasets (List[ConfigDict]): A list of dataset configs.
+            model_dataset_combinations (List[Dict]): List of
+                `{models: [...], datasets: [...]}` dicts. Each dict contains
+                a list of model configs and a list of dataset configs.
             work_dir (str): The work dir for the task.
             out_dir (str): The full output path for the task, intended for
                 Partitioners to check whether the task is finished via the
@@ -84,13 +85,14 @@ class SizePartitioner(BasePartitioner):
             List[ConfigDict]: A list of tasks.
         """
-        datasets = sorted(datasets,
-                          key=lambda x: self.get_cost(x),
-                          reverse=True)
-        tasks = []
-        for model in models:
-            chunks = []  # elements: tuple(size, dataset_chunk)
-            for dataset in datasets:
+        tasks = []
+        for comb in model_dataset_combinations:
+            comb['datasets'] = sorted(comb['datasets'],
+                                      key=lambda x: self.get_cost(x),
+                                      reverse=True)
+            for model in comb['models']:
+                chunks = []  # elements: tuple(size, dataset_chunk)
+                for dataset in comb['datasets']:
                     filename = get_infer_output_path(model, dataset, out_dir)
                     # skip the task if the task output exists
                     if osp.exists(filename):
@@ -101,7 +103,8 @@ class SizePartitioner(BasePartitioner):
                     dataset_splits = self.split_dataset(dataset)
                     for i, dataset_split in enumerate(dataset_splits):
                         if not osp.exists(f'{root}_{i}{ext}'):
-                            chunks.append((self.max_task_size, dataset_split))
+                            chunks.append(
+                                (self.max_task_size, dataset_split))
                         else:
                             chunks.append((dataset_size, dataset))
opencompass/runners/slurm_sequential.py

@@ -13,7 +13,7 @@ from mmengine.config import ConfigDict
 from tqdm import tqdm

 from opencompass.registry import RUNNERS, TASKS
-from opencompass.utils import get_logger
+from opencompass.utils import batched, get_logger
 from .base import BaseRunner
@@ -131,7 +131,11 @@ class SlurmSequentialRunner(BaseRunner):
                 break
         parent_conn.close()

-        for job_id in tqdm(job_ids, desc='clear sruns'):
-            if job_id is None:
-                continue
-            cmd = f'scancel {job_id}'
+        tbar = tqdm(total=len(job_ids), desc='clear sruns')
+        for batched_job_ids in batched(job_ids, 4):
+            ps = []
+            for job_id in batched_job_ids:
+                tbar.update()
+                if job_id is None:
+                    continue
+                cmd = f'scancel {job_id}'
@@ -139,7 +143,10 @@ class SlurmSequentialRunner(BaseRunner):
                                      shell=True,
                                      stdout=subprocess.PIPE,
                                      stderr=subprocess.STDOUT)
-            p.wait()
+                ps.append(p)
+            for p in ps:
+                p.wait()
+        tbar.close()

     def _launch(self, cfg: ConfigDict, child_conn: Pipe = None):
         logger = get_logger()
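Job cleanup now issues `scancel` in batches of four concurrent subprocesses rather than strictly one at a time, via a `batched` helper imported from `opencompass.utils`. Its implementation is not shown in this diff; a plausible sketch, in the style of Python 3.12's `itertools.batched`, would be:

```python
from itertools import islice

def batched(iterable, n):
    """Yield successive chunks of up to n items (a sketch; the real helper
    lives in opencompass.utils and may differ in detail)."""
    it = iter(iterable)
    while chunk := list(islice(it, n)):
        yield chunk

print(list(batched(range(7), 4)))  # [[0, 1, 2, 3], [4, 5, 6]]
```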
opencompass/tasks/openicl_eval.py

@@ -121,8 +121,9 @@ class OpenICLEvalTask(BaseTask):
             pred_dicts = copy.deepcopy(preds)
             preds = {k: [pred.get(k) for pred in preds] for k in preds[0]}

-        pred_strs = preds.pop('prediction')
-        pred_list_flag = isinstance(pred_strs[0], list)
+        pred_strs = preds.pop('prediction', None)
+        pred_list_flag = pred_strs is not None and isinstance(
+            pred_strs[0], list)
         if ('pred_role' in self.eval_cfg
                 and 'meta_template' in self.model_cfg
                 and not MODELS.get(self.model_cfg['type']).is_api):
@@ -166,6 +167,12 @@ class OpenICLEvalTask(BaseTask):
             ]

         icl_evaluator = ICL_EVALUATORS.build(self.eval_cfg['evaluator'])
+        # need results dir to save other files
+        out_path = get_infer_output_path(
+            self.model_cfg, self.dataset_cfg,
+            osp.join(self.work_dir, 'results'))
+        icl_evaluator._out_dir = osp.splitext(out_path)[0]  # strip extension
+
         preds['predictions'] = pred_strs
         preds['references'] = (test_set[self.output_column]
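The `preds.pop('prediction', None)` default exists precisely because of the new `PPLOnlyInferencer`: its output records carry a `ppl` field and no `prediction` key, so the old unguarded `pop` would raise `KeyError` during evaluation. A minimal illustration:

```python
# Records written by PPLOnlyInferencer carry 'ppl' instead of 'prediction'.
preds = {'origin_prompt': ['...'], 'ppl': [3.2]}

pred_strs = preds.pop('prediction', None)  # None instead of KeyError
pred_list_flag = pred_strs is not None and isinstance(pred_strs[0], list)
print(pred_strs, pred_list_flag)  # None False
```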
opencompass/utils/text_postprocessors.py

@@ -49,6 +49,14 @@ def first_capital_postprocess(text: str) -> str:
     return ''


+@TEXT_POSTPROCESSORS.register_module('last-capital')
+def last_capital_postprocess(text: str) -> str:
+    for t in text[::-1]:
+        if t.isupper():
+            return t
+    return ''
+
+
 def first_option_postprocess(text: str, options: str) -> str:
     """Find first valid option for text."""
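The new postprocessor scans the text from the end and returns the last uppercase character, a common trick for pulling a final answer letter out of free-form model output:

```python
from opencompass.utils.text_postprocessors import last_capital_postprocess

print(last_capital_postprocess('I think (B)... final answer: B'))  # 'B'
print(last_capital_postprocess('no capitals here'))                # ''
```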
requirements/agent.txt (new file, mode 100644)

json5
jupyter
jupyter_client
jupytext
lagent
scikit-image
sympy
requirements/extra.txt

 faiss_gpu==1.7.2
-jupyter
-lagent
-scikit-image