OpenDAS / opencompass · Commit b4afe3e7 (unverified)

[Sync] Add InternLM2 Keyset Evaluation Demo (#807)

Authored by Fengzhe Zhou on Jan 17, 2024; committed via GitHub on Jan 17, 2024.
Co-authored-by: zhangyifan1 <zhangyifan1@pjlab.org.cn>
Parent: acae5609

Changes: 54. This page shows 14 changed files with 181 additions and 48 deletions (+181 −48).
configs/summarizers/longbench.py (+3 −3)
opencompass/datasets/circular.py (+20 −0)
opencompass/datasets/ds1000.py (+2 −0)
opencompass/datasets/humaneval.py (+10 −5)
opencompass/datasets/humanevalx.py (+31 −9)
opencompass/datasets/mbpp.py (+36 −20)
opencompass/datasets/siqa.py (+34 −0)
opencompass/lagent/actions/ipython_interpreter.py (+9 −1)
opencompass/models/huggingface.py (+18 −6)
opencompass/openicl/icl_inferencer/icl_gen_inferencer.py (+6 −0)
opencompass/runners/dlc.py (+1 −1)
opencompass/tasks/openicl_infer.py (+3 −0)
opencompass/utils/build.py (+1 −0)
tools/prompt_viewer.py (+7 −3)
configs/summarizers/longbench.py (+3 −3)

 summarizer = dict(
     dataset_abbrs=[
         '--------- LongBench Single-Document QA ---------',  # category
-        "LongBench_narrativeqa",
+        'LongBench_narrativeqa',
         'LongBench_qasper',
         'LongBench_multifieldqa_en',
-        "LongBench_multifieldqa_zh",
+        'LongBench_multifieldqa_zh',
         '--------- LongBench Multi-Document QA ---------',  # category
         'LongBench_hotpotqa',
         'LongBench_2wikimqa',
         ...

@@ -28,5 +28,5 @@ summarizer = dict(
         'LongBench_lcc',
         'LongBench_repobench-p',
     ],
-    summary_groups=sum([v for k, v in locals().items() if k.endswith("_summary_groups")], []),
+    summary_groups=sum([v for k, v in locals().items() if k.endswith('_summary_groups')], []),
 )
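Editor's note: the `summary_groups` line above relies on a config-file idiom that is easy to misread. Every module-level variable whose name ends in `_summary_groups` (normally pulled in via `read_base` imports at the top of the config) is collected and flattened into one list. A minimal self-contained sketch with illustrative variable names:

```python
# Sketch of the locals()-scanning idiom used in the summarizer config above.
# The *_summary_groups variables are illustrative stand-ins for the ones a
# real config would import via read_base.
single_doc_summary_groups = [dict(name='LongBench_single_doc_qa')]
multi_doc_summary_groups = [dict(name='LongBench_multi_doc_qa')]

# sum(list_of_lists, []) flattens one level: each *_summary_groups list is
# concatenated in definition order.
summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], [])
print(summary_groups)
# [{'name': 'LongBench_single_doc_qa'}, {'name': 'LongBench_multi_doc_qa'}]
```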
opencompass/datasets/circular.py (+20 −0)

@@ -13,7 +13,9 @@ from .commonsenseqa import commonsenseqaDataset
 from .hellaswag import hellaswagDataset_V2
 from .mmlu import MMLUDataset
 from .obqa import OBQADataset
+from .piqa import piqaDataset_V2
 from .race import RaceDataset
+from .siqa import siqaDataset_V3
 from .xiezhi import XiezhiDataset

@@ -273,6 +275,24 @@ class CircularXiezhiDataset(XiezhiDataset, metaclass=CircularDatasetMeta):
     default_answer_key = 'answer'


+class CircularsiqaDataset(siqaDataset_V3, metaclass=CircularDatasetMeta):
+    dataset_class = siqaDataset_V3
+    default_circular_splits = ['validation']
+    default_option_keys = ['A', 'B', 'C']
+    default_answer_key = 'answer'
+
+
+class CircularpiqaDataset(piqaDataset_V2, metaclass=CircularDatasetMeta):
+    dataset_class = piqaDataset_V2
+    default_circular_splits = ['validation']
+    default_option_keys = ['sol1', 'sol2']
+
+    def default_answer_key_switch_method(item, circular_pattern):
+        circular_pattern = tuple(int(i[-1]) - 1 for i in circular_pattern)
+        item['answer'] = 'AB'[circular_pattern['AB'.index(item['answer'])]]
+        return item
+
+
 class CircularEvaluator(BaseEvaluator):
     """This Evaluator assesses datasets post-Circular processing, generating
     the following evaluation metrics:
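Editor's note: `CircularpiqaDataset` needs a custom answer-switch method because piqa labels its two options `sol1`/`sol2` rather than letters. A standalone sketch of what that remapping does, using a hypothetical rotated pattern:

```python
# Standalone version of default_answer_key_switch_method for the two-option
# piqa case. circular_pattern holds the option keys in rotated order, e.g.
# ('sol2', 'sol1') after one rotation.
def switch_answer(item, circular_pattern):
    # 'sol1' -> 0, 'sol2' -> 1, via the trailing digit of each key
    pattern = tuple(int(key[-1]) - 1 for key in circular_pattern)
    # relabel the stored answer so it follows the rotated option order
    item['answer'] = 'AB'[pattern['AB'.index(item['answer'])]]
    return item

print(switch_answer({'answer': 'A'}, ('sol2', 'sol1')))  # {'answer': 'B'}
```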
opencompass/datasets/ds1000.py (+2 −0)

@@ -378,6 +378,8 @@ class DS1000ServiceEvaluator(BaseEvaluator):
         processed_predictions = {}
         assert len(predictions) == len(references)
         for i, (pred, gold) in enumerate(zip(predictions, references)):
+            if len(pred) > 10000:
+                pred = ''
             processed_predictions[str(i)] = {'prediction': pred, 'gold': gold}
         with tempfile.TemporaryDirectory() as tmp_dir:
opencompass/datasets/humaneval.py (+10 −5)

@@ -155,6 +155,11 @@ def humaneval_postprocess(text: str) -> str:
 def humaneval_postprocess_v2(text: str) -> str:
     """This is an advanced version of previous postprocess to handle more
     situations, better to use this one."""
+    try:
+        # for chatGLM raw text
+        text = eval(text)
+    except Exception:
+        pass
     text = text.lstrip('\n')
     if '```' in text:
         blocks = re.findall(r'```(.*?)```', text, re.DOTALL)

@@ -173,11 +178,11 @@ def humaneval_postprocess_v2(text: str) -> str:
     text = text.lstrip('\n')
     if text.strip().startswith('def'):
         text = '\n'.join(text.split('\n')[1:])
-    if not text.startswith('    '):
-        if text.startswith(' '):
-            text = '    ' + text.lstrip()
-        else:
-            text = '\n'.join(['    ' + line for line in text.split('\n')])
+    # deal with the indentation error
+    if text.startswith(' '):
+        text = '    ' + text.lstrip()
+    else:
+        text = '\n'.join(['    ' + line for line in text.split('\n')])
     text = text.split('\n')
     # If number of leading space reduces, we assume that the code block ends.
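Editor's note: the rewritten indentation branch drops the old outer `startswith` guard, so any completion that is not already indented gets a uniform four-space body. A rough illustration of the two paths (a sketch of just the branch above, not the full postprocessor):

```python
def fix_indent(text: str) -> str:
    # deal with the indentation error (mirrors the branch added above)
    if text.startswith(' '):
        # partially indented: normalise the first line to four spaces
        return '    ' + text.lstrip()
    # flush-left: indent every line so it parses as a function body
    return '\n'.join(['    ' + line for line in text.split('\n')])

print(fix_indent('return x + 1'))         # '    return x + 1'
print(fix_indent('if x:\n    return 1'))  # every line gains four spaces
```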
opencompass/datasets/humanevalx.py (+31 −9)

@@ -14,6 +14,7 @@ from datasets import Dataset
 from opencompass.openicl.icl_evaluator import BaseEvaluator

 from .base import BaseDataset
+from .humaneval import humaneval_postprocess_v2

 _LANGUAGE_NAME_DICT = {
     'cpp': 'CPP',

@@ -89,9 +90,11 @@ class HumanevalXEvaluator(BaseEvaluator):
     def score(self, predictions, references):
-        predictions = [{
-            'task_id': f'{_LANGUAGE_NAME_DICT[self.language]}/{i}',
-            'generation': _clean_up_code(pred, self.language),
-        } for i, pred in enumerate(predictions)]
+        predictions = [{
+            'task_id': f'{_LANGUAGE_NAME_DICT[self.language]}/{i}',
+            'generation': _clean_up_code(pred, self.language, refer),
+        } for i, (pred, refer) in enumerate(zip(predictions, references))]
         with tempfile.TemporaryDirectory() as tmp_dir:
             tmp_out_path = osp.join(tmp_dir, f'humanevalx_{self.language}.json')

@@ -161,15 +164,28 @@ class HumanevalXEvaluator(BaseEvaluator):
         return False, err


-def _clean_up_code(text: str, language_type: str) -> str:
+def _clean_up_code(text: str, language_type: str, reference) -> str:
     """Cleans up the generated code."""
+    try:
+        # for chatGLM related text
+        text = eval(text)
+    except Exception:
+        pass
+    # extract code from code block
+    text = text.lstrip('\n')
+    if '```' in text:
+        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
+        if len(blocks) == 0:
+            text = text.split('```')[1]  # fall back to default strategy
+        else:
+            text = blocks[0]  # fetch the first code block
+            if not text.startswith('\n'):  # in case starting with ```xxx
+                text = text[max(text.find('\n') + 1, 0):]
     if language_type.lower() == 'python':
+        text = humaneval_postprocess_v2(text)
         # we need to take care of the first line
         # append extra space for first line for correct indentation
-        text = '    ' + text.lstrip()
+        for c_index, c in enumerate(text[:5]):
+            if c != ' ':
+                text = ' ' * (4 - c_index) + text
+                break

         text_splits = text.split('\n')
         is_empty_line = False

@@ -189,7 +205,13 @@ def _clean_up_code(text: str, language_type: str, reference) -> str:
         for w in end_words:
             if w in text:
                 text = text[:text.rfind(w)]
-    elif language_type.lower() == 'java':
+    # strip function head for all other language
+    func_name = reference.strip().split('\n')[-1]
+    if func_name:
+        func_name = func_name.strip().strip('{')
+        if func_name in text:
+            text = '\n'.join(text[text.find(func_name):].split('\n')[1:])
+    if language_type.lower() == 'java':
         main_pos = text.find('public static void main')
         if main_pos != -1:
             text = text[:main_pos] + '}'
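Editor's note: `_clean_up_code` now begins with the same fenced-block extraction strategy used by the HumanEval and MBPP postprocessors. A self-contained sketch of that strategy:

```python
import re

# Sketch of the fenced-code extraction shared by _clean_up_code and the
# postprocessors in this commit: prefer a complete ```...``` block, fall back
# to everything after the first fence, and drop a leading language tag.
def extract_code(text: str) -> str:
    text = text.lstrip('\n')
    if '```' in text:
        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
        if len(blocks) == 0:
            text = text.split('```')[1]    # unclosed fence: take the tail
        else:
            text = blocks[0]               # first complete code block
            if not text.startswith('\n'):  # starts with a tag like ```python
                text = text[max(text.find('\n') + 1, 0):]
    return text

print(extract_code("Here you go:\n```python\nprint('hi')\n```"))  # print('hi')
```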
opencompass/datasets/mbpp.py (+36 −20)

@@ -200,30 +200,28 @@ class MBPPEvaluator(BaseEvaluator):
     def score(self, predictions, references):
         assert len(predictions) == len(references)
-        predictions = [self._process_answer(pred) for pred in predictions]
         if self.metric == 'MBPP':
             result = {'pass': 0, 'timeout': 0, 'failed': 0, 'wrong_answer': 0}
             details = {}
-            for index, (test_case, pred) in enumerate(
-                    zip(references, predictions)):
-                programs = self._process_test(test_case, pred)
-                try:
-                    # Add exec globals to prevent the exec to raise
-                    # unnecessary NameError for correct answer
-                    exec_globals = {}
-                    with swallow_io():
-                        with time_limit(2):
-                            exec(programs, exec_globals)
-                    r = 'pass'
-                except TimeOutException:
-                    r = 'timeout'
-                except AssertionError:
-                    r = 'wrong_answer'
-                except BaseException:
-                    r = 'failed'
-                result[r] += 1
-                details[str(index)] = {'programs': programs, 'result': r}
+            # change to thread pool for better killing blocked instance
+            with ThreadPoolExecutor() as executor:
+                futures = []
+                for i, (refer, pred) in enumerate(zip(references, predictions)):
+                    pred = self._process_answer(pred)
+                    programs = self._process_test(refer, pred)
+                    future = executor.submit(execution, programs, i, 3)
+                    futures.append(future)
+
+                from tqdm import tqdm
+                for future in tqdm(as_completed(futures), total=len(futures)):
+                    index, key = future.result()
+                    result[key] += 1
+                    details[str(index)] = {
+                        'programs': predictions[index],
+                        'result': key
+                    }

             result['score'] = result['pass'] / len(predictions) * 100
             result['details'] = details

@@ -263,6 +261,20 @@ class MBPPEvaluator(BaseEvaluator):
         return {f'mbpp_plus_{k}': score[k] * 100 for k in score}

     def _process_answer(self, text):
+        try:
+            # for chatGLM related text
+            text = eval(text)
+        except Exception:
+            pass
+        # deal with code block
+        if '```' in text:
+            blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
+            if len(blocks) == 0:
+                text = text.split('```')[1]  # fall back to default strategy
+            else:
+                text = blocks[0]  # fetch the first code block
+                if not text.startswith('\n'):  # in case starting with ```xxx
+                    text = text[max(text.find('\n') + 1, 0):]
         text = text.strip()
         match = re.search(r"('\s*|)(\[DONE\]|DONE)", text)
         if match:

@@ -275,6 +287,10 @@ class MBPPEvaluator(BaseEvaluator):
             text = text[1:]
         if text.endswith("'"):
             text = text[:-1]
+        text = text.replace('\\', '')
+        match = re.search(r'```python(.*)```', text, re.DOTALL)
+        if match:
+            text = match.group(1).strip().split('```')[0].strip()
         return text

     def _process_test(self, test_case, pred):
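Editor's note: the scorer moves from an in-process `exec` loop to a thread pool so a blocked program no longer stalls the whole evaluation, collecting results with `as_completed` as they finish. A minimal sketch of the pattern; `run_one` is a simplified stand-in for the `execution(programs, task_id, timeout)` helper the diff assumes (the real helper also enforces the timeout and captures I/O):

```python
from concurrent.futures import ThreadPoolExecutor, as_completed

# Simplified stand-in for OpenCompass's execution() helper, assumed to
# return (task_id, result_key).
def run_one(program: str, task_id: int, timeout: int):
    try:
        exec(program, {})  # the real helper also enforces `timeout`
        return task_id, 'pass'
    except AssertionError:
        return task_id, 'wrong_answer'
    except BaseException:
        return task_id, 'failed'

programs = ['assert 1 + 1 == 2', 'assert 1 + 1 == 3', '1 / 0']
result = {'pass': 0, 'timeout': 0, 'failed': 0, 'wrong_answer': 0}
with ThreadPoolExecutor() as executor:
    futures = [executor.submit(run_one, p, i, 3) for i, p in enumerate(programs)]
    for future in as_completed(futures):  # consume in completion order
        index, key = future.result()
        result[key] += 1
print(result)  # {'pass': 1, 'timeout': 0, 'failed': 1, 'wrong_answer': 1}
```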
opencompass/datasets/siqa.py (+34 −0)

@@ -78,3 +78,37 @@ class siqaDataset_V2(BaseDataset):
         val_dataset = siqaDataset_V2.load_single(path, 'dev.jsonl',
                                                  'dev-labels.lst')
         return DatasetDict({'train': train_dataset, 'validation': val_dataset})
+
+
+@LOAD_DATASET.register_module()
+class siqaDataset_V3(BaseDataset):
+    """Disconnect from HuggingFace version of HFDataset."""
+
+    @staticmethod
+    def load_single(path, data_filename, label_filename):
+        data_path = os.path.join(path, data_filename)
+        label_path = os.path.join(path, label_filename)
+        dataset = []
+        with open(data_path, 'r', encoding='utf-8') as f:
+            data_lines = f.readlines()
+        with open(label_path, 'r', encoding='utf-8') as f:
+            label_lines = f.readlines()
+        assert len(data_lines) == len(label_lines)
+        for data, label in zip(data_lines, label_lines):
+            i = json.loads(data.strip())
+            i['A'] = i.pop('answerA')
+            i['B'] = i.pop('answerB')
+            i['C'] = i.pop('answerC')
+            i['answer'] = 'ABC'[int(label.strip()) - 1]
+            dataset.append(i)
+        return Dataset.from_list(dataset)
+
+    @staticmethod
+    def load(path):
+        train_dataset = siqaDataset_V3.load_single(path, 'train.jsonl',
+                                                   'train-labels.lst')
+        val_dataset = siqaDataset_V3.load_single(path, 'dev.jsonl',
+                                                 'dev-labels.lst')
+        return DatasetDict({'train': train_dataset, 'validation': val_dataset})
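Editor's note: `siqaDataset_V3` reads the raw social_i_qa files directly rather than going through HuggingFace `datasets`. The loader implies the following on-disk layout (example contents are hypothetical, inferred from the code above):

```python
# dev.jsonl        one JSON object per line, carrying the context, question
#                  and 'answerA'/'answerB'/'answerC' option fields
# dev-labels.lst   one gold label per line: '1', '2' or '3'
#
# load_single flattens each pair of lines into a row like:
row = {
    'context': '...', 'question': '...',         # passed through unchanged
    'A': 'option text', 'B': '...', 'C': '...',  # renamed from answerA/B/C
    'answer': 'B',                               # 'ABC'[int('2') - 1]
}
```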
opencompass/lagent/actions/ipython_interpreter.py (+9 −1)

@@ -57,6 +57,8 @@ class IPythonInterpreter(BaseAction):
         user_data_dir (str): Specified the user data directory for files
             loading. If set to `ENV`, use `USER_DATA_DIR` environment variable.
             Defaults to `ENV`.
+        force_user_data (bool): Whether to force use user data.
+            Defaults to True.
     """

     _KERNEL_CLIENTS = {}

@@ -68,7 +70,8 @@ class IPythonInterpreter(BaseAction):
                  disable_description: Optional[str] = None,
                  timeout: int = 20,
                  trim_output: Optional[int] = 1024,
-                 user_data_dir: str = 'ENV') -> None:
+                 user_data_dir: str = 'ENV',
+                 force_user_data: bool = True) -> None:
         super().__init__(description, name, enable, disable_description)

         self.timeout = timeout

@@ -82,6 +85,11 @@ class IPythonInterpreter(BaseAction):
                 f'{user_data_dir} does not exist.'
             user_data_dir = os.path.abspath(user_data_dir)
             user_data_dir = f"import os\nos.chdir('{user_data_dir}')"
+        else:
+            if force_user_data:
+                raise ValueError('user_data_dir is not set. Please '
+                                 'set force_user_data to False if '
+                                 'no extra data needed.')
         self.user_data_dir = user_data_dir
         self._initialized = False
         self.trim_output = trim_output
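Editor's note: with the new flag, running the interpreter without a user data directory has to be an explicit choice. A usage sketch under that assumption (an empty `user_data_dir` stands in for "not set"; other constructor arguments keep their defaults):

```python
from opencompass.lagent.actions.ipython_interpreter import IPythonInterpreter

# Opting out of a user data directory must now be explicit; with the default
# force_user_data=True, an unset user_data_dir raises ValueError instead of
# silently running without a working directory.
interpreter = IPythonInterpreter(user_data_dir='', force_user_data=False)
```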
opencompass/models/huggingface.py (+18 −6)

@@ -225,6 +225,7 @@ class HuggingFace(BaseModel):
     def generate(self,
                  inputs: List[str],
                  max_out_len: int,
+                 min_out_len: Optional[int] = None,
                  stopping_criteria: List[str] = [],
                  **kwargs) -> List[str]:
         """Generate results given a list of inputs.

@@ -232,6 +233,7 @@ class HuggingFace(BaseModel):
         Args:
             inputs (List[str]): A list of strings.
             max_out_len (int): The maximum length of the output.
+            min_out_len (Optional[int]): The minimum length of the output.

         Returns:
             List[str]: A list of generated strings.

@@ -241,12 +243,14 @@ class HuggingFace(BaseModel):
         if self.batch_padding and len(inputs) > 1:
             return self._batch_generate(inputs=inputs,
                                         max_out_len=max_out_len,
+                                        min_out_len=min_out_len,
                                         stopping_criteria=stopping_criteria,
                                         **generation_kwargs)
         else:
             return sum(
                 (self._single_generate(inputs=[input_],
                                        max_out_len=max_out_len,
+                                       min_out_len=min_out_len,
                                        stopping_criteria=stopping_criteria,
                                        **generation_kwargs)
                  for input_ in inputs), [])

@@ -254,6 +258,7 @@ class HuggingFace(BaseModel):
     def _batch_generate(self,
                         inputs: List[str],
                         max_out_len: int,
+                        min_out_len: Optional[int] = None,
                         stopping_criteria: List[str] = [],
                         **kwargs) -> List[str]:
         """Support for batch prompts inference.

@@ -308,6 +313,9 @@ class HuggingFace(BaseModel):
             ])
             kwargs['stopping_criteria'] = stopping_criteria

+        if min_out_len is not None:
+            kwargs['min_new_tokens'] = min_out_len
+
         # step-2: conduct model forward to generate output
         outputs = self.model.generate(**tokens,
                                       max_new_tokens=max_out_len,

@@ -331,6 +339,7 @@ class HuggingFace(BaseModel):
     def _single_generate(self,
                          inputs: List[str],
                          max_out_len: int,
+                         min_out_len: Optional[int] = None,
                          stopping_criteria: List[str] = [],
                          **kwargs) -> List[str]:
         """Support for single prompt inference.

@@ -390,6 +399,9 @@ class HuggingFace(BaseModel):
             ])
             kwargs['stopping_criteria'] = stopping_criteria

+        if min_out_len is not None:
+            kwargs['min_new_tokens'] = min_out_len
+
         # To accommodate the PeftModel, parameters should be passed in
         # key-value format for generate.
         outputs = self.model.generate(input_ids=input_ids,

@@ -502,7 +514,7 @@ class HuggingFace(BaseModel):
             self.tokenizer.pad_token_id).sum(-1).cpu().numpy()
         if mask_length is not None:
             lens -= np.array(mask_length)
-        ce_loss = loss.sum(-1).cpu().detach().numpy() / lens
+        ce_loss = loss.float().sum(-1).cpu().detach().numpy() / lens
         return ce_loss

     def get_loglikelihood(

@@ -554,7 +566,6 @@ class HuggingFace(BaseModel):
         input_ids = input_tokenizer_out['input_ids'][:, :self.max_seq_len]
         input_length = input_tokenizer_out['length']
-        attention_mask = input_tokenizer_out['attention_mask']
         context_ids = [
             self.tokenizer(inputs[i].replace(conts[i], ''),
                            padding=False,

@@ -563,7 +574,7 @@ class HuggingFace(BaseModel):
             for i in range(len(inputs))
         ]
         # forward
-        outputs = self.model(input_ids, attention_mask)['logits']
+        outputs = self.model(input_ids)['logits']
         outputs = torch.nn.functional.log_softmax(outputs, dim=-1)
         # calculate loglikelihood
         answer = np.zeros(len(inputs))

@@ -609,9 +620,10 @@ class HuggingFace(BaseModel):
             self.tokenizer.pad_token_id).sum(-1).cpu().numpy()
         mink_percent = []
         for nloss, nlen in zip(loss, lens):
-            nlen = max(int(nlen) * k // 100, 1)
-            nloss = torch.topk(loss, nlen, dim=-1)[0]
-            nloss = -nloss.mean().cpu().detach().numpy()
+            nlen = int(nlen)
+            minklen = max(nlen * k // 100, 1)
+            nloss = torch.topk(loss[-nlen:], minklen, dim=-1)[0]
+            nloss = -nloss.float().mean().cpu().detach().numpy()
             mink_percent.append(nloss)
         return np.array(mink_percent)
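Editor's note: at the HuggingFace level the new `min_out_len` simply becomes `min_new_tokens`, a standard `transformers` `generate()` argument. A minimal sketch of the equivalent call outside OpenCompass (model name illustrative):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained('gpt2')  # illustrative model
model = AutoModelForCausalLM.from_pretrained('gpt2')

tokens = tokenizer('The capital of France is', return_tensors='pt')
# min_new_tokens mirrors kwargs['min_new_tokens'] = min_out_len above:
# generation is forced to emit at least that many new tokens.
outputs = model.generate(**tokens, max_new_tokens=32, min_new_tokens=8)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```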
opencompass/openicl/icl_inferencer/icl_gen_inferencer.py (+6 −0)

@@ -29,6 +29,8 @@ class GenInferencer(BaseInferencer):
         model (:obj:`BaseModelWrapper`, optional): The module to inference.
         max_seq_len (:obj:`int`, optional): Maximum number of tokenized words
             allowed by the LM.
+        min_out_len (:obj:`int`, optional): Minimum number of generated tokens
+            by the LM
         batch_size (:obj:`int`, optional): Batch size for the
             :obj:`DataLoader`.
         output_json_filepath (:obj:`str`, optional): File path for output

@@ -49,6 +51,7 @@ class GenInferencer(BaseInferencer):
             max_out_len: int,
             stopping_criteria: List[str] = [],
             max_seq_len: Optional[int] = None,
+            min_out_len: Optional[int] = None,
             batch_size: Optional[int] = 1,
             gen_field_replace_token: Optional[str] = '',
             output_json_filepath: Optional[str] = './icl_inference_output',

@@ -66,6 +69,7 @@ class GenInferencer(BaseInferencer):
         self.gen_field_replace_token = gen_field_replace_token
         self.max_out_len = max_out_len
+        self.min_out_len = min_out_len
         self.stopping_criteria = stopping_criteria

         if self.model.is_api and save_every is None:

@@ -135,6 +139,8 @@ class GenInferencer(BaseInferencer):
                 sig = inspect.signature(self.model.generate)
                 if 'stopping_criteria' in sig.parameters:
                     extra_gen_kwargs['stopping_criteria'] = self.stopping_criteria
+                if 'min_out_len' in sig.parameters:
+                    extra_gen_kwargs['min_out_len'] = self.min_out_len
             with torch.no_grad():
                 parsed_entries = self.model.parse_template(entry, mode='gen')
                 results = self.model.generate_from_template(
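Editor's note: the inferencer forwards `min_out_len` only when the wrapped model's `generate` actually declares it, so model wrappers without the new parameter keep working unchanged. A small sketch of that capability check:

```python
import inspect

def generate(inputs, max_out_len, min_out_len=None):  # toy model.generate
    pass

extra_gen_kwargs = {}
sig = inspect.signature(generate)
# only pass the kwarg if the target signature declares it
if 'min_out_len' in sig.parameters:
    extra_gen_kwargs['min_out_len'] = 16
print(extra_gen_kwargs)  # {'min_out_len': 16}
```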
opencompass/runners/dlc.py (+1 −1)

@@ -116,7 +116,7 @@ class DLCRunner(BaseRunner):
                      ' --worker_count 1'
                      f' --worker_cpu {max(num_gpus * 6, 8)}'
                      f' --worker_gpu {num_gpus}'
-                     f' --worker_memory {max(num_gpus * 32, 48)}'
+                     f' --worker_memory {max(num_gpus * 64, 48)}'
                      f" --worker_image {self.aliyun_cfg['worker_image']}"
                      ' --interactive')
         get_cmd = partial(task.get_command,
opencompass/tasks/openicl_infer.py (+3 −0)

@@ -61,6 +61,7 @@ class OpenICLInferTask(BaseTask):
         for model_cfg, dataset_cfgs in zip(self.model_cfgs, self.dataset_cfgs):
             self.max_out_len = model_cfg.get('max_out_len', None)
             self.batch_size = model_cfg.get('batch_size', None)
+            self.min_out_len = model_cfg.get('min_out_len', None)
             self.model = build_model_from_cfg(model_cfg)

             for dataset_cfg in dataset_cfgs:

@@ -102,6 +103,8 @@ class OpenICLInferTask(BaseTask):
             inferencer_cfg['model'] = self.model
         self._set_default_value(inferencer_cfg, 'max_out_len', self.max_out_len)
+        self._set_default_value(inferencer_cfg, 'min_out_len',
+                                self.min_out_len)
         self._set_default_value(inferencer_cfg, 'batch_size', self.batch_size)
         inferencer_cfg['max_seq_len'] = self.model_cfg.get('max_seq_len')
         inferencer = ICL_INFERENCERS.build(inferencer_cfg)
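Editor's note: end to end, this lets a model config carry `min_out_len` and have the infer task inject it into the inferencer as a default. A hypothetical, abbreviated model entry showing where the field would live:

```python
# Hypothetical model config entry; only fields relevant to this commit are
# shown. OpenICLInferTask reads min_out_len from here and forwards it via
# _set_default_value(inferencer_cfg, 'min_out_len', ...).
models = [
    dict(
        abbr='internlm2-7b',           # illustrative name
        path='internlm/internlm2-7b',  # illustrative HF path
        max_out_len=100,
        min_out_len=1,                 # new: minimum generated tokens
        batch_size=8,
    )
]
```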
opencompass/utils/build.py (+1 −0)

@@ -21,4 +21,5 @@ def build_model_from_cfg(model_cfg: ConfigDict):
     model_cfg.pop('abbr', None)
     model_cfg.pop('summarizer_abbr', None)
     model_cfg.pop('pred_postprocessor', None)
+    model_cfg.pop('min_out_len', None)
     return MODELS.build(model_cfg)
tools/prompt_viewer.py (+7 −3)

@@ -5,7 +5,8 @@ from typing import Dict
 from mmengine.config import Config, ConfigDict
 from opencompass.openicl.icl_inferencer import (CLPInferencer, GenInferencer,
-                                                PPLInferencer)
+                                                PPLInferencer,
+                                                PPLOnlyInferencer)
 from opencompass.registry import ICL_PROMPT_TEMPLATES, ICL_RETRIEVERS
 from opencompass.utils import (Menu, build_dataset_from_cfg,
                                build_model_from_cfg, dataset_abbr_from_cfg,

@@ -77,7 +78,8 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
     ice_idx_list = retriever.retrieve()

-    assert infer_cfg.inferencer.type in [PPLInferencer, GenInferencer], \
+    assert infer_cfg.inferencer.type in [PPLInferencer, GenInferencer,
+                                         CLPInferencer, PPLOnlyInferencer], \
         'Only PPLInferencer and GenInferencer are supported'

     for idx in range(min(count, len(ice_idx_list))):

@@ -127,7 +129,9 @@ def print_prompts(model_cfg, dataset_cfg, count=1):
             print('-' * 100)
             print(prompt)
             print('-' * 100)
-        elif infer_cfg.inferencer.type in [GenInferencer, CLPInferencer]:
+        elif infer_cfg.inferencer.type in [GenInferencer, CLPInferencer,
+                                           PPLOnlyInferencer]:
             ice_idx = ice_idx_list[idx]
             ice = retriever.generate_ice(ice_idx, ice_template=ice_template)
             prompt = retriever.generate_prompt_for_generate_task(