OpenDAS / opencompass · Commit fd389e2d

Unverified commit fd389e2d, authored Aug 31, 2023 by Hubert, committed by GitHub on Aug 31, 2023

[Feat] support codellama and preds collection tools (#335)

parent 2a271dbf

Showing 11 changed files with 376 additions and 0 deletions (+376, -0)
configs/models/codellama/hf_codellama_13b.py           +21 -0
configs/models/codellama/hf_codellama_13b_instruct.py  +21 -0
configs/models/codellama/hf_codellama_13b_python.py    +21 -0
configs/models/codellama/hf_codellama_34b.py           +21 -0
configs/models/codellama/hf_codellama_34b_instruct.py  +21 -0
configs/models/codellama/hf_codellama_34b_python.py    +21 -0
configs/models/codellama/hf_codellama_7b.py            +21 -0
configs/models/codellama/hf_codellama_7b_instruct.py   +21 -0
configs/models/codellama/hf_codellama_7b_python.py     +21 -0
opencompass/datasets/humanevalx.py                     +7 -0
tools/collect_code_preds.py                            +180 -0
configs/models/codellama/hf_codellama_13b.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 13B
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-13b',
        path="codellama/CodeLlama-13b-hf",
        tokenizer_path='codellama/CodeLlama-13b-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=2, num_procs=1),
    ),
]
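These configs are plain mmengine-style Python files, so they can be loaded and inspected directly, which is exactly how the collection tool below consumes them. A minimal sketch, assuming opencompass and mmengine are installed and the script runs from the repo root:

from mmengine import Config

# Load the config file above and inspect the single model entry it defines.
cfg = Config.fromfile('configs/models/codellama/hf_codellama_13b.py')
model = cfg.models[0]
print(model['abbr'])     # CodeLlama-13b
print(model['run_cfg'])  # {'num_gpus': 2, 'num_procs': 1}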
configs/models/codellama/hf_codellama_13b_instruct.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 13B Instruct
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-13b-Instruct',
        path="codellama/CodeLlama-13b-Instruct-hf",
        tokenizer_path='codellama/CodeLlama-13b-Instruct-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=2, num_procs=1),
    ),
]
configs/models/codellama/hf_codellama_13b_python.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 13B Python
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-13b-Python',
        path="codellama/CodeLlama-13b-Python-hf",
        tokenizer_path='codellama/CodeLlama-13b-Python-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=2, num_procs=1),
    ),
]
configs/models/codellama/hf_codellama_34b.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 34B
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-34b',
        path="codellama/CodeLlama-34b-hf",
        tokenizer_path='codellama/CodeLlama-34b-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=4, num_procs=1),
    ),
]
configs/models/codellama/hf_codellama_34b_instruct.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 34B Instruct
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-34b-Instruct',
        path="codellama/CodeLlama-34b-Instruct-hf",
        tokenizer_path='codellama/CodeLlama-34b-Instruct-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=4, num_procs=1),
    ),
]
configs/models/codellama/hf_codellama_34b_python.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 34B Python
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-34b-Python',
        path="codellama/CodeLlama-34b-Python-hf",
        tokenizer_path='codellama/CodeLlama-34b-Python-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=4, num_procs=1),
    ),
]
configs/models/codellama/hf_codellama_7b.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 7B
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-7b',
        path="codellama/CodeLlama-7b-hf",
        tokenizer_path='codellama/CodeLlama-7b-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]
configs/models/codellama/hf_codellama_7b_instruct.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 7B Instruct
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-7b-Instruct',
        path="codellama/CodeLlama-7b-Instruct-hf",
        tokenizer_path='codellama/CodeLlama-7b-Instruct-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]
configs/models/codellama/hf_codellama_7b_python.py  0 → 100644

from opencompass.models import HuggingFaceCausalLM

models = [
    # CodeLlama 7B Python
    dict(
        type=HuggingFaceCausalLM,
        abbr='CodeLlama-7b-Python',
        path="codellama/CodeLlama-7b-Python-hf",
        tokenizer_path='codellama/CodeLlama-7b-Python-hf',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]
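Across the nine new configs only the HF path, abbreviation, and GPU allocation vary: 7B variants run on 1 GPU, 13B on 2, and 34B on 4. A compact illustration of that mapping (not part of the commit):

# Model-size-to-GPU mapping used consistently across the configs above.
gpu_map = {'7b': 1, '13b': 2, '34b': 4}
for size, num_gpus in gpu_map.items():
    for suffix in ('', '-Python', '-Instruct'):
        print(f'codellama/CodeLlama-{size}{suffix}-hf: num_gpus={num_gpus}')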
opencompass/datasets/humanevalx.py

...
@@ -145,6 +145,13 @@ class HumanevalXEvaluator(BaseEvaluator):
def _clean_up_code(text: str, language_type: str) -> str:
    """Cleans up the generated code."""
    if language_type.lower() == 'python':
        # we need to take care of the first line
        # append extra space for first line for correct indentation
        for c_index, c in enumerate(text[:5]):
            if c != ' ':
                text = ' ' * (4 - c_index) + text
                break

        text_splits = text.split('\n')
        is_empty_line = False
        ind_empty_line = None
...
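The added first-line handling pads a Python completion whose opening line lost part of its one-level indentation, which HumanEval-style function bodies expect. A standalone illustration of the same loop:

# If the first generated line starts with fewer than four spaces, pad it
# back to a full indent level; a body starting at column 0 gains 4 spaces.
text = 'return a + b'
for c_index, c in enumerate(text[:5]):
    if c != ' ':
        text = ' ' * (4 - c_index) + text
        break
print(repr(text))  # '    return a + b'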
tools/collect_code_preds.py  0 → 100644

import argparse
import json
import os
import os.path as osp
import re

import mmengine
from mmengine import Config
from mmengine.utils import mkdir_or_exist

from opencompass.datasets.humanevalx import _clean_up_code
from opencompass.utils import (dataset_abbr_from_cfg, get_infer_output_path,
                               get_logger, model_abbr_from_cfg)


def parse_args():
    parser = argparse.ArgumentParser(
        description='Collect Humanevalx dataset predictions.')
    parser.add_argument('config', help='Config file path')
    parser.add_argument('-r',
                        '--reuse',
                        nargs='?',
                        type=str,
                        const='latest',
                        help='Reuse previous outputs & results, and run any '
                        'missing jobs presented in the config. If its '
                        'argument is not specified, the latest results in '
                        'the work_dir will be reused. The argument should '
                        'also be a specific timestamp, e.g. 20230516_144254')
    args = parser.parse_args()
    return args


_LANGUAGE_NAME_DICT = {
    'cpp': 'CPP',
    'go': 'Go',
    'java': 'Java',
    'js': 'JavaScript',
    'python': 'Python',
    'rust': 'Rust',
}

FAILED = 0
SUCCEED = 1


def gpt_python_postprocess(ori_prompt: str, text: str) -> str:
    """Better answer postprocessor for better instruction-aligned models like
    GPT."""
    if '```' in text:
        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
        if len(blocks) == 0:
            text = text.split('```')[1]  # fall back to default strategy
        else:
            text = blocks[0]  # fetch the first code block
            if not text.startswith('\n'):  # in case starting with ```python
                text = text[max(text.find('\n') + 1, 0):]

    match_ori = re.search(r'def(.*?)\(', ori_prompt)
    match = re.search(r'def(.*?)\(', text)
    if match:
        if match.group() == match_ori.group():
            text = re.sub('def(.*?)\n', '', text, count=1)

    for c_index, c in enumerate(text[:5]):
        if c != ' ':
            text = ' ' * (4 - c_index) + text
            break

    text = text.split('\n\n\n')[0]
    return text
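# Worked example for gpt_python_postprocess (hypothetical inputs, added here
# for illustration only). Given a fenced completion that repeats the prompt's
# signature:
#
#   ori_prompt = 'def add(a, b):\n'
#   text = '```python\ndef add(a, b):\n    return a + b\n```'
#
# the fence is stripped, the duplicated 'def add(' line is removed because it
# matches the prompt's, and the result is '    return a + b\n'.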
def collect_preds(filename: str):
    # in case the prediction is partial
    root, ext = osp.splitext(filename)
    partial_filename = root + '_0' + ext
    # collect all the prediction results
    if not osp.exists(osp.realpath(filename)) and not osp.exists(
            osp.realpath(partial_filename)):
        print(f'No predictions found for {filename}')
        return FAILED, None, None
    else:
        if osp.exists(osp.realpath(filename)):
            preds = mmengine.load(filename)
            pred_strs = [
                preds[str(i)]['prediction'] for i in range(len(preds))
            ]
            ori_prompt_strs = [
                preds[str(i)]['origin_prompt'] for i in range(len(preds))
            ]
        else:
            filename = partial_filename
            pred_strs = []
            ori_prompt_strs = []
            i = 1
            while osp.exists(osp.realpath(filename)):
                preds = mmengine.load(filename)
                filename = root + f'_{i}' + ext
                i += 1
                pred_strs += [
                    preds[str(i)]['prediction'] for i in range(len(preds))
                ]
                ori_prompt_strs += [
                    preds[str(i)]['origin_prompt'] for i in range(len(preds))
                ]
    return SUCCEED, ori_prompt_strs, pred_strs
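# Note on the partial-file branch above: sharded prediction files are expected
# to be named <root>_0<ext>, <root>_1<ext>, ... (e.g., hypothetically,
# humanevalx_python_0.json, humanevalx_python_1.json) and are merged in order
# until the next index is missing.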
def main():
    args = parse_args()
    # initialize logger
    logger = get_logger(log_level='INFO')
    cfg = Config.fromfile(args.config)
    cfg.setdefault('work_dir', './outputs/default/')

    assert args.reuse, 'Please provide the experiment work dir.'
    if args.reuse:
        if args.reuse == 'latest':
            if not os.path.exists(cfg.work_dir) or not os.listdir(
                    cfg.work_dir):
                logger.warning('No previous results to reuse!')
            else:
                dirs = os.listdir(cfg.work_dir)
                dir_time_str = sorted(dirs)[-1]
        else:
            dir_time_str = args.reuse
        logger.info(f'Reusing experiments from {dir_time_str}')
        # update "actual" work_dir
        cfg['work_dir'] = osp.join(cfg.work_dir, dir_time_str)

    for model in cfg.models:
        model_abbr = model_abbr_from_cfg(model)
        for dataset in cfg.datasets:
            dataset_abbr = dataset_abbr_from_cfg(dataset)
            filename = get_infer_output_path(
                model, dataset, osp.join(cfg.work_dir, 'predictions'))

            succeed, ori_prompt_strs, pred_strs = collect_preds(filename)
            if not succeed:
                continue

            # infer the language type
            for k, v in _LANGUAGE_NAME_DICT.items():
                if k in dataset_abbr:
                    lang = k
                    task = v
                    break

            # special postprocess for GPT
            if 'CodeLlama' not in model_abbr and lang == 'python':
                predictions = [{
                    'task_id': f'{task}/{i}',
                    'generation': gpt_python_postprocess(ori_prompt, pred),
                } for i, (ori_prompt,
                          pred) in enumerate(zip(ori_prompt_strs, pred_strs))]
            else:
                predictions = [{
                    'task_id': f'{task}/{i}',
                    'generation': _clean_up_code(pred, lang),
                } for i, pred in enumerate(pred_strs)]

            # save processed results if not exists
            result_file_path = os.path.join(cfg['work_dir'], 'humanevalx',
                                            model_abbr,
                                            f'humanevalx_{lang}.json')
            if osp.exists(result_file_path):
                logger.info(
                    f'File exists for {model_abbr}, skip copy from predictions.'  # noqa
                )
            else:
                mkdir_or_exist(osp.split(result_file_path)[0])
                with open(result_file_path, 'w') as f:
                    for pred in predictions:
                        f.write(json.dumps(pred) + '\n')


if __name__ == '__main__':
    main()
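The tool is run with the same config used for inference plus the -r/--reuse flag, e.g. `python tools/collect_code_preds.py <config> -r latest`. It writes one JSON object per line under work_dir. A sketch of reading the collected file back (the path, timestamp, and model name here are illustrative):

import json

# Illustrative layout: <work_dir>/<timestamp>/humanevalx/<model_abbr>/humanevalx_<lang>.json
path = 'outputs/default/20230516_144254/humanevalx/CodeLlama-7b/humanevalx_python.json'
with open(path) as f:
    for line in f:
        record = json.loads(line)
        # Each record carries a HumanEval-X style task id and the cleaned code.
        print(record['task_id'], len(record['generation']))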