Unverified commit ddb81972 authored by Hubert, committed by GitHub

[Feat] support wizardcoder series (#344)

* [Feat] support wizardcoder series

* minor fix
parent 2c71b0f6
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import HumanevalXDataset, HumanevalXEvaluator
humanevalx_reader_cfg = dict(
input_columns=['prompt'], output_column='task_id', train_split='test')
# This prompt is used for the WizardCoder series
# You can use the 620cfa config for basic generation
humanevalx_infer_cfg = {
lang: dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Create a {lang} script for this problem:
{{prompt}}
### Response:"""),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=1024))
for lang in ['python', 'cpp', 'go', 'java', 'js']
}
humanevalx_eval_cfg_dict = {
lang: dict(
evaluator=dict(
type=HumanevalXEvaluator,
language=lang,
            ip_address="localhost",  # replace with your code_eval_server IP address and port
            port=5000,
        ),  # refer to https://github.com/Ezra-Yu/code-evaluator to launch a server
pred_role='BOT')
    for lang in ['python', 'cpp', 'go', 'java', 'js']  # rust is not supported yet
}
humanevalx_datasets = [
dict(
type=HumanevalXDataset,
abbr=f'humanevalx-{lang}',
language=lang,
path='./backup_data/humanevalx',
reader_cfg=humanevalx_reader_cfg,
infer_cfg=humanevalx_infer_cfg[lang],
eval_cfg=humanevalx_eval_cfg_dict[lang])
for lang in ['python', 'cpp', 'go', 'java', 'js']
]
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import MBPPDataset, MBPPEvaluator2
mbpp_reader_cfg = dict(
input_columns=['text', 'test_list'], output_column='test_list_2')
# This prompt is used for the WizardCoder series
# You can use another config file for basic 3-shot generation
mbpp_infer_cfg = dict(
prompt_template=dict(
type=PromptTemplate,
template=dict(round=[
dict(
role='HUMAN',
prompt=
"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
Create a Python script for this problem:
{text}
Test examples:
{test_list}
### Response:"""),
])),
retriever=dict(type=ZeroRetriever),
inferencer=dict(type=GenInferencer, max_out_len=512))
mbpp_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator2), pred_role="BOT")
mbpp_datasets = [
dict(
type=MBPPDataset,
abbr='mbpp',
path='./data/mbpp/mbpp.jsonl',
reader_cfg=mbpp_reader_cfg,
infer_cfg=mbpp_infer_cfg,
eval_cfg=mbpp_eval_cfg)
]
from opencompass.models import HuggingFaceCausalLM
models = [
# WizardCoder 15B
dict(
type=HuggingFaceCausalLM,
abbr='WizardCoder-15B-V1.0',
path="WizardLM/WizardCoder-15B-V1.0",
tokenizer_path='WizardLM/WizardCoder-15B-V1.0',
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
max_out_len=1024,
max_seq_len=2048,
batch_size=8,
model_kwargs=dict(trust_remote_code=True, device_map='auto'),
run_cfg=dict(num_gpus=2, num_procs=1),
),
]
from opencompass.models import HuggingFaceCausalLM
models = [
# WizardCoder 1B
dict(
type=HuggingFaceCausalLM,
abbr='WizardCoder-1B-V1.0',
path="WizardLM/WizardCoder-1B-V1.0",
tokenizer_path='WizardLM/WizardCoder-1B-V1.0',
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
max_out_len=1024,
max_seq_len=2048,
batch_size=8,
model_kwargs=dict(trust_remote_code=True, device_map='auto'),
run_cfg=dict(num_gpus=1, num_procs=1),
),
]
from opencompass.models import HuggingFaceCausalLM
models = [
    # WizardCoder 3B
    dict(
        type=HuggingFaceCausalLM,
        abbr='WizardCoder-3B-V1.0',
        path="WizardLM/WizardCoder-3B-V1.0",
        tokenizer_path='WizardLM/WizardCoder-3B-V1.0',
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            trust_remote_code=True,
        ),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=8,
        model_kwargs=dict(trust_remote_code=True, device_map='auto'),
        run_cfg=dict(num_gpus=1, num_procs=1),
    ),
]
from opencompass.models import HuggingFaceCausalLM
models = [
# WizardCoder Python 13B
dict(
type=HuggingFaceCausalLM,
abbr='WizardCoder-Python-13B-V1.0',
path="WizardLM/WizardCoder-Python-13B-V1.0",
tokenizer_path='WizardLM/WizardCoder-Python-13B-V1.0',
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
max_out_len=1024,
max_seq_len=2048,
batch_size=8,
model_kwargs=dict(trust_remote_code=True, device_map='auto'),
run_cfg=dict(num_gpus=2, num_procs=1),
),
]
from opencompass.models import HuggingFaceCausalLM
models = [
# WizardCoder Python 34B
dict(
type=HuggingFaceCausalLM,
abbr='WizardCoder-Python-34B-V1.0',
path="WizardLM/WizardCoder-Python-34B-V1.0",
tokenizer_path='WizardLM/WizardCoder-Python-34B-V1.0',
tokenizer_kwargs=dict(
padding_side='left',
truncation_side='left',
trust_remote_code=True,
),
max_out_len=1024,
max_seq_len=2048,
batch_size=8,
model_kwargs=dict(trust_remote_code=True, device_map='auto'),
run_cfg=dict(num_gpus=4, num_procs=1),
),
]
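The dataset and model configs above get combined in a top-level run config. Below is a minimal sketch of such a file, assuming the standard OpenCompass read_base mechanism; the relative import paths are placeholders and are not taken from this commit, so adjust them to wherever these files actually land under configs/. OpenCompass's run.py can then be pointed at this file.

# hypothetical eval_wizardcoder.py -- the import paths below are assumptions
from mmengine.config import read_base

with read_base():
    from .datasets.humanevalx.humanevalx_wizardcoder_gen import humanevalx_datasets
    from .datasets.mbpp.mbpp_wizardcoder_gen import mbpp_datasets
    from .models.wizardcoder.hf_wizardcoder_15b import models

datasets = [*humanevalx_datasets, *mbpp_datasets]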
@@ -45,9 +45,12 @@ class MBPPEvaluator(BaseEvaluator):
        for test_case, pred in zip(references, predictions):
            programs = self._process_test(test_case, pred)
            try:
                # Pass an explicit globals dict so that exec does not raise an
                # unnecessary NameError for a correct answer
                exec_globals = {}
                with self.swallow_io():
                    with self.time_limit(2):
                        exec(programs, exec_globals)
                result['pass'] += 1
            except TimeOutException:
                result['timeout'] += 1
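The new comment above is the heart of this hunk. The following standalone sketch (not part of the patch, with hypothetical helper names) shows why an explicit globals dict matters for otherwise correct answers.

# A correct MBPP answer often defines a helper and calls it from the solution.
# Inside a method, exec(code) gives those functions the module's globals, so
# the helper lookup fails with NameError; exec(code, {}) puts every definition
# into one shared dict and the same program passes.
program = """
def helper():
    return 41

def solution():
    return helper() + 1

assert solution() == 42
"""

def run_without_globals():
    exec(program)  # 'helper' ends up invisible to solution() -> NameError

def run_with_globals():
    exec(program, {})  # shared globals/locals dict -> assertion passes

try:
    run_without_globals()
except NameError as err:
    print('default namespaces:', err)

run_with_globals()
print('explicit globals: ok')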
@@ -118,3 +121,41 @@ class MBPPEvaluator(BaseEvaluator):
    class redirect_stdin(contextlib._RedirectStream):  # type: ignore
        _stream = 'stdin'
@ICL_EVALUATORS.register_module()
class MBPPEvaluator2(MBPPEvaluator):
    """Better suited for WizardCoder evaluation."""

    def _process_answer(self, text):
        if '```' in text:
            blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
            if len(blocks) == 0:
                text = text.split('```')[1]  # fall back to default strategy
            else:
                text = blocks[0]  # fetch the first code block
                if not text.startswith('\n'):  # in case starting with ```python
                    text = text[max(text.find('\n') + 1, 0):]
        else:
            match = re.search(r'Here(.*?)\n', text)
            if match:
                text = re.sub('Here(.*?)\n', '', text, count=1)

        # remove tests appended to the generation
        test_list = ['# Test', '#Test', '#test', '# test']
        for s in test_list:
            if s in text:
                text = text[:text.find(s)]

        text = text.strip()
        match = re.search(r"('\s*|)(\[DONE\]|DONE)", text)
        if match:
            text = text[:match.start()]
        match = re.search(r"(\[BEGIN\]|BEGIN)('\s*|)", text)
        if match:
            text = text[match.end():]
        text = text.strip()
        if text.startswith("'"):
            text = text[1:]
        return text
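For reference, a quick sanity check of the extraction above on a made-up WizardCoder-style reply; the evaluator is constructed with no arguments, just as the mbpp config's evaluator=dict(type=MBPPEvaluator2) builds it.

# Hypothetical completion; only the fenced code, minus the language tag and
# the trailing tests, should survive postprocessing.
from opencompass.datasets import MBPPEvaluator2

sample = ("Here is a Python solution:\n"
          "```python\n"
          "def add(a, b):\n"
          "    return a + b\n"
          "\n"
          "# Test\n"
          "assert add(1, 2) == 3\n"
          "```\n")

print(MBPPEvaluator2()._process_answer(sample))
# def add(a, b):
#     return a + b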
@@ -70,6 +70,24 @@ def gpt_python_postprocess(ori_prompt: str, text: str) -> str:
    return text
def wizardcoder_postprocess(text: str) -> str:
    """Postprocess for WizardCoder models."""
    if '```' in text:
        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
        if len(blocks) == 0:
            text = text.split('```')[1]  # fall back to default strategy
        else:
            text = blocks[0]  # fetch the first code block
            if not text.startswith('\n'):  # in case starting with ```python
                text = text[max(text.find('\n') + 1, 0):]
    else:
        match = re.search(r'Here(.*?)\n', text)
        if match:
            text = re.sub('Here(.*?)\n', '', text, count=1)

    return text
def collect_preds(filename: str):
    # in case the prediction is partial
    root, ext = osp.splitext(filename)
@@ -147,7 +165,18 @@ def main():
                break

        # special postprocess for GPT
        if model_abbr in [
                'WizardCoder-1B-V1.0',
                'WizardCoder-3B-V1.0',
                'WizardCoder-15B-V1.0',
                'WizardCoder-Python-13B-V1.0',
                'WizardCoder-Python-34B-V1.0',
        ]:
            predictions = [{
                'task_id': f'{task}/{i}',
                'generation': wizardcoder_postprocess(pred),
            } for i, pred in enumerate(pred_strs)]
        elif 'CodeLlama' not in model_abbr and lang == 'python':
            predictions = [{
                'task_id':
                f'{task}/{i}',
...