Unverified commit dd4318f6, authored by Jingming and committed by GitHub
Browse files

[Feature] enhance the ability of humaneval_postprocess (#676)



* [Feature] enhance the ability of humaneval_postprocess

* refactor

* [Feature] Keep the old version of the function and realize the new function in humaneval_postprocess_v2.

* Update opencompass/datasets/humaneval.py

---------
Co-authored-by: Leymore <zfz-960727@163.com>
Co-authored-by: Hubert <42952108+yingfhu@users.noreply.github.com>
parent 1029119e
......@@ -104,6 +104,53 @@ def humaneval_postprocess(text: str) -> str:
return text
def humaneval_postprocess_v2(text: str) -> str:
    """Extract an indented function body from a raw model completion.

    This is an advanced version of ``humaneval_postprocess`` that handles
    more situations; prefer this one.

    The completion is normalised in the following order:

    1. If the text contains a Markdown code fence, keep the contents of the
       first fenced block. When the fence is never closed, keep whatever
       follows the first fence instead. A language tag on the opening fence
       line (e.g. ``python``) is dropped in both cases.
    2. If the text starts with an import statement, discard everything up
       to and including the first ``def`` header line.
    3. Remove blank lines.
    4. If the text starts with a ``def`` header, discard that line.
    5. Make sure the text starts with a four-space indent.
    6. Cut the text at the first code line that is indented less than the
       first code line, on the assumption that the function body ends there.

    Args:
        text: Raw completion text produced by the model.

    Returns:
        The extracted body, indented by four spaces.
    """
    indent = ' ' * 4
    text = text.lstrip('\n')

    if '```' in text:
        blocks = re.findall(r'```(.*?)```', text, re.DOTALL)
        if blocks:
            text = blocks[0]  # fetch the first code block
        else:
            # Unterminated fence: fall back to whatever follows it.
            text = text.split('```')[1]
        if not text.startswith('\n'):
            # The opening fence carried a language tag such as ```python;
            # drop the rest of that line. Without a newline, find() returns
            # -1 and the slice keeps the text unchanged.
            text = text[text.find('\n') + 1:]

    if re.match(r'\s*(?:from|import)\b', text):
        # Look for a real ``def`` header at the start of a line so that
        # identifiers such as ``defaultdict`` on an import line are ignored.
        header = re.search(r'^[ \t]*(?:async[ \t]+)?def\b', text,
                           re.MULTILINE)
        if header is not None:
            text = text[text.find('\n', header.start()) + 1:]

    # Remove blank lines, including whitespace-only ones, so that the first
    # remaining line is always real content.
    text = '\n'.join(line for line in text.split('\n') if line.strip())

    if re.match(r'\s*(?:async\s+)?def\b', text):
        # Only the body is wanted; drop the signature line.
        text = '\n'.join(text.split('\n')[1:])

    if not text.startswith(indent):
        if text.startswith(' '):
            # Under-indented first line: normalise its indent only.
            text = indent + text.lstrip()
        else:
            # No indentation at all: indent every line.
            text = '\n'.join(indent + line for line in text.split('\n'))

    # If the number of leading spaces reduces, we assume that the code
    # block ends. Blank lines and lines starting with a quote or '#' are
    # skipped because their indentation is not significant here.
    lines = text.split('\n')
    base_indent = None
    for index, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped[0] in ("'", '"', '#'):
            continue
        current_indent = len(line.rstrip()) - len(stripped)
        if base_indent is None:
            base_indent = current_indent
        elif current_indent < base_indent:
            lines = lines[:index]
            break
    return '\n'.join(lines)
def humaneval_gpt_postprocess(text: str) -> str:
"""Better answer postprocessor for better instruction-aligned models like
GPT."""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment