"docs/source/en/api/outputs.md" did not exist on "856dad57bb7a9ee13af4a08492e524b0a145a2c5"
Commit 52192906 authored by Baber
Browse files

update mbpp

parent 13aa5096
......@@ -4,20 +4,31 @@ dataset_name: full
unsafe_code: true
output_type: generate_until
test_split: test
doc_to_text: "You are an expert Python programmer, and here is your task: {{text}} Your code should pass these tests:\n\n{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}\n[BEGIN]\n"
doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}"
repeats: 20
doc_to_text: "{{text|trim}}\n{{code}}.split(':')[0]:\n"
doc_to_target: "{% if is_fewshot is defined %}{{code}}\n{% else %}{{test_list[0]}}\n{{test_list[1]}}\n{{test_list[2]}}{% endif %}"
target_delimiter: ""
gen_prefix: "Here is the completed function:\n\n```python\n"
metric_list:
- metric: !function utils.pass_at_1
- metric: !function utils.pass_at_k
aggregation: mean
higher_is_better: true
k: [ 10 ]
generation_kwargs:
until:
- "[DONE]"
until: [
"\nclass",
"\nassert",
'\n"""',
"\nprint",
"\nif",
"\n```",
"\n#",
"\n<|/",
"<|eot_id|>",
]
do_sample: false
num_fewshot: 3
fewshot_config:
sampler: first_n
samples: !function utils.list_fewshot_samples
metadata:
version: 1.0
version: 2.0
......@@ -17,9 +17,10 @@ doc_to_target: "{% if is_fewshot is defined %}{{code}}\n[DONE]{% else %}{{test_l
target_delimiter: ""
gen_prefix: "Here is the completed function:\n\n```python\n"
metric_list:
- metric: !function utils.pass_at_10
- metric: !function utils.pass_at_k
aggregation: mean
higher_is_better: true
k: [ 10 ]
filter_list:
- name: "create_test"
filter:
......@@ -27,16 +28,16 @@ filter_list:
filter_fn: !function utils.build_predictions
generation_kwargs:
until: [
"\nclass",
"\nassert",
'\n"""',
"\nprint",
"\nif",
"\n```",
"\n#",
"\n<|/",
"<|eot_id|>",
]
"\nclass",
"\nassert",
'\n"""',
"\nprint",
"\nif",
"\n```",
"\n#",
"\n<|/",
"<|eot_id|>",
]
do_sample: true
temperature: 0.8
top_p: 0.95
......
......@@ -9,9 +9,10 @@ doc_to_target: "{% if is_fewshot is defined %}{{code}}\n```{% else %}{{test_list
gen_prefix: "\n```python\n"
target_delimiter: ""
metric_list:
- metric: !function utils.pass_at_1
- metric: !function utils.pass_at_k
aggregation: mean
higher_is_better: true
k: [ 1 ]
filter_list:
- name: "extract_code"
filter:
......@@ -19,7 +20,7 @@ filter_list:
filter_fn: !function utils.build_predictions
generation_kwargs:
max_gen_toks: 256
until: []
until: [ ]
do_sample: false
num_fewshot: 3
fewshot_config:
......
......@@ -5,44 +5,36 @@ import evaluate as hf_evaluate
try:
pass_at_k = hf_evaluate.load("code_eval")
# run simple test to check code execution is enabled before model generation
compute_ = hf_evaluate.load("code_eval")
test_cases = ["assert add(2, 3)==5"]
candidates = [["def add(a,b): return a*b"]]
results = pass_at_k.compute(references=test_cases, predictions=candidates, k=[1])
results = compute_.compute(references=test_cases, predictions=candidates, k=[1])
except Exception as e:
raise e
def pass_at_1(
references: Union[str, list[str]], predictions: Union[str, list[list[str]]]
) -> float:
if isinstance(references, str):
references = [references]
if isinstance(predictions[0], str):
predictions = [[p] for p in predictions]
return pass_at_k.compute(
def pass_at_k(references: list[str], predictions: list[list[str]], k: list[int] = None):
global compute_
assert k is not None
if isinstance(k, int):
k = [k]
res = compute_.compute(
references=references,
predictions=predictions,
k=[1],
)[0]["pass@1"]
def pass_at_10(
references: Union[str, list[str]], predictions: Union[str, list[list[str]]]
) -> float:
global pass_at_k
if isinstance(references, str):
references = [references]
if isinstance(predictions[0], str):
predictions = [[p] for p in predictions]
res = pass_at_k.compute(
references=references, predictions=predictions, k=[10], num_workers=20
k=k,
)
return res[0]
def extract_python_block(text: str) -> str:
    """Return the code from the first ```python fenced block in *text*.

    Text that does not open with a code fence is treated as bare code and is
    wrapped in a ```python fence first, so a raw completion and a fenced one
    go through the same extraction. Leading whitespace before an opening
    fence is ignored when making that decision; otherwise a fenced reply
    preceded by a blank line would be wrapped a second time and the lazy
    pattern below would match an empty body.

    Args:
        text: Model output, either bare code or text containing a fenced block.

    Returns:
        The extracted code prefixed with ``from __future__ import annotations``,
        or an empty string when no ```python fence is found.
    """
    if not text.lstrip().startswith("```"):
        text = "```python\n" + text + "\n```"
    # capture only fences whose language tag is 'python' (case-insensitive)
    pattern = re.compile(r"```python\n([\s\S]*?)\n?```", re.IGNORECASE)
    m = pattern.search(text)
    if m is None:
        return ""
    return "from __future__ import annotations\n" + m.group(1)
def extract_code_blocks(text: str) -> str:
# Pattern to match ```...``` blocks
ignore_annotations = "from __future__ import annotations\n"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment