Commit 5b159bf9 authored by Baber's avatar Baber
Browse files

test humaneval

parent 173b2bc3
......@@ -1503,9 +1503,9 @@ class ConfigurableTask(Task):
# we expect multiple_targets to be a list.
elif self.multiple_target:
gold = list(gold)
elif (
type(gold) is not type(result)
and "bypass" not in self._metric_fn_list.keys()
# TODO: handle this better
elif type(gold) is not type(result) and not (
"bypass" in self._metric_fn_list.keys() or isinstance(result, list)
):
# cast gold to the same type as result
gold = type(result)(gold)
......
......@@ -3,11 +3,12 @@ dataset_path: openai/openai_humaneval
output_type: generate_until
test_split: test
doc_to_text: "{{prompt}}"
doc_to_target: !function utils.build_references
doc_to_target: "{{test}}\ncheck({{entry_point}})"
metric_list:
- metric: !function utils.pass_at_1
aggregation: mean
higher_is_better: true
k: 64
generation_kwargs:
until:
- "\nclass"
......@@ -18,7 +19,7 @@ generation_kwargs:
do_sample: true
temperature: 0.2
top_p: 0.95
repeats: 64
repeats: 2
num_fewshot: 0
filter_list:
- name: "n=64" # number of samples to estimate pass@k
......
import evaluate as hf_evaluate

# Sanity check retained from an earlier revision: it loaded the `code_eval`
# metric and ran a trivial candidate to verify that code execution was enabled
# (HF_ALLOW_CODE_EVAL=1) before model generation. Kept commented out so that
# importing this module performs no network access and executes no code.
# pass_at_k = hf_evaluate.load("code_eval")
#
# # run simple test to check code execution is enabled before model generation
# test_cases = ["assert add(2, 3)==5"]
# candidates = [["def add(a,b): return a*b"]]
# results = pass_at_k.compute(references=test_cases, predictions=candidates, k=[1])
def pass_at_1(references: list[str], predictions: list[list[str]], k: list[int] = None):
pass_at_k = hf_evaluate.load("code_eval")
assert k is not None
if isinstance(k, int):
k = [k]
res = pass_at_k.compute(
references=references,
predictions=predictions,
k=[1],
)[0]["pass@1"]
k=k,
)[0]
return {
key: val for key, val in res.items() if key in map(lambda x: f"pass@{x}", k)
}
def build_references(doc):
    """Build the reference (test-harness) string for one HumanEval document.

    Joins the document's unit tests with a call to its entry point, producing
    the string that `code_eval` executes against each candidate completion.
    """
    tests = doc["test"]
    entry_point = doc["entry_point"]
    return f"{tests}\ncheck({entry_point})"
def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[list[str]]:
    """Prepend each document's prompt to every sampled completion.

    The diff residue left two conflicting definitions of this function (a
    manual append loop and a comprehension); this keeps a single one.

    Args:
        resps: Per-document lists of raw model continuations.
        docs: The corresponding documents; each must contain a ``"prompt"`` key.

    Returns:
        Per-document lists of full candidate programs (prompt + continuation),
        in the same order as the inputs.
    """
    return [
        [doc["prompt"] + continuation for continuation in doc_resps]
        for doc_resps, doc in zip(resps, docs)
    ]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment