utils.py 1011 Bytes
Newer Older
Hojin Lee's avatar
Hojin Lee committed
1
2
3
import evaluate as hf_evaluate


Baber's avatar
Baber committed
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# pass_at_k = hf_evaluate.load("code_eval")
#
# # run simple test to check code execution is enabled before model generation
# test_cases = ["assert add(2, 3)==5"]
# candidates = [["def add(a,b): return a*b"]]
# results = pass_at_k.compute(references=test_cases, predictions=candidates, k=[1])


def pass_at_1(references: list[str], predictions: list[list[str]], k: list[int] = None):
    """Compute pass@k scores using the Hugging Face ``code_eval`` metric.

    Args:
        references: Reference test-code strings, forwarded to the metric as
            ``references``.
        predictions: Candidate code strings, one inner list per reference,
            forwarded to the metric as ``predictions``.
        k: The ``k`` value(s) to score. A bare ``int`` is accepted and
            treated as a single-element list. Required, despite the ``None``
            default in the signature.

    Returns:
        A dict containing only the ``"pass@<k>"`` entries for the requested
        ``k`` values, taken from the first element of the metric's result.

    Raises:
        ValueError: If ``k`` is ``None``.
    """
    # Validate with an explicit exception (an ``assert`` is stripped under
    # ``python -O``) and do it before the metric is loaded so a bad call
    # fails fast.
    if k is None:
        raise ValueError("k must be provided (an int or a list of ints)")

    # Normalise to a list; copying also keeps one-shot iterables usable for
    # both the metric call and the key filter below.
    k = [k] if isinstance(k, int) else list(k)

    # Build the set of wanted keys once instead of re-creating a ``map`` for
    # every membership test.
    wanted_keys = {f"pass@{x}" for x in k}

    code_eval = hf_evaluate.load("code_eval")
    scores = code_eval.compute(
        references=references,
        predictions=predictions,
        k=k,
    )[0]

    return {key: val for key, val in scores.items() if key in wanted_keys}
Hojin Lee's avatar
Hojin Lee committed
26
27
28
29
30
31


def build_references(doc):
    """Return the doc's test code followed by a ``check(<entry_point>)`` call.

    The result is ``doc["test"]``, a newline, and then the text
    ``check(...)`` wrapping ``doc["entry_point"]``.
    """
    test_code = doc["test"]
    check_call = f"check({doc['entry_point']})"
    return "\n".join((test_code, check_call))


Baber's avatar
Baber committed
32
33
def build_predictions(resps: list[list[str]], docs: list[dict]) -> list[list[str]]:
    """Prefix every response with the ``"prompt"`` of its paired doc.

    ``resps`` and ``docs`` are walked in lockstep with ``zip``; each output
    inner list holds ``doc["prompt"] + response`` for every response in the
    corresponding input list.
    """
    predictions = []
    for completions, doc in zip(resps, docs):
        candidates = []
        for completion in completions:
            # Index the doc per completion so a doc with no completions is
            # never touched.
            candidates.append(doc["prompt"] + completion)
        predictions.append(candidates)
    return predictions