judge_task.py 581 Bytes
Newer Older
Baber's avatar
Baber committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import json

import datasets

from lm_eval.api.task import ConfigurableTask


class JudgeTask(ConfigurableTask):
    """Task that reads previously saved responses from a JSON file.

    NOTE(review): presumably used to hand stored model outputs to a judge;
    this is inferred from the class name only -- confirm against callers.
    """

    def __init__(self, config, output_path):
        """Initialise the base task and remember where the responses live.

        Args:
            config: Task configuration, forwarded unchanged to
                ``ConfigurableTask.__init__``.
            output_path: Path of the JSON file read by ``process_docs``.
        """
        super().__init__(config)
        self.output_path = output_path

    def process_docs(self, dataset: datasets.Dataset) -> list:
        """Load the saved response record and return it as a sorted list.

        The file at ``self.output_path`` is parsed as a single JSON document
        whose top-level ``"resps"`` and ``"doc"`` entries are read, so the
        returned list holds exactly one entry.

        Args:
            dataset: Dataset on which ``add_column("resp", ...)`` is called.

        Returns:
            A list of ``{"resp": ..., "doc": ...}`` dicts sorted by ``"doc"``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
            KeyError: If the parsed object lacks ``"resps"`` or ``"doc"``.
        """
        # JSON is UTF-8 by specification; without an explicit encoding the
        # platform locale decides how the file is decoded.
        with open(self.output_path, "r", encoding="utf-8") as f:
            record = json.load(f)

        resps = [{"resp": record["resps"], "doc": record["doc"]}]
        resps.sort(key=lambda x: x["doc"])

        # NOTE(review): ``Dataset.add_column`` returns a new dataset instead
        # of modifying ``dataset`` in place, so this result is discarded and
        # the caller only receives ``resps``. The call is kept so the return
        # value stays unchanged; confirm whether the augmented dataset should
        # be returned instead.
        dataset.add_column("resp", resps)
        return resps