test_evaluator.py
import lm_eval.tasks as tasks
import lm_eval.models as models
import lm_eval.evaluator as evaluator
import random
import pytest


# TODO: more fine-grained unit tests rather than this big honking integration
# test once we break evaluator into smaller, more manageable pieces

@pytest.mark.parametrize("taskname,Task", tasks.TASK_REGISTRY.items())
def test_evaluator(taskname, Task):
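    # get_task_dict returns the {task name: task object} mapping that
    # evaluate() consumes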
    task_dict = tasks.get_task_dict([taskname])
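    # 'dummy' is a stub model; the test replaces its loglikelihood below with
    # a deterministic fake so no real model is needed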
    lm = models.get_model('dummy')()

    def ll_fn(reqs):
        for ctx, cont in reqs:
            # space convention: a context never ends with a space, and a
            # continuation starts with a space unless the context ends with
            # a newline
            assert ctx[-1] != ' '
            assert cont[0] == ' ' or ctx[-1] == '\n'
        
        res = []
        
        random.seed(42)
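        # one (loglikelihood, is_greedy) pair per request: a random negative
        # logprob, with is_greedy always False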
        for _ in reqs:
            res.append((-random.random(), False))

        return res
        

    lm.loglikelihood = ll_fn
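
    # NOTE (assumption): evaluate's positional args here are
    # provide_description=False, num_fewshot=0, limit=3; the limit caps how
    # many documents per task get evaluated so the test stays fast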
    evaluator.evaluate(lm, task_dict, False, 0, 3)