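"""Smoke tests for the Neural Magic model backends ("sparseml" and "deepsparse").

Each test loads a small model through lm_eval's model registry and runs
evaluator.simple_evaluate on a handful of examples per task. The tests
require the sparseml / deepsparse packages to be installed.
"""
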
import pytest

from lm_eval import evaluator
from lm_eval.api.registry import get_model


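# (model_id, task) pairs; each task is chosen to exercise a different
# request type, as noted in the inline comments.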
SPARSEML_MODELS_TASKS = [
    # loglikelihood
    ("facebook/opt-125m", "lambada_openai"),
    # loglikelihood_rolling
    ("hf-internal-testing/tiny-random-gpt2", "wikitext"),
    # generate_until
    ("mgoin/tiny-random-llama-2-quant", "gsm8k"),
]

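# The "hf:" prefix points DeepSparse at a model hosted on the Hugging Face
# Hub (as opposed to a local path or SparseZoo stub).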
DEEPSPARSE_MODELS_TASKS = [
    # loglikelihood
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "lambada_openai"),
    # loglikelihood_rolling (not supported yet)
    # ("hf:mgoin/llama2.c-stories15M-quant-ds", "wikitext"),
    # generate_until
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "gsm8k"),
]


@pytest.mark.skip(reason="test failing")
@pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
def test_sparseml_eval(model_id, task):
    lm = get_model("sparseml").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
            "device": "cpu",
            "dtype": "float32",
        },
    )

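    # Keep the run fast: evaluate only a handful of examples per task.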
    limit = 5
    results = evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )
    assert results is not None


@pytest.mark.parametrize("model_id,task", DEEPSPARSE_MODELS_TASKS)
def test_deepsparse_eval(model_id, task):
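    # DeepSparse executes on CPU, so no device/dtype arguments are needed.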
    lm = get_model("deepsparse").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
        },
    )

    limit = 5
    results = evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )
    assert results is not None