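"""Smoke tests for the Neural Magic backends (sparseml and deepsparse) in lm-eval."""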
import pytest

from lm_eval import evaluator
from lm_eval.api.registry import get_model


SPARSEML_MODELS_TASKS = [
    # loglikelihood
    ("facebook/opt-125m", "lambada_openai"),
    # loglikelihood_rolling
    ("hf-internal-testing/tiny-random-gpt2", "wikitext"),
    # generate_until
    ("mgoin/tiny-random-llama-2-quant", "gsm8k"),
]

DEEPSPARSE_MODELS_TASKS = [
    # loglikelihood
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "lambada_openai"),
    # loglikelihood_rolling (not supported yet)
    # ("hf:mgoin/llama2.c-stories15M-quant-ds", "wikitext"),
    # generate_until
    ("hf:mgoin/llama2.c-stories15M-quant-ds", "gsm8k"),
]


@pytest.mark.skip(reason="test failing")
@pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
def test_sparseml_eval(model_id, task):
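    """Load a SparseML model from the registry and run a short end-to-end evaluation."""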
    lm = get_model("sparseml").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
            "device": "cpu",
            "dtype": "float32",
        },
    )

    limit = 5
    evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )


@pytest.mark.parametrize("model_id,task", DEEPSPARSE_MODELS_TASKS)
def test_deepsparse_eval(model_id, task):
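    """Load a DeepSparse model from the registry and run a short end-to-end evaluation."""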
    lm = get_model("deepsparse").create_from_arg_string(
        f"pretrained={model_id}",
        {
            "batch_size": 1,
        },
    )

    limit = 5
    evaluator.simple_evaluate(
        model=lm,
        tasks=[task],
        num_fewshot=0,
        limit=limit,
    )