# SPDX-License-Identifier: Apache-2.0
"""
This file tests the accuracy of the vLLM server via LMEval.
It uses local-completions, which interacts with vLLM
through the OAI API with N concurrent connections.
This simulates real-world usage of the API and makes
sure that the zmq frontend mp RPC message passing and
AsyncLLMEngine are working correctly.
"""

import lm_eval
import pytest

from vllm.platforms import current_platform

# Model identifier interpolated into the `pretrained=` model argument.
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
# NOTE(review): not referenced anywhere in the visible code -- presumably
# meant for the concurrent-connection setup; confirm before removing.
NUM_CONCURRENT = 500
# Key used to look up this task's entry in the lm_eval results.
TASK = "gsm8k"
# Metric key read from the task's results entry.
FILTER = "exact_match,strict-match"
# Allowed deviation from EXPECTED_VALUE. run_test applies it as an absolute
# margin (measured +/- RTOL), not as a relative factor.
RTOL = 0.03
# Reference score that the measured metric is compared against.
EXPECTED_VALUE = 0.58


24
def run_test(more_args=None):
    """Run the end-to-end accuracy test.

    Evaluates MODEL_NAME on TASK through ``lm_eval.simple_evaluate`` with the
    ``vllm`` model backend, then asserts that the measured FILTER metric lies
    strictly within RTOL (applied as an absolute margin) of EXPECTED_VALUE.

    Args:
        more_args: Optional comma-separated ``key=value`` string appended to
            the default model arguments. ``None`` or an empty string adds
            nothing.

    Raises:
        AssertionError: If the measured score falls outside the tolerance
            band around EXPECTED_VALUE.
    """
    model_args = f"pretrained={MODEL_NAME},max_model_len=4096"

    # Skip both None and "" so an empty extra string cannot leave a dangling
    # trailing comma in the argument string.
    if more_args:
        model_args = f"{model_args},{more_args}"

    results = lm_eval.simple_evaluate(
        model="vllm",
        model_args=model_args,
        # Use the shared constant so the evaluated task always matches the
        # key read back from the results below.
        tasks=TASK,
        batch_size="auto",
    )

    measured_value = results["results"][TASK][FILTER]
    assert (measured_value - RTOL < EXPECTED_VALUE
            and measured_value + RTOL > EXPECTED_VALUE
            ), f"Expected: {EXPECTED_VALUE} |  Measured: {measured_value}"


45
46
47
48
# TODO: [AlexM] Fix it with new CI/CD tests
# Extra model argument that the TPU branch of the V1 test appends when this
# string is non-empty. It is currently empty, so nothing is appended; the
# commented-out value shows the tensor-parallel setting it would carry.
TPU_TP_TEST_STR = ""  #"tensor_parallel_size=4"


49
50
51
@pytest.mark.skipif(
    not (current_platform.is_cuda() or current_platform.is_tpu()),
    reason="V1 is currently only supported on CUDA and TPU",
)
def test_lm_eval_accuracy_v1_engine(monkeypatch: pytest.MonkeyPatch):
    """Check accuracy with the V1 engine selected via ``VLLM_USE_V1=1``."""
    with monkeypatch.context() as patched_env:
        patched_env.setenv("VLLM_USE_V1", "1")

        if not current_platform.is_tpu():
            extra_args = None
        else:
            # Keep TPU V1 compilation time bounded.
            extra_args = "max_model_len=2048,max_num_seqs=64"

            # Append the tensor-parallel setting when one is configured.
            if TPU_TP_TEST_STR:
                extra_args = f"{extra_args},{TPU_TP_TEST_STR}"

        run_test(extra_args)
68
69


70
def test_lm_eval_accuracy_v0_engine(monkeypatch: pytest.MonkeyPatch):
    """Check accuracy with the V0 engine selected via ``VLLM_USE_V1=0``."""
    with monkeypatch.context() as patched_env:
        # Set the engine selector only for the duration of this run.
        patched_env.setenv("VLLM_USE_V1", "0")
        run_test()