test_custom_dispatcher.py 729 Bytes
Newer Older
1
2
# SPDX-License-Identifier: Apache-2.0

3
import pytest
4

5
from vllm.config import CompilationLevel
6

7
8
from ..utils import compare_two_settings

9
10
11
12
# On TPU, --enforce-eager still triggers graph compilation, which takes
# long enough to time out the default health check in the MQLLMEngine,
# so we raise the timeout here to 30s.

13

14
15
16
17
18
19
20
21
22
23
24
25
def test_custom_dispatcher(monkeypatch: pytest.MonkeyPatch):
    """Run ``compare_two_settings`` on gemma-2b for two ``-O`` levels.

    Both configurations pass ``--enforce-eager``; they differ only in the
    compilation level flag (``DYNAMO_ONCE`` vs. ``DYNAMO_AS_IS``).
    """
    model = "google/gemma-2b"
    once_args = ["--enforce-eager", f"-O{CompilationLevel.DYNAMO_ONCE}"]
    as_is_args = ["--enforce-eager", f"-O{CompilationLevel.DYNAMO_AS_IS}"]

    # The context scopes the environment change to this block, so the
    # variable is restored once the comparison finishes.
    with monkeypatch.context() as patched:
        # Extended RPC timeout so the engine health check does not expire
        # while graphs are being compiled.
        patched.setenv("VLLM_RPC_TIMEOUT", "30000")
        compare_two_settings(
            model,
            arg1=once_args,
            arg2=as_is_args,
            env1={},
            env2={},
        )