# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3

4
import pytest
5

6
from vllm.config import CompilationMode
7

8
9
from ..utils import compare_two_settings

10
11
12
13
# On TPU, --enforce-eager still causes graph compilation, which takes long
# enough to time out the default health check in the MQLLMEngine, so the
# test raises the timeout to 30s.


def test_custom_dispatcher(monkeypatch: pytest.MonkeyPatch):
    """Compare DYNAMO_TRACE_ONCE against STOCK_TORCH_COMPILE on one model.

    Both settings are passed to ``compare_two_settings`` with the same model
    name and the same CLI flags; the only difference between them is the
    ``-O`` compilation-mode flag.

    Args:
        monkeypatch: pytest fixture used to set ``VLLM_RPC_TIMEOUT`` for the
            duration of the comparison only.
    """
    # Flags shared by both settings, kept in one place so that the single
    # differing flag (the -O mode) is obvious at the call site.
    shared_args = [
        "--max-model-len=256",
        "--max-num-seqs=32",
        "--enforce-eager",
    ]

    with monkeypatch.context() as m:
        # Raised timeout; the environment change is undone when the context
        # manager exits.
        m.setenv("VLLM_RPC_TIMEOUT", "30000")

        compare_two_settings(
            "Qwen/Qwen2.5-1.5B-Instruct",
            arg1=[
                *shared_args,
                f"-O{CompilationMode.DYNAMO_TRACE_ONCE}",
            ],
            arg2=[
                *shared_args,
                f"-O{CompilationMode.STOCK_TORCH_COMPILE}",
            ],
            env1={},
            env2={},
        )