# test_custom_dispatcher.py
# SPDX-License-Identifier: Apache-2.0

3
4
import os

5
from vllm.config import CompilationLevel
6

7
8
from ..utils import compare_two_settings

9
10
11
12
13
# Even with --enforce-eager, running on TPU triggers graph compilation.
# That compilation outlasts the default health-check timeout of the
# MQLLMEngine, so the RPC timeout is raised to 30s (given here as "30000").
os.environ.update(VLLM_RPC_TIMEOUT="30000")

14
15

def test_custom_dispatcher():
    """Run ``compare_two_settings`` on gemma-2b with two ``-O`` levels.

    Both configurations pass ``--enforce-eager`` and differ only in the
    optimization flag: ``CompilationLevel.DYNAMO_ONCE`` for the first and
    ``CompilationLevel.DYNAMO_AS_IS`` for the second. Neither run receives
    extra environment variables (both env mappings are empty).

    NOTE(review): ``compare_two_settings`` is imported from ``..utils``;
    presumably it launches the model under each setting and checks that
    the results agree -- confirm against that helper.
    """
    compare_two_settings(
        "google/gemma-2b",
        arg1=["--enforce-eager", f"-O{CompilationLevel.DYNAMO_ONCE}"],
        arg2=["--enforce-eager", f"-O{CompilationLevel.DYNAMO_AS_IS}"],
        env1={},
        env2={},
    )