test_custom_dispatcher.py 524 Bytes
Newer Older
1
2
import os

3
4
from ..utils import compare_two_settings

5
6
7
8
9
# Passing --enforce-eager on TPU causes graph compilation, which times out
# the default health check in the MQLLMEngine. Raise the RPC timeout to
# 30 s here (the value "30000" is that 30 s expressed in milliseconds).
os.environ.update({"VLLM_RPC_TIMEOUT": "30000"})

10
11
12
13
14
15
16

def test_custom_dispatcher():
    """Compare google/gemma-2b under two eager-mode environment setups.

    Both settings pass ``--enforce-eager`` on the command line; they differ
    only in their environment overrides: the first sets
    ``VLLM_DYNAMO_USE_CUSTOM_DISPATCHER`` to ``"0"`` and the second supplies
    none. The comparison itself is delegated to ``compare_two_settings``.
    """
    # NOTE(review): presumably "0" turns the custom dispatcher off so it is
    # checked against the default behaviour -- confirm against vLLM's envs.
    dispatcher_off_env = {"VLLM_DYNAMO_USE_CUSTOM_DISPATCHER": "0"}
    default_env = {}

    compare_two_settings(
        "google/gemma-2b",
        arg1=["--enforce-eager"],
        arg2=["--enforce-eager"],
        env1=dispatcher_off_env,
        env2=default_env,
    )