# test_serving_chat.py: checks that OpenAIServingChat can be built from a mock engine.
import asyncio
from dataclasses import dataclass

4
5
import pytest

6
7
8
9
10
from vllm.entrypoints.openai.serving_chat import OpenAIServingChat

# Tokenizer identifier for the mock model config; also passed as the served
# model name when constructing OpenAIServingChat.
MODEL_NAME = "openai-community/gpt2"
# Placeholder template text; the test only checks that this exact string ends
# up as the tokenizer's chat_template.
CHAT_TEMPLATE = "Dummy chat template for testing {}"

# Module-level pytest marker: tags every test in this file with ``openai``.
pytestmark = pytest.mark.openai


@dataclass
class MockModelConfig:
    """Stand-in model configuration for the serving-chat test.

    The attributes carry no type annotations, so ``@dataclass`` does not turn
    them into fields; they are plain class attributes shared by every
    instance, and ``MockModelConfig()`` takes no arguments.
    """

    # Tokenizer identifier; reuses the module-level model name.
    tokenizer = MODEL_NAME
    trust_remote_code = False
    tokenizer_mode = "auto"
    # NOTE(review): presumably a context-length limit in tokens - confirm
    # against the real model config.
    max_model_len = 100
    tokenizer_revision = None
    embedding_mode = False


@dataclass
class MockEngine:
    """Stand-in engine that exposes only ``get_model_config``."""

    async def get_model_config(self) -> "MockModelConfig":
        """Return a newly created ``MockModelConfig``.

        Declared ``async`` so callers obtain the config with ``await``.
        """
        return MockModelConfig()


async def _async_serving_chat_init():
    """Construct an ``OpenAIServingChat`` from inside a coroutine.

    A ``MockEngine`` supplies the model config, and the handler is created
    with ``MODEL_NAME`` as its only served model name, the ``"assistant"``
    response role and ``CHAT_TEMPLATE`` as the chat template.

    NOTE(review): presumably the point of doing this inside a coroutine is to
    exercise construction while an event loop is running - confirm against
    ``OpenAIServingChat``.

    Returns:
        The constructed ``OpenAIServingChat`` instance.
    """
    engine = MockEngine()
    model_config = await engine.get_model_config()

    serving_chat = OpenAIServingChat(engine,
                                     model_config,
                                     served_model_names=[MODEL_NAME],
                                     response_role="assistant",
                                     chat_template=CHAT_TEMPLATE)
    return serving_chat


def test_async_serving_chat_init():
    """Check that async construction yields a tokenizer with the template.

    Runs ``_async_serving_chat_init`` to completion on a fresh event loop and
    verifies that the resulting handler has a tokenizer whose
    ``chat_template`` equals ``CHAT_TEMPLATE``.
    """
    serving_chat = asyncio.run(_async_serving_chat_init())
    tokenizer = serving_chat.tokenizer
    assert tokenizer is not None
    assert tokenizer.chat_template == CHAT_TEMPLATE