# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from http import HTTPStatus
from unittest.mock import MagicMock

import pytest

from vllm.config import ModelConfig
from vllm.engine.protocol import EngineClient
from vllm.entrypoints.openai.protocol import (
    ErrorResponse,
    LoadLoRAAdapterRequest,
    UnloadLoRAAdapterRequest,
)
from vllm.entrypoints.openai.serving_models import BaseModelPath, OpenAIServingModels
from vllm.lora.request import LoRARequest

# Base model used by every test; the same string serves as both the served
# name and the model path.
MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct"
BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)]
# Templates the tests compare against the load/unload responses; fill in
# ``lora_name`` with ``str.format``.
LORA_LOADING_SUCCESS_MESSAGE = "Success: LoRA adapter '{lora_name}' added successfully."
LORA_UNLOADING_SUCCESS_MESSAGE = (
    "Success: LoRA adapter '{lora_name}' removed successfully."
)


async def _async_serving_models_init() -> OpenAIServingModels:
    """Build an ``OpenAIServingModels`` instance backed by mocks.

    The engine client and model config are ``MagicMock`` objects created with
    ``spec=EngineClient`` and ``spec=ModelConfig``. The instance is constructed
    with ``BASE_MODEL_PATHS`` and ``lora_modules=None``.

    Returns:
        The serving-models object, after ``init_static_loras()`` has been
        awaited on it.
    """
    mock_model_config = MagicMock(spec=ModelConfig)
    mock_engine_client = MagicMock(spec=EngineClient)
    # Set max_model_len explicitly so that reading it on the spec'd mock does
    # not fail with a missing attribute.
    mock_model_config.max_model_len = 2048

    serving_models = OpenAIServingModels(
        engine_client=mock_engine_client,
        base_model_paths=BASE_MODEL_PATHS,
        model_config=mock_model_config,
        lora_modules=None,
    )
    await serving_models.init_static_loras()

    return serving_models


@pytest.mark.asyncio
async def test_serving_model_name():
    """``model_name`` yields the base name for ``None`` and the LoRA name otherwise."""
    serving_models = await _async_serving_models_init()
    # Without a LoRA request the base model name is reported.
    assert serving_models.model_name(None) == MODEL_NAME

    request = LoRARequest(
        lora_name="adapter", lora_path="/path/to/adapter2", lora_int_id=1
    )
    # With a LoRA request the adapter's own name is reported.
    assert serving_models.model_name(request) == request.lora_name


@pytest.mark.asyncio
async def test_load_lora_adapter_success():
    """Loading a new adapter returns the success message and registers it by name."""
    serving_models = await _async_serving_models_init()
    request = LoadLoRAAdapterRequest(lora_name="adapter", lora_path="/path/to/adapter2")
    response = await serving_models.load_lora_adapter(request)
    assert response == LORA_LOADING_SUCCESS_MESSAGE.format(lora_name="adapter")
    # Exactly one adapter is tracked, keyed by its name.
    assert len(serving_models.lora_requests) == 1
    assert "adapter" in serving_models.lora_requests
    assert serving_models.lora_requests["adapter"].lora_name == "adapter"


@pytest.mark.asyncio
async def test_load_lora_adapter_missing_fields():
    """A load request with empty name and path is rejected as invalid user input."""
    serving_models = await _async_serving_models_init()
    request = LoadLoRAAdapterRequest(lora_name="", lora_path="")
    response = await serving_models.load_lora_adapter(request)
    assert isinstance(response, ErrorResponse)
    assert response.error.type == "InvalidUserInput"
    assert response.error.code == HTTPStatus.BAD_REQUEST


@pytest.mark.asyncio
async def test_load_lora_adapter_duplicate():
    """Loading the same adapter twice fails the second time and keeps one entry."""
    serving_models = await _async_serving_models_init()

    # First load succeeds and registers the adapter.
    request = LoadLoRAAdapterRequest(
        lora_name="adapter1", lora_path="/path/to/adapter1"
    )
    response = await serving_models.load_lora_adapter(request)
    assert response == LORA_LOADING_SUCCESS_MESSAGE.format(lora_name="adapter1")
    assert len(serving_models.lora_requests) == 1

    # Second load with identical name/path is rejected with a 400 error.
    request = LoadLoRAAdapterRequest(
        lora_name="adapter1", lora_path="/path/to/adapter1"
    )
    response = await serving_models.load_lora_adapter(request)
    assert isinstance(response, ErrorResponse)
    assert response.error.type == "InvalidUserInput"
    assert response.error.code == HTTPStatus.BAD_REQUEST
    # The registry still holds only the adapter from the first load.
    assert len(serving_models.lora_requests) == 1


@pytest.mark.asyncio
async def test_unload_lora_adapter_success():
    """Unloading a previously loaded adapter returns success and empties the registry."""
    serving_models = await _async_serving_models_init()

    # Arrange: load an adapter and confirm the load itself succeeded, so a
    # failure below is attributable to unloading rather than to setup.
    request = LoadLoRAAdapterRequest(
        lora_name="adapter1", lora_path="/path/to/adapter1"
    )
    response = await serving_models.load_lora_adapter(request)
    assert response == LORA_LOADING_SUCCESS_MESSAGE.format(lora_name="adapter1")
    assert len(serving_models.lora_requests) == 1

    # Act / assert: unloading by name removes it again.
    request = UnloadLoRAAdapterRequest(lora_name="adapter1")
    response = await serving_models.unload_lora_adapter(request)
    assert response == LORA_UNLOADING_SUCCESS_MESSAGE.format(lora_name="adapter1")
    assert len(serving_models.lora_requests) == 0


@pytest.mark.asyncio
async def test_unload_lora_adapter_missing_fields():
    """An unload request with an empty name and no id is rejected as invalid input."""
    serving_models = await _async_serving_models_init()
    request = UnloadLoRAAdapterRequest(lora_name="", lora_int_id=None)
    response = await serving_models.unload_lora_adapter(request)
    assert isinstance(response, ErrorResponse)
    assert response.error.type == "InvalidUserInput"
    assert response.error.code == HTTPStatus.BAD_REQUEST


@pytest.mark.asyncio
async def test_unload_lora_adapter_not_found():
    """Unloading an adapter that was never loaded yields a 404 NotFoundError."""
    serving_models = await _async_serving_models_init()
    request = UnloadLoRAAdapterRequest(lora_name="nonexistent_adapter")
    response = await serving_models.unload_lora_adapter(request)
    assert isinstance(response, ErrorResponse)
    assert response.error.type == "NotFoundError"
    assert response.error.code == HTTPStatus.NOT_FOUND