# test_online_dimensions.py
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
5
6
7
8
9
"""
Run `pytest` against this file, `test_online_dimensions.py`.
"""

import openai
import pytest

10
from tests.conftest import HfRunner
11
from tests.models.language.pooling.embed_utils import run_embedding_correctness_test
12
13
from tests.models.utils import EmbedModelInfo
from tests.utils import RemoteOpenAIServer
14
from vllm.entrypoints.pooling.embed.protocol import EmbeddingResponse
15
16
from vllm.platforms import current_platform

17
# Models exercised by this module: one declared without Matryoshka support and
# one declared with it (restricted to a single supported dimension, 256).
MODELS = [
    EmbedModelInfo("intfloat/multilingual-e5-small", is_matryoshka=False),
    EmbedModelInfo(
        "Snowflake/snowflake-arctic-embed-m-v1.5",
        is_matryoshka=True,
        matryoshka_dimensions=[256],
    ),
]

# Prompts sent to the server; the same prompts are handed to the correctness
# check together with the reference model.
input_texts = [
    "The chef prepared a delicious meal.",
]
29
30


31
32
33
34
35
36
37
38
39
40
41
42
@pytest.fixture(scope="module", params=MODELS)
def model_info(request):
    """Module-scoped fixture parametrized over every entry of ``MODELS``."""
    selected_model = request.param
    return selected_model


@pytest.fixture(scope="module", params=["bfloat16"])
def dtype(request):
    """Module-scoped fixture yielding the dtype string used by the other fixtures."""
    selected_dtype = request.param
    return selected_dtype


@pytest.fixture(scope="module")
def server(model_info, dtype: str):
    """Launch a ``RemoteOpenAIServer`` for ``model_info`` and yield it.

    Args:
        model_info: Model description; ``model_info.name`` selects the model
            to serve.
        dtype: Value passed to the server's ``--dtype`` flag.

    Yields:
        The running ``RemoteOpenAIServer``. Its context is exited when pytest
        tears the module-scoped fixture down.
    """
    args = [
        "--runner",
        "pooling",
        # use half precision for speed and memory savings in CI environment
        "--dtype",
        dtype,
        "--enforce-eager",
        "--max-model-len",
        "512",
    ]

    if model_info.name == "Snowflake/snowflake-arctic-embed-m-v1.5":
        # Manually enable Matryoshka Embeddings
        args.extend(
            ["--trust_remote_code", "--hf_overrides", '{"matryoshka_dimensions":[256]}']
        )

    # ROCm: Use Flex Attention to support encoder-only self-attention.
    if current_platform.is_rocm():
        args.extend(["--attention-backend", "FLEX_ATTENTION"])

    with RemoteOpenAIServer(model_info.name, args) as remote_server:
        yield remote_server


@pytest.fixture(scope="module")
def hf_model(hf_runner, model_info, dtype: str):
    """Yield a reference model built by ``hf_runner`` for ``model_info``.

    Args:
        hf_runner: Callable fixture returning a context manager for the
            reference model (presumably provided by the shared test conftest;
            verify there).
        model_info: Model description; ``model_info.name`` selects the model.
        dtype: dtype string forwarded to ``hf_runner``.

    Yields:
        The object produced by entering the ``hf_runner(...)`` context, loaded
        with ``is_sentence_transformer=True``.
    """
    # The local is deliberately not called ``hf_model`` so it does not shadow
    # this fixture function's own name.
    with hf_runner(
        model_info.name, dtype=dtype, is_sentence_transformer=True
    ) as reference_model:
        yield reference_model


@pytest.mark.asyncio
async def test_matryoshka(
    model_info: EmbedModelInfo, server: RemoteOpenAIServer, hf_model: HfRunner
):
    """Exercise the ``dimensions`` parameter of the embeddings endpoint.

    For a Matryoshka model, requests with ``dimensions=None`` and with (up to
    the first two of) the model's declared ``matryoshka_dimensions`` must
    succeed and pass the correctness check against ``hf_model``; ``-1`` and an
    undeclared dimension (``5``) must be rejected with
    ``openai.BadRequestError``.

    For a non-Matryoshka model, only ``dimensions=None`` must succeed; the
    explicit values ``-1`` and ``16`` must be rejected with
    ``openai.BadRequestError``.
    """
    client = server.get_async_client()

    async def make_request_and_correctness_test(dimensions):
        """Request embeddings at ``dimensions`` and compare with ``hf_model``."""
        prompts = input_texts * 3

        embedding_response = await client.embeddings.create(
            model=model_info.name,
            input=prompts,
            dimensions=dimensions,
            encoding_format="float",
        )
        # Re-validate the client response against vLLM's own response model.
        embeddings = EmbeddingResponse.model_validate(
            embedding_response.model_dump(mode="json")
        )

        assert embeddings.id is not None
        # One embedding per prompt; derived from ``prompts`` so the check stays
        # correct if ``input_texts`` or the repeat factor changes.
        assert len(embeddings.data) == len(prompts)
        assert len(embeddings.data[0].embedding) > 0
        assert embeddings.usage.completion_tokens == 0
        assert embeddings.usage.prompt_tokens > 0
        assert embeddings.usage.total_tokens > 0

        if dimensions is not None:
            assert len(embeddings.data[0].embedding) == dimensions

        vllm_outputs = [d.embedding for d in embeddings.data]
        run_embedding_correctness_test(hf_model, prompts, vllm_outputs, dimensions)

    # ``None`` (no explicit dimensions) is always expected to work and ``-1``
    # is always expected to be rejected; the rest depends on the model.
    valid_dimensions: list[int | None] = [None]
    invalid_dimensions: list[int | None] = [-1]

    if model_info.is_matryoshka:
        if model_info.matryoshka_dimensions is not None:
            valid_dimensions += model_info.matryoshka_dimensions[:2]
            # 5 is only a meaningful "invalid" probe if the model does not
            # declare it as a supported dimension.
            assert 5 not in model_info.matryoshka_dimensions
            invalid_dimensions.append(5)
    else:
        # The test expects any explicit dimension to be rejected for a model
        # that is not marked as Matryoshka.
        invalid_dimensions.append(16)

    for dimensions in valid_dimensions:
        await make_request_and_correctness_test(dimensions)

    for dimensions in invalid_dimensions:
        with pytest.raises(openai.BadRequestError):
            await make_request_and_correctness_test(dimensions)