test_translation_validation.py 6.3 KB
Newer Older
1
2
3
4
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import io
5

6
# imports for structured outputs tests
7
8
import json

9
import httpx
10
11
12
import librosa
import numpy as np
import pytest
13
import pytest_asyncio
14
15
16
17
import soundfile as sf

from ...utils import RemoteOpenAIServer

18
19
SERVER_ARGS = ["--enforce-eager"]

20

21
22
23
@pytest.fixture(
    scope="module",
    params=["openai/whisper-small", "google/gemma-3n-E2B-it"],
)
def server(request):
    """Launch a remote server for each parametrized model name.

    The fixture is module-scoped and yields a
    ``(remote_server, model_name)`` tuple.
    """
    model_name = request.param
    with RemoteOpenAIServer(model_name, SERVER_ARGS) as remote_server:
        yield remote_server, model_name
28
29


30
@pytest_asyncio.fixture
async def client_and_model(server):
    """Yield ``(async_client, model_name)`` for the server fixture."""
    # `server` is the (remote_server, model_name) tuple from the fixture above;
    # unpack into fresh names rather than rebinding the parameter.
    remote_server, model_name = server
    async with remote_server.get_async_client() as async_client:
        yield async_client, model_name
35
36
37
38
39
40


@pytest.mark.asyncio
async def test_non_asr_model(foscolo):
    """A text-to-text model must reject Translations API requests."""
    # text to text model
    model_name = "JackFram/llama-68m"
    with RemoteOpenAIServer(model_name, SERVER_ARGS) as remote_server:
        # Use the client as an async context manager (as the
        # `client_and_model` fixture does) so it is closed when the test
        # finishes instead of being left open.
        async with remote_server.get_async_client() as client:
            res = await client.audio.translations.create(
                model=model_name, file=foscolo, temperature=0.0
            )
        err = res.error
        # Separate assertions so a failure points at the exact condition.
        assert err["code"] == 400
        assert not res.text
        assert err["message"] == "The model does not support Translations API"
49
50


51
52
# NOTE: (NickLucche) the large-v3-turbo model was not trained on translation!
@pytest.mark.asyncio
async def test_basic_audio(foscolo, client_and_model):
    """Translate the sample audio and check for an expected phrase."""
    client, model_name = client_and_model
    # TODO remove `language="it"` once language detection is implemented
    language_options = {"language": "it", "to_language": "en"}
    translation = await client.audio.translations.create(
        model=model_name,
        file=foscolo,
        response_format="text",
        extra_body=language_options,
        temperature=0.0,
    )
    # The "text" response is parsed as JSON here and its "text" field is read.
    payload = json.loads(translation)
    out = payload["text"].strip().lower()
    assert "greek sea" in out


@pytest.mark.asyncio
async def test_audio_prompt(foscolo, client_and_model):
    """Translate with a conditioning prompt and check what the output omits."""
    client, model_name = client_and_model
    # Condition whisper on starting text
    prompt = "Nor have I ever"
    language_options = {"language": "it", "to_language": "en"}
    translation = await client.audio.translations.create(
        model=model_name,
        file=foscolo,
        prompt=prompt,
        extra_body=language_options,
        response_format="text",
        temperature=0.0,
    )
    payload = json.loads(translation)
    out = payload["text"]
    assert "Nor will I ever touch the sacred" not in out
    assert prompt not in out


85
@pytest.mark.asyncio
async def test_streaming_response(foscolo, client_and_model, server):
    """Compare a streamed translation against the non-streamed one.

    The non-streamed result comes from the OpenAI client; the streamed one is
    collected over raw HTTP. At least 90% of the streamed words must match
    the non-streamed words position by position.
    """
    client, model_name = client_and_model
    res_no_stream = await client.audio.translations.create(
        model=model_name,
        file=foscolo,
        response_format="json",
        extra_body=dict(language="it", to_language="en", seed=42),
        temperature=0.0,
    )

    # Stream via HTTPX since OpenAI translation client doesn't expose streaming
    server, model_name = server
    url = server.url_for("v1/audio/translations")
    headers = {"Authorization": f"Bearer {server.DUMMY_API_KEY}"}
    data = {
        "model": model_name,
        "language": "it",
        "to_language": "en",
        "stream": True,
        "temperature": 0.0,
        "seed": 42,
    }
    # The file object was already consumed by the request above.
    foscolo.seek(0)
    pieces = []
    async with httpx.AsyncClient() as http_client:
        files = {"file": foscolo}
        async with http_client.stream(
            "POST", url, headers=headers, data=data, files=files
        ) as response:
            async for line in response.aiter_lines():
                if not line:
                    continue
                if line.startswith("data: "):
                    line = line[len("data: ") :]
                if line.strip() == "[DONE]":
                    break
                chunk = json.loads(line)
                text = chunk["choices"][0].get("delta", {}).get("content")
                if text:
                    pieces.append(text)

    res_stream = "".join(pieces).split()
    # Without this guard an empty stream would satisfy the ratio check below
    # vacuously (0 >= 0), hiding a completely broken streaming path.
    assert res_stream, "streaming response produced no text"
    # NOTE There's a small non-deterministic issue here, likely in the attn
    # computation, which will cause a few tokens to be different, while still
    # being very close semantically.
    matching = sum(
        x == y for x, y in zip(res_stream, res_no_stream.text.split())
    )
    assert matching >= len(res_stream) * 0.9
134
135
136


@pytest.mark.asyncio
async def test_stream_options(foscolo, server):
    """Check usage reporting in a streamed translation response.

    The test passes when a chunk without choices is seen (treated as the
    final usage chunk) and every chunk that has choices carries a "usage" key.
    """
    remote_server, model_name = server
    url = remote_server.url_for("v1/audio/translations")
    headers = {"Authorization": f"Bearer {remote_server.DUMMY_API_KEY}"}
    data = {
        "model": model_name,
        "language": "it",
        "to_language": "en",
        "stream": True,
        "stream_include_usage": True,
        "stream_continuous_usage_stats": True,
        "temperature": 0.0,
    }
    foscolo.seek(0)
    saw_final_usage = False
    usage_on_every_chunk = True
    prefix = "data: "
    async with httpx.AsyncClient() as http_client:
        files = {"file": foscolo}
        async with http_client.stream(
            "POST", url, headers=headers, data=data, files=files
        ) as response:
            async for line in response.aiter_lines():
                if not line:
                    continue
                if line.startswith(prefix):
                    line = line[len(prefix) :]
                if line.strip() == "[DONE]":
                    break
                chunk = json.loads(line)
                if chunk.get("choices", []):
                    # Content chunk: it must report usage as well.
                    if "usage" not in chunk:
                        usage_on_every_chunk = False
                else:
                    # final usage sent
                    saw_final_usage = True
    assert saw_final_usage and usage_on_every_chunk
173
174
175


@pytest.mark.asyncio
async def test_long_audio_request(foscolo, client_and_model):
    """Translate the sample audio played twice and expect the phrase twice."""
    client, model_name = client_and_model
    if model_name == "google/gemma-3n-E2B-it":
        pytest.skip("Gemma3n does not support long audio requests")

    foscolo.seek(0)
    samples, sample_rate = librosa.load(foscolo)
    # Repeated audio to buffer
    doubled = np.tile(samples, 2)
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, doubled, sample_rate, format="WAV")
    wav_buffer.seek(0)

    language_options = {"language": "it", "to_language": "en"}
    translation = await client.audio.translations.create(
        model=model_name,
        file=wav_buffer,
        extra_body=language_options,
        response_format="text",
        temperature=0.0,
    )
    payload = json.loads(translation)
    out = payload["text"].strip().lower()
    assert out.count("greek sea") == 2