test_translation_validation.py 6.61 KB
Newer Older
1
2
3
4
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import io
5
# imports for structured outputs tests
6
7
import json

8
import httpx
9
10
11
import librosa
import numpy as np
import pytest
12
import pytest_asyncio
13
14
15
16
import soundfile as sf

from ...utils import RemoteOpenAIServer

17
18
# Extra arguments handed to every RemoteOpenAIServer started by this module
# (both the parametrized `server` fixture and `test_non_asr_model`).
SERVER_ARGS = ["--enforce-eager"]

19

20
21
22
23
24
25
@pytest.fixture(
    scope="module",
    params=["openai/whisper-small", "google/gemma-3n-E2B-it"],
)
def server(request):
    """Start one remote server per parametrized model, shared module-wide.

    Yields:
        A ``(remote_server, model_name)`` tuple so that dependants know
        which model the running server is serving.
    """
    model_name = request.param
    with RemoteOpenAIServer(model_name, SERVER_ARGS) as remote_server:
        yield remote_server, model_name
26
27


28
@pytest_asyncio.fixture
async def client_and_model(server):
    """Yield ``(async_client, model_name)`` for the server under test.

    The client is obtained from the server fixture and is used as an async
    context manager, so it is closed when the dependent test finishes.
    """
    remote_server, model_name = server
    client_ctx = remote_server.get_async_client()
    async with client_ctx as async_client:
        yield async_client, model_name
33
34
35
36
37
38


@pytest.mark.asyncio
async def test_non_asr_model(foscolo):
    """A text-to-text model must reject Translations API requests.

    Spins up a dedicated server for a model without audio support and checks
    that the response carries a 400 error, no text, and the expected message.

    Args:
        foscolo: file-like audio fixture (defined outside this module) that
            is sent as the request's ``file``.
    """
    # text to text model
    model_name = "JackFram/llama-68m"
    with RemoteOpenAIServer(model_name, SERVER_ARGS) as remote_server:
        # Use the client as an async context manager, as the
        # `client_and_model` fixture does, so it is closed rather than
        # left open once the request is done.
        async with remote_server.get_async_client() as client:
            res = await client.audio.translations.create(model=model_name,
                                                         file=foscolo,
                                                         temperature=0.0)
        err = res.error
        assert err["code"] == 400 and not res.text
        assert err["message"] == "The model does not support Translations API"
47
48


49
50
# NOTE: (NickLucche) the large-v3-turbo model was not trained on translation!
@pytest.mark.asyncio
async def test_basic_audio(foscolo, client_and_model):
    """Translate the sample audio to English and look for a known phrase."""
    client, model_name = client_and_model
    request_kwargs = dict(
        model=model_name,
        file=foscolo,
        response_format="text",
        # TODO remove `language="it"` once language detection is implemented
        extra_body=dict(language="it", to_language="en"),
        temperature=0.0,
    )
    translation = await client.audio.translations.create(**request_kwargs)
    # The returned body is parsed as JSON; its `text` field is normalised
    # (trimmed, lower-cased) before matching.
    payload = json.loads(translation)
    out = payload['text'].strip().lower()
    assert "greek sea" in out


@pytest.mark.asyncio
async def test_audio_prompt(foscolo, client_and_model):
    """Check the translated text when the request carries a text prompt.

    Neither the prompt itself nor the phrase
    "Nor will I ever touch the sacred" may appear in the output.
    """
    client, model_name = client_and_model
    # Condition whisper on starting text
    prompt = "Nor have I ever"
    translation = await client.audio.translations.create(
        model=model_name,
        file=foscolo,
        prompt=prompt,
        extra_body={"language": "it", "to_language": "en"},
        response_format="text",
        temperature=0.0,
    )
    out = json.loads(translation)['text']
    for unexpected in ("Nor will I ever touch the sacred", prompt):
        assert unexpected not in out


81
@pytest.mark.asyncio
async def test_streaming_response(foscolo, client_and_model, server):
    """Streamed and non-streamed translations of the same audio must agree.

    The non-streamed result is fetched through the OpenAI client; the
    streamed one is collected from the SSE lines of a raw HTTPX request.
    At least 90% of the streamed words must match the non-streamed words
    position by position.

    Args:
        foscolo: file-like audio fixture, rewound before being re-sent.
        client_and_model: ``(async_client, model_name)`` fixture.
        server: ``(remote_server, model_name)`` fixture, used to build the
            raw streaming request.
    """
    client, model_name = client_and_model
    res_no_stream = await client.audio.translations.create(
        model=model_name,
        file=foscolo,
        response_format="json",
        extra_body=dict(language="it", to_language="en", seed=42),
        temperature=0.0)

    # Stream via HTTPX since OpenAI translation client doesn't expose streaming
    remote_server, model_name = server
    url = remote_server.url_for("v1/audio/translations")
    headers = {"Authorization": f"Bearer {remote_server.DUMMY_API_KEY}"}
    data = {
        "model": model_name,
        "language": "it",
        "to_language": "en",
        "stream": True,
        "temperature": 0.0,
        "seed": 42,
    }
    # The file was already consumed by the non-streamed request above.
    foscolo.seek(0)
    pieces = []
    async with httpx.AsyncClient() as http_client:
        files = {"file": foscolo}
        async with http_client.stream("POST",
                                      url,
                                      headers=headers,
                                      data=data,
                                      files=files) as response:
            async for line in response.aiter_lines():
                if not line:
                    continue
                if line.startswith("data: "):
                    line = line[len("data: "):]
                if line.strip() == "[DONE]":
                    break
                chunk = json.loads(line)
                text = chunk["choices"][0].get("delta", {}).get("content")
                pieces.append(text or "")

    res_stream = "".join(pieces).split()
    # Without this guard an empty stream would satisfy the ratio check
    # below vacuously (0 >= 0) and the test would pass on no output at all.
    assert res_stream, "streaming endpoint returned no text"
    # NOTE There's a small non-deterministic issue here, likely in the attn
    # computation, which will cause a few tokens to be different, while still
    # being very close semantically.
    matching = sum(x == y
                   for x, y in zip(res_stream, res_no_stream.text.split()))
    assert matching >= len(res_stream) * 0.9
130
131
132


@pytest.mark.asyncio
async def test_stream_options(foscolo, server):
    """Verify usage reporting options on the streaming translations endpoint.

    With `stream_include_usage` and `stream_continuous_usage_stats` set,
    every chunk that has choices must carry a `usage` key, and at least one
    chunk without choices (the final usage message) must be received.
    """
    remote_server, model_name = server
    url = remote_server.url_for("v1/audio/translations")
    headers = {"Authorization": f"Bearer {remote_server.DUMMY_API_KEY}"}
    data = {
        "model": model_name,
        "language": "it",
        "to_language": "en",
        "stream": True,
        "stream_include_usage": True,
        "stream_continuous_usage_stats": True,
        "temperature": 0.0,
    }
    foscolo.seek(0)

    final = False
    continuous = True
    async with httpx.AsyncClient() as http_client, http_client.stream(
            "POST",
            url,
            headers=headers,
            data=data,
            files={"file": foscolo},
    ) as response:
        async for raw_line in response.aiter_lines():
            if not raw_line:
                continue
            # Drop the SSE "data: " prefix when present.
            payload = (raw_line[len("data: "):]
                       if raw_line.startswith("data: ") else raw_line)
            if payload.strip() == "[DONE]":
                break
            chunk = json.loads(payload)
            if chunk.get("choices", []):
                continuous = continuous and ("usage" in chunk)
            else:
                # final usage sent
                final = True
    assert final and continuous
171
172
173


@pytest.mark.asyncio
async def test_long_audio_request(foscolo, client_and_model):
    """Translate the sample played twice and expect the key phrase twice.

    Skipped for "google/gemma-3n-E2B-it".
    """
    client, model_name = client_and_model
    if model_name == "google/gemma-3n-E2B-it":
        pytest.skip("Gemma3n does not support long audio requests")

    foscolo.seek(0)
    samples, sample_rate = librosa.load(foscolo)
    # Concatenate the clip with itself and serialise it to an in-memory WAV.
    doubled = np.tile(samples, 2)
    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, doubled, sample_rate, format='WAV')
    wav_buffer.seek(0)

    translation = await client.audio.translations.create(
        model=model_name,
        file=wav_buffer,
        extra_body={"language": "it", "to_language": "en"},
        response_format="text",
        temperature=0.0,
    )
    payload = json.loads(translation)
    out = payload['text'].strip().lower()
    assert out.count("greek sea") == 2