test_translation_validation.py 6.28 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import io
# imports for guided decoding tests
import json
from unittest.mock import patch

import librosa
import numpy as np
import pytest
import soundfile as sf
from openai._base_client import AsyncAPIClient

from vllm.assets.audio import AudioAsset

from ...utils import RemoteOpenAIServer


@pytest.fixture
def foscolo():
    # Test translation it->en
    path = AudioAsset('azacinto_foscolo').get_local_path()
    with open(str(path), "rb") as f:
        yield f


# NOTE: (NickLucche) the large-v3-turbo model was not trained on translation!
@pytest.mark.asyncio
async def test_basic_audio(foscolo):
    model_name = "openai/whisper-small"
    server_args = ["--enforce-eager"]
    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        client = remote_server.get_async_client()
        translation = await client.audio.translations.create(
            model=model_name,
            file=foscolo,
            response_format="text",
            # TODO remove once language detection is implemented
            extra_body=dict(language="it"),
            temperature=0.0)
42
43
        out = json.loads(translation)['text'].strip().lower()
        assert "greek sea" in out
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75


@pytest.mark.asyncio
async def test_audio_prompt(foscolo):
    model_name = "openai/whisper-small"
    server_args = ["--enforce-eager"]
    # Condition whisper on starting text
    prompt = "Nor have I ever"
    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        client = remote_server.get_async_client()
        transcription = await client.audio.translations.create(
            model=model_name,
            file=foscolo,
            prompt=prompt,
            extra_body=dict(language="it"),
            response_format="text",
            temperature=0.0)
        out = json.loads(transcription)['text']
        assert "Nor will I ever touch the sacred" not in out
        assert prompt not in out


@pytest.mark.asyncio
async def test_non_asr_model(foscolo):
    # text to text model
    model_name = "JackFram/llama-68m"
    server_args = ["--enforce-eager"]
    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        client = remote_server.get_async_client()
        res = await client.audio.translations.create(model=model_name,
                                                     file=foscolo,
                                                     temperature=0.0)
76
77
78
        err = res.error
        assert err["code"] == 400 and not res.text
        assert err["message"] == "The model does not support Translations API"
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171


@pytest.mark.asyncio
async def test_streaming_response(foscolo):
    model_name = "openai/whisper-small"
    server_args = ["--enforce-eager"]
    translation = ""
    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        client = remote_server.get_async_client()
        res_no_stream = await client.audio.translations.create(
            model=model_name,
            file=foscolo,
            response_format="json",
            extra_body=dict(language="it"),
            temperature=0.0)
        # Unfortunately this only works when the openai client is patched
        # to use streaming mode, not exposed in the translation api.
        original_post = AsyncAPIClient.post

        async def post_with_stream(*args, **kwargs):
            kwargs['stream'] = True
            return await original_post(*args, **kwargs)

        with patch.object(AsyncAPIClient, "post", new=post_with_stream):
            client = remote_server.get_async_client()
            res = await client.audio.translations.create(model=model_name,
                                                         file=foscolo,
                                                         temperature=0.0,
                                                         extra_body=dict(
                                                             stream=True,
                                                             language="it"))
            # Reconstruct from chunks and validate
            async for chunk in res:
                # just a chunk
                text = chunk.choices[0]['delta']['content']
                translation += text

        assert translation == res_no_stream.text


@pytest.mark.asyncio
async def test_stream_options(foscolo):
    model_name = "openai/whisper-small"
    server_args = ["--enforce-eager"]
    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        original_post = AsyncAPIClient.post

        async def post_with_stream(*args, **kwargs):
            kwargs['stream'] = True
            return await original_post(*args, **kwargs)

        with patch.object(AsyncAPIClient, "post", new=post_with_stream):
            client = remote_server.get_async_client()
            res = await client.audio.translations.create(
                model=model_name,
                file=foscolo,
                temperature=0.0,
                extra_body=dict(language="it",
                                stream=True,
                                stream_include_usage=True,
                                stream_continuous_usage_stats=True))
            final = False
            continuous = True
            async for chunk in res:
                if not len(chunk.choices):
                    # final usage sent
                    final = True
                else:
                    continuous = continuous and hasattr(chunk, 'usage')
            assert final and continuous


@pytest.mark.asyncio
async def test_long_audio_request(foscolo):
    model_name = "openai/whisper-small"
    server_args = ["--enforce-eager"]

    foscolo.seek(0)
    audio, sr = librosa.load(foscolo)
    repeated_audio = np.tile(audio, 2)
    # Repeated audio to buffer
    buffer = io.BytesIO()
    sf.write(buffer, repeated_audio, sr, format='WAV')
    buffer.seek(0)
    with RemoteOpenAIServer(model_name, server_args) as remote_server:
        client = remote_server.get_async_client()
        translation = await client.audio.translations.create(
            model=model_name,
            file=buffer,
            extra_body=dict(language="it"),
            response_format="text",
            temperature=0.0)
        out = json.loads(translation)['text'].strip().lower()
172
        assert out.count("greek sea") == 2