# openai_transcription_client.py
1
# SPDX-License-Identifier: Apache-2.0
2
3
4
5
import asyncio
import json

import httpx
6
7
8
9
from openai import OpenAI

from vllm.assets.audio import AudioAsset

10
11
# Paths to the two sample audio clips; each is opened in binary mode by one
# of the example functions below.
mary_had_lamb = AudioAsset("mary_had_lamb").get_local_path()
winning_call = AudioAsset("winning_call").get_local_path()
12
13
14
15
16
17
18
19

# Point the OpenAI client at vLLM's API server instead of OpenAI's own
# endpoint by overriding the API key and base URL.
# NOTE(review): "EMPTY" looks like a dummy key; this assumes the server was
# started without API-key checking - confirm.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
# Module-level client used by sync_openai(); the streaming example reuses
# only the key and base URL defined above.
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
20
21
22
23
24
25


def sync_openai():
    """Transcribe the ``mary_had_lamb`` clip in one blocking request and print the text."""
    # Additional sampling params not provided by OpenAI API; they are sent
    # through ``extra_body``.
    sampling_extras = {"seed": 4419, "repetition_penalty": 1.3}
    request_kwargs = {
        "model": "openai/whisper-large-v3",
        "language": "en",
        "response_format": "json",
        "temperature": 0.0,
        "extra_body": sampling_extras,
    }

    with open(str(mary_had_lamb), "rb") as audio_file:
        result = client.audio.transcriptions.create(
            file=audio_file, **request_kwargs
        )
        print("transcription result:", result.text)


# Run the blocking (non-streaming) transcription example.
sync_openai()


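# The OpenAI client's transcription API does not support streaming, so the
# streaming request below is sent with httpx and the response lines are
# parsed by hand.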
async def stream_openai_response():
    """Stream a transcription of the ``winning_call`` clip and print it as it arrives.

    The audio file is POSTed as multipart form data to the
    ``/audio/transcriptions`` endpoint with ``stream`` enabled. Every
    non-empty response line is treated as a JSON chunk, optionally prefixed
    with ``data: ``; the literal ``[DONE]`` marks the end of the stream.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
    """
    data = {
        "language": "en",
        "stream": True,
        "model": "openai/whisper-large-v3",
    }
    url = openai_api_base + "/audio/transcriptions"
    headers = {"Authorization": f"Bearer {openai_api_key}"}
    print("transcription result:", end=" ")
    # Named ``http_client`` so the module-level OpenAI ``client`` is not
    # shadowed inside this function.
    async with httpx.AsyncClient() as http_client:
        with open(str(winning_call), "rb") as f:
            async with http_client.stream(
                "POST", url, files={"file": f}, data=data, headers=headers
            ) as response:
                # Fail early on an error status instead of trying to parse
                # the error body as a transcription chunk.
                response.raise_for_status()
                async for line in response.aiter_lines():
                    # Blank lines separate events and carry no payload.
                    if not line:
                        continue
                    # Each line is a JSON object prefixed with 'data: '
                    if line.startswith("data: "):
                        line = line[len("data: ") :]
                    # Last chunk, stream ends
                    if line.strip() == "[DONE]":
                        break
                    # Parse the JSON response
                    chunk = json.loads(line)
                    # Extract and print the content
                    content = chunk["choices"][0].get("delta", {}).get("content")
                    # Chunks without text would otherwise print the literal
                    # string "None".
                    if content:
                        # Flush so the text appears incrementally.
                        print(content, end="", flush=True)
    # Terminate the line that the streamed pieces were appended to.
    print()
70
71
72
73


# Run the asynchronous streaming example; asyncio.run() drives the coroutine
# to completion on a new event loop.
asyncio.run(stream_openai_response())