openai_transcription_client.py 3.18 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
"""
This script demonstrates how to use the vLLM API server to perform audio
transcription with the `openai/whisper-large-v3` model.

Before running this script, you must start the vLLM server with the following command:

    vllm serve openai/whisper-large-v3

Requirements:
- vLLM with audio support
- openai Python SDK
- httpx for streaming support

The script performs:
1. Synchronous transcription using the OpenAI-compatible API.
2. Streaming transcription using a raw HTTP request to the vLLM server.
"""

21
22
23
24
import asyncio
import json

import httpx
25
26
27
28
from openai import OpenAI

from vllm.assets.audio import AudioAsset

29
30
# Local paths of the two sample audio clips used by the demos below.
mary_had_lamb, winning_call = (
    AudioAsset(asset_name).get_local_path()
    for asset_name in ("mary_had_lamb", "winning_call")
)
31
32
33
34
35
36
37
38

# Point the OpenAI client at vLLM's API server by overriding the API key
# and the API base URL.
openai_api_key = "EMPTY"
openai_api_base = "http://localhost:8000/v1"
client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)
39
40
41


def sync_openai():
    """
    Transcribe the `mary_had_lamb` clip with a single blocking request made
    through the OpenAI-compatible client, then print the transcribed text.
    """
    audio_path = str(mary_had_lamb)
    # Extra sampling parameters that the OpenAI API itself does not provide;
    # they are forwarded to the server through `extra_body`.
    sampling_extras = {"seed": 4419, "repetition_penalty": 1.3}

    with open(audio_path, "rb") as audio_file:
        result = client.audio.transcriptions.create(
            file=audio_file,
            model="openai/whisper-large-v3",
            language="en",
            response_format="json",
            temperature=0.0,
            extra_body=sampling_extras,
        )
        print("transcription result:", result.text)


# OpenAI Transcription API client does not support streaming.
async def stream_openai_response():
    """
    Perform streaming transcription using vLLM's raw HTTP streaming API.

    Posts the `winning_call` audio file to the server's
    `/audio/transcriptions` endpoint with `stream=True` and prints every text
    fragment as soon as its chunk arrives, followed by a final newline.

    Raises:
        httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        json.JSONDecodeError: If a streamed payload is not valid JSON.
        KeyError: If a streamed JSON payload has no `choices` field.
    """
    data = {
        "language": "en",
        "stream": True,
        "model": "openai/whisper-large-v3",
    }
    url = openai_api_base + "/audio/transcriptions"
    headers = {"Authorization": f"Bearer {openai_api_key}"}
    # Flush explicitly: the fragments are printed without a trailing newline,
    # so a line-buffered terminal would otherwise hold them back until the end.
    print("transcription result:", end=" ", flush=True)
    # Named `http_client` so the module-level OpenAI `client` is not shadowed.
    async with httpx.AsyncClient() as http_client:
        with open(str(winning_call), "rb") as f:
            async with http_client.stream(
                "POST", url, files={"file": f}, data=data, headers=headers
            ) as response:
                # Fail with a clear HTTP error instead of trying to parse an
                # error body as if it were a stream of transcription chunks.
                response.raise_for_status()
                async for line in response.aiter_lines():
                    # Skip the empty lines between streamed events.
                    if not line:
                        continue
                    # Each line is a JSON object prefixed with 'data: '
                    if line.startswith("data: "):
                        line = line[len("data: ") :]
                    # Last chunk, stream ends
                    if line.strip() == "[DONE]":
                        break
                    # Parse the JSON response
                    chunk = json.loads(line)
                    choices = chunk["choices"]
                    # A chunk with an empty `choices` list has no text.
                    if not choices:
                        continue
                    # Extract and print the content; a delta without content
                    # yields None, which must not be printed as "None".
                    content = choices[0].get("delta", {}).get("content")
                    if content:
                        print(content, end="", flush=True)
    print()  # Final newline after stream ends


def main():
    """
    Run both demos in order: the blocking transcription first, then the
    streaming transcription driven by a fresh asyncio event loop.
    """
    sync_openai()

    # The streaming demo is a coroutine, so hand it to asyncio to execute.
    streaming_demo = stream_openai_response()
    asyncio.run(streaming_demo)
100
101


102
103
# Run the demos only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()