Unverified commit 314af861, authored by Nicolò Lucchesi, committed by GitHub
Browse files

[Docs] Update transcriptions API to use openai client with `stream=True` (#20271)


Signed-off-by: NickLucche <nlucches@redhat.com>
parent 0e96cc9b
...@@ -19,10 +19,8 @@ The script performs: ...@@ -19,10 +19,8 @@ The script performs:
""" """
import asyncio import asyncio
import json
import httpx from openai import AsyncOpenAI, OpenAI
from openai import OpenAI
from vllm.assets.audio import AudioAsset from vllm.assets.audio import AudioAsset
...@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI): ...@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
print("transcription result:", transcription.text) print("transcription result:", transcription.text)
async def stream_openai_response(audio_path: str, base_url: str, api_key: str): async def stream_openai_response(audio_path: str, client: AsyncOpenAI):
""" """
Perform streaming transcription using vLLM's raw HTTP streaming API. Perform asynchronous transcription using OpenAI-compatible API.
""" """
data = { print("\ntranscription result:", end=" ")
"language": "en",
"stream": True,
"model": "openai/whisper-large-v3",
}
url = base_url + "/audio/transcriptions"
headers = {"Authorization": f"Bearer {api_key}"}
print("transcription result:", end=" ")
# OpenAI Transcription API client does not support streaming.
async with httpx.AsyncClient() as client:
with open(audio_path, "rb") as f: with open(audio_path, "rb") as f:
async with client.stream( transcription = await client.audio.transcriptions.create(
"POST", url, files={"file": f}, data=data, headers=headers file=f,
) as response: model="openai/whisper-large-v3",
async for line in response.aiter_lines(): language="en",
# Each line is a JSON object prefixed with 'data: ' response_format="json",
if line: temperature=0.0,
if line.startswith("data: "): # Additional sampling params not provided by OpenAI API.
line = line[len("data: ") :] extra_body=dict(
# Last chunk, stream ends seed=420,
if line.strip() == "[DONE]": top_p=0.6,
break ),
# Parse the JSON response stream=True,
chunk = json.loads(line) )
# Extract and print the content async for chunk in transcription:
content = chunk["choices"][0].get("delta", {}).get("content") if chunk.choices:
print(content, end="") content = chunk.choices[0].get("delta", {}).get("content")
print(content, end="", flush=True)
print() # Final newline after stream ends print() # Final newline after stream ends
...@@ -95,7 +86,11 @@ def main(): ...@@ -95,7 +86,11 @@ def main():
sync_openai(mary_had_lamb, client) sync_openai(mary_had_lamb, client)
# Run the asynchronous function # Run the asynchronous function
asyncio.run(stream_openai_response(winning_call, openai_api_base, openai_api_key)) client = AsyncOpenAI(
api_key=openai_api_key,
base_url=openai_api_base,
)
asyncio.run(stream_openai_response(winning_call, client))
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel): ...@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
timestamps incurs additional latency. timestamps incurs additional latency.
""" """
# --8<-- [start:transcription-extra-params]
stream: Optional[bool] = False stream: Optional[bool] = False
"""Custom field not present in the original OpenAI definition. When set, """When set, it will enable output to be streamed in a similar fashion
it will enable output to be streamed in a similar fashion as the Chat as the Chat Completion endpoint.
Completion endpoint.
""" """
# --8<-- [start:transcription-extra-params]
# Flattened stream option to simplify form data. # Flattened stream option to simplify form data.
stream_include_usage: Optional[bool] = False stream_include_usage: Optional[bool] = False
stream_continuous_usage_stats: Optional[bool] = False stream_continuous_usage_stats: Optional[bool] = False
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment