Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
314af861
Unverified
Commit
314af861
authored
Jul 01, 2025
by
Nicolò Lucchesi
Committed by
GitHub
Jul 01, 2025
Browse files
[Docs] Update transcriptions API to use openai client with `stream=True` (#20271)
Signed-off-by:
NickLucche
<
nlucches@redhat.com
>
parent
0e96cc9b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
37 deletions
+31
-37
examples/online_serving/openai_transcription_client.py
examples/online_serving/openai_transcription_client.py
+28
-33
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+3
-4
No files found.
examples/online_serving/openai_transcription_client.py
View file @
314af861
...
@@ -19,10 +19,8 @@ The script performs:
...
@@ -19,10 +19,8 @@ The script performs:
"""
"""
import
asyncio
import
asyncio
import
json
import
httpx
from
openai
import
AsyncOpenAI
,
OpenAI
from
openai
import
OpenAI
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.audio
import
AudioAsset
...
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
...
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
print
(
"transcription result:"
,
transcription
.
text
)
print
(
"transcription result:"
,
transcription
.
text
)
async
def
stream_openai_response
(
audio_path
:
str
,
base_url
:
str
,
api_key
:
str
):
async
def
stream_openai_response
(
audio_path
:
str
,
client
:
AsyncOpenAI
):
"""
"""
Perform
streaming
transcription using
vLLM's raw HTTP streaming
API.
Perform
asynchronous
transcription using
OpenAI-compatible
API.
"""
"""
data
=
{
print
(
"
\n
transcription result:"
,
end
=
" "
)
"language"
:
"en"
,
"stream"
:
True
,
"model"
:
"openai/whisper-large-v3"
,
}
url
=
base_url
+
"/audio/transcriptions"
headers
=
{
"Authorization"
:
f
"Bearer
{
api_key
}
"
}
print
(
"transcription result:"
,
end
=
" "
)
# OpenAI Transcription API client does not support streaming.
async
with
httpx
.
AsyncClient
()
as
client
:
with
open
(
audio_path
,
"rb"
)
as
f
:
with
open
(
audio_path
,
"rb"
)
as
f
:
async
with
client
.
stream
(
transcription
=
await
client
.
audio
.
transcriptions
.
create
(
"POST"
,
url
,
files
=
{
"file"
:
f
},
data
=
data
,
headers
=
headers
file
=
f
,
)
as
response
:
model
=
"openai/whisper-large-v3"
,
async
for
line
in
response
.
aiter_lines
():
language
=
"en"
,
# Each line is a JSON object prefixed with 'data: '
response_format
=
"json"
,
if
line
:
temperature
=
0.0
,
if
line
.
startswith
(
"data: "
):
# Additional sampling params not provided by OpenAI API.
line
=
line
[
len
(
"data: "
)
:]
extra_body
=
dict
(
# Last chunk, stream ends
seed
=
420
,
if
line
.
strip
()
==
"[DONE]"
:
top_p
=
0.6
,
break
),
# Parse the JSON response
stream
=
True
,
chunk
=
json
.
loads
(
line
)
)
# Extract and print the content
async
for
chunk
in
transcription
:
content
=
chunk
[
"choices"
][
0
].
get
(
"delta"
,
{}).
get
(
"content"
)
if
chunk
.
choices
:
print
(
content
,
end
=
""
)
content
=
chunk
.
choices
[
0
].
get
(
"delta"
,
{}).
get
(
"content"
)
print
(
content
,
end
=
""
,
flush
=
True
)
print
()
# Final newline after stream ends
print
()
# Final newline after stream ends
...
@@ -95,7 +86,11 @@ def main():
...
@@ -95,7 +86,11 @@ def main():
sync_openai
(
mary_had_lamb
,
client
)
sync_openai
(
mary_had_lamb
,
client
)
# Run the asynchronous function
# Run the asynchronous function
asyncio
.
run
(
stream_openai_response
(
winning_call
,
openai_api_base
,
openai_api_key
))
client
=
AsyncOpenAI
(
api_key
=
openai_api_key
,
base_url
=
openai_api_base
,
)
asyncio
.
run
(
stream_openai_response
(
winning_call
,
client
))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
vllm/entrypoints/openai/protocol.py
View file @
314af861
...
@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
...
@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
timestamps incurs additional latency.
timestamps incurs additional latency.
"""
"""
# --8<-- [start:transcription-extra-params]
stream
:
Optional
[
bool
]
=
False
stream
:
Optional
[
bool
]
=
False
"""Custom field not present in the original OpenAI definition. When set,
"""When set, it will enable output to be streamed in a similar fashion
it will enable output to be streamed in a similar fashion as the Chat
as the Chat Completion endpoint.
Completion endpoint.
"""
"""
# --8<-- [start:transcription-extra-params]
# Flattened stream option to simplify form data.
# Flattened stream option to simplify form data.
stream_include_usage
:
Optional
[
bool
]
=
False
stream_include_usage
:
Optional
[
bool
]
=
False
stream_continuous_usage_stats
:
Optional
[
bool
]
=
False
stream_continuous_usage_stats
:
Optional
[
bool
]
=
False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment