Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
314af861
Unverified
Commit
314af861
authored
Jul 01, 2025
by
Nicolò Lucchesi
Committed by
GitHub
Jul 01, 2025
Browse files
[Docs] Update transcriptions API to use openai client with `stream=True` (#20271)
Signed-off-by:
NickLucche
<
nlucches@redhat.com
>
parent
0e96cc9b
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
31 additions
and
37 deletions
+31
-37
examples/online_serving/openai_transcription_client.py
examples/online_serving/openai_transcription_client.py
+28
-33
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/protocol.py
+3
-4
No files found.
examples/online_serving/openai_transcription_client.py
View file @
314af861
...
@@ -19,10 +19,8 @@ The script performs:
...
@@ -19,10 +19,8 @@ The script performs:
"""
"""
import
asyncio
import
asyncio
import
json
import
httpx
from
openai
import
AsyncOpenAI
,
OpenAI
from
openai
import
OpenAI
from
vllm.assets.audio
import
AudioAsset
from
vllm.assets.audio
import
AudioAsset
...
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
...
@@ -47,37 +45,30 @@ def sync_openai(audio_path: str, client: OpenAI):
print
(
"transcription result:"
,
transcription
.
text
)
print
(
"transcription result:"
,
transcription
.
text
)
async
def
stream_openai_response
(
audio_path
:
str
,
base_url
:
str
,
api_key
:
str
):
async
def
stream_openai_response
(
audio_path
:
str
,
client
:
AsyncOpenAI
):
"""
"""
Perform
streaming
transcription using
vLLM's raw HTTP streaming
API.
Perform
asynchronous
transcription using
OpenAI-compatible
API.
"""
"""
data
=
{
print
(
"
\n
transcription result:"
,
end
=
" "
)
"language"
:
"en"
,
with
open
(
audio_path
,
"rb"
)
as
f
:
"stream"
:
True
,
transcription
=
await
client
.
audio
.
transcriptions
.
create
(
"model"
:
"openai/whisper-large-v3"
,
file
=
f
,
}
model
=
"openai/whisper-large-v3"
,
url
=
base_url
+
"/audio/transcriptions"
language
=
"en"
,
headers
=
{
"Authorization"
:
f
"Bearer
{
api_key
}
"
}
response_format
=
"json"
,
print
(
"transcription result:"
,
end
=
" "
)
temperature
=
0.0
,
# OpenAI Transcription API client does not support streaming.
# Additional sampling params not provided by OpenAI API.
async
with
httpx
.
AsyncClient
()
as
client
:
extra_body
=
dict
(
with
open
(
audio_path
,
"rb"
)
as
f
:
seed
=
420
,
async
with
client
.
stream
(
top_p
=
0.6
,
"POST"
,
url
,
files
=
{
"file"
:
f
},
data
=
data
,
headers
=
headers
),
)
as
response
:
stream
=
True
,
async
for
line
in
response
.
aiter_lines
():
)
# Each line is a JSON object prefixed with 'data: '
async
for
chunk
in
transcription
:
if
line
:
if
chunk
.
choices
:
if
line
.
startswith
(
"data: "
):
content
=
chunk
.
choices
[
0
].
get
(
"delta"
,
{}).
get
(
"content"
)
line
=
line
[
len
(
"data: "
)
:]
print
(
content
,
end
=
""
,
flush
=
True
)
# Last chunk, stream ends
if
line
.
strip
()
==
"[DONE]"
:
break
# Parse the JSON response
chunk
=
json
.
loads
(
line
)
# Extract and print the content
content
=
chunk
[
"choices"
][
0
].
get
(
"delta"
,
{}).
get
(
"content"
)
print
(
content
,
end
=
""
)
print
()
# Final newline after stream ends
print
()
# Final newline after stream ends
...
@@ -95,7 +86,11 @@ def main():
...
@@ -95,7 +86,11 @@ def main():
sync_openai
(
mary_had_lamb
,
client
)
sync_openai
(
mary_had_lamb
,
client
)
# Run the asynchronous function
# Run the asynchronous function
asyncio
.
run
(
stream_openai_response
(
winning_call
,
openai_api_base
,
openai_api_key
))
client
=
AsyncOpenAI
(
api_key
=
openai_api_key
,
base_url
=
openai_api_base
,
)
asyncio
.
run
(
stream_openai_response
(
winning_call
,
client
))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
vllm/entrypoints/openai/protocol.py
View file @
314af861
...
@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
...
@@ -1750,12 +1750,11 @@ class TranscriptionRequest(OpenAIBaseModel):
timestamps incurs additional latency.
timestamps incurs additional latency.
"""
"""
# --8<-- [start:transcription-extra-params]
stream
:
Optional
[
bool
]
=
False
stream
:
Optional
[
bool
]
=
False
"""Custom field not present in the original OpenAI definition. When set,
"""When set, it will enable output to be streamed in a similar fashion
it will enable output to be streamed in a similar fashion as the Chat
as the Chat Completion endpoint.
Completion endpoint.
"""
"""
# --8<-- [start:transcription-extra-params]
# Flattened stream option to simplify form data.
# Flattened stream option to simplify form data.
stream_include_usage
:
Optional
[
bool
]
=
False
stream_include_usage
:
Optional
[
bool
]
=
False
stream_continuous_usage_stats
:
Optional
[
bool
]
=
False
stream_continuous_usage_stats
:
Optional
[
bool
]
=
False
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment