Unverified commit ebd5a77b, authored by Guillaume Calmettes, committed by GitHub
Browse files

feat: add usage to TranscriptionResponse (text and json response_format) (#23576)


Signed-off-by: Guillaume Calmettes <gcalmettes@scaleway.com>
parent 384dd1b0
...@@ -69,8 +69,11 @@ async def test_basic_audio(mary_had_lamb, model_name): ...@@ -69,8 +69,11 @@ async def test_basic_audio(mary_had_lamb, model_name):
language="en", language="en",
response_format="text", response_format="text",
temperature=0.0) temperature=0.0)
out = json.loads(transcription)['text'] out = json.loads(transcription)
assert "Mary had a little lamb," in out out_text = out['text']
out_usage = out['usage']
assert "Mary had a little lamb," in out_text
assert out_usage["seconds"] == 16, out_usage["seconds"]
@pytest.mark.asyncio @pytest.mark.asyncio
...@@ -116,9 +119,12 @@ async def test_long_audio_request(mary_had_lamb, client): ...@@ -116,9 +119,12 @@ async def test_long_audio_request(mary_had_lamb, client):
language="en", language="en",
response_format="text", response_format="text",
temperature=0.0) temperature=0.0)
out = json.loads(transcription)['text'] out = json.loads(transcription)
counts = out.count("Mary had a little lamb") out_text = out['text']
out_usage = out['usage']
counts = out_text.count("Mary had a little lamb")
assert counts == 10, counts assert counts == 10, counts
assert out_usage["seconds"] == 161, out_usage["seconds"]
@pytest.mark.asyncio @pytest.mark.asyncio
......
...@@ -2232,9 +2232,15 @@ class TranscriptionRequest(OpenAIBaseModel): ...@@ -2232,9 +2232,15 @@ class TranscriptionRequest(OpenAIBaseModel):
# Transcription response objects # Transcription response objects
class TranscriptionUsageAudio(OpenAIBaseModel):
type: Literal["duration"] = "duration"
seconds: int
class TranscriptionResponse(OpenAIBaseModel): class TranscriptionResponse(OpenAIBaseModel):
text: str text: str
"""The transcribed text.""" """The transcribed text."""
usage: TranscriptionUsageAudio
class TranscriptionWord(OpenAIBaseModel): class TranscriptionWord(OpenAIBaseModel):
......
...@@ -200,7 +200,22 @@ class OpenAISpeechToText(OpenAIServing): ...@@ -200,7 +200,22 @@ class OpenAISpeechToText(OpenAIServing):
for result_generator in list_result_generator: for result_generator in list_result_generator:
async for op in result_generator: async for op in result_generator:
text += op.outputs[0].text text += op.outputs[0].text
return cast(T, response_class(text=text))
if self.task_type == "transcribe":
# add usage in TranscriptionResponse.
usage = {
"type": "duration",
# rounded up as per openAI specs
"seconds": int(math.ceil(duration_s)),
}
final_response = cast(T, response_class(text=text,
usage=usage))
else:
# no usage in response for translation task
final_response = cast(
T, response_class(text=text)) # type: ignore[call-arg]
return final_response
except asyncio.CancelledError: except asyncio.CancelledError:
return self.create_error_response("Client disconnected") return self.create_error_response("Client disconnected")
except ValueError as e: except ValueError as e:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment