"vllm/model_executor/models/nemotron_h.py" did not exist on "b411418ff090a168c85eab243b14b7350bf73db4"
serving_transcription.py 5.85 KB
Newer Older
1
# SPDX-License-Identifier: Apache-2.0
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
from collections.abc import AsyncGenerator
4
5
6
7
8

from fastapi import Request

from vllm.engine.protocol import EngineClient
from vllm.entrypoints.logger import RequestLogger
9
from vllm.entrypoints.openai.protocol import (
10
11
12
13
14
    ErrorResponse,
    RequestResponseMetadata,
    TranscriptionRequest,
    TranscriptionResponse,
    TranscriptionResponseStreamChoice,
15
    TranscriptionResponseVerbose,
16
17
18
19
    TranscriptionStreamResponse,
    TranslationRequest,
    TranslationResponse,
    TranslationResponseStreamChoice,
20
    TranslationResponseVerbose,
21
22
    TranslationStreamResponse,
)
23
from vllm.entrypoints.openai.serving_models import OpenAIServingModels
24
from vllm.entrypoints.openai.speech_to_text import OpenAISpeechToText
25
26
27
28
29
30
from vllm.logger import init_logger
from vllm.outputs import RequestOutput

logger = init_logger(__name__)


31
32
class OpenAIServingTranscription(OpenAISpeechToText):
    """Serving handler for audio transcription requests.

    A thin specialization of ``OpenAISpeechToText`` that fixes the task type
    to ``"transcribe"`` and plugs in the transcription-specific response and
    streaming classes.
    """

    def __init__(
        self,
        engine_client: EngineClient,
        models: OpenAIServingModels,
        *,
        request_logger: RequestLogger | None,
        return_tokens_as_token_ids: bool = False,
        log_error_stack: bool = False,
        enable_force_include_usage: bool = False,
    ):
        """Configure the speech-to-text base class for transcription.

        Every argument is forwarded unchanged to
        ``OpenAISpeechToText.__init__``; the only value set here is
        ``task_type="transcribe"``.
        """
        super().__init__(
            task_type="transcribe",
            engine_client=engine_client,
            models=models,
            request_logger=request_logger,
            return_tokens_as_token_ids=return_tokens_as_token_ids,
            log_error_stack=log_error_stack,
            enable_force_include_usage=enable_force_include_usage,
        )

    async def create_transcription(
        self, audio_data: bytes, request: TranscriptionRequest, raw_request: Request
    ) -> (
        TranscriptionResponse
        | TranscriptionResponseVerbose
        | AsyncGenerator[str, None]
        | ErrorResponse
    ):
        """Handle a transcription request modelled on OpenAI's API.

        API specification:
        https://platform.openai.com/docs/api-reference/audio/createTranscription

        Args:
            audio_data: Raw bytes of the audio to transcribe.
            request: The parsed transcription request.
            raw_request: The incoming HTTP request, forwarded as-is.

        Returns:
            Whatever ``_create_speech_to_text`` produces for this request
            (see the return annotation for the possible types).
        """
        # The verbose response class is used only when the client explicitly
        # asks for the "verbose_json" format.
        response_cls: type[TranscriptionResponse] | type[TranscriptionResponseVerbose]
        if request.response_format == "verbose_json":
            response_cls = TranscriptionResponseVerbose
        else:
            response_cls = TranscriptionResponse

        return await self._create_speech_to_text(
            audio_data=audio_data,
            request=request,
            raw_request=raw_request,
            response_class=response_cls,
            stream_generator_method=self.transcription_stream_generator,
        )

    async def transcription_stream_generator(
        self,
        request: TranscriptionRequest,
        result_generator: list[AsyncGenerator[RequestOutput, None]],
        request_id: str,
        request_metadata: RequestResponseMetadata,
        audio_duration_s: float,
    ) -> AsyncGenerator[str, None]:
        """Yield streamed transcription chunks.

        Delegates to ``_speech_to_text_stream_generator`` with the
        transcription chunk type and response classes, and re-yields each
        chunk it produces without modification.
        """
        async for piece in self._speech_to_text_stream_generator(
            request=request,
            list_result_generator=result_generator,
            request_id=request_id,
            request_metadata=request_metadata,
            audio_duration_s=audio_duration_s,
            chunk_object_type="transcription.chunk",
            response_stream_choice_class=TranscriptionResponseStreamChoice,
            stream_response_class=TranscriptionStreamResponse,
        ):
            yield piece


class OpenAIServingTranslation(OpenAISpeechToText):
    """Serving handler for audio translation requests.

    A thin specialization of ``OpenAISpeechToText`` that fixes the task type
    to ``"translate"`` and plugs in the translation-specific response and
    streaming classes.
    """

    def __init__(
        self,
        engine_client: EngineClient,
        models: OpenAIServingModels,
        *,
        request_logger: RequestLogger | None,
        return_tokens_as_token_ids: bool = False,
        log_error_stack: bool = False,
        enable_force_include_usage: bool = False,
    ):
        """Configure the speech-to-text base class for translation.

        Every argument is forwarded unchanged to
        ``OpenAISpeechToText.__init__``; the only value set here is
        ``task_type="translate"``.
        """
        super().__init__(
            task_type="translate",
            engine_client=engine_client,
            models=models,
            request_logger=request_logger,
            return_tokens_as_token_ids=return_tokens_as_token_ids,
            log_error_stack=log_error_stack,
            enable_force_include_usage=enable_force_include_usage,
        )

    async def create_translation(
        self, audio_data: bytes, request: TranslationRequest, raw_request: Request
    ) -> (
        TranslationResponse
        | TranslationResponseVerbose
        | AsyncGenerator[str, None]
        | ErrorResponse
    ):
        """Handle a translation request modelled on OpenAI's API.

        API specification:
        https://platform.openai.com/docs/api-reference/audio/createTranslation

        Args:
            audio_data: Raw bytes of the audio to translate.
            request: The parsed translation request.
            raw_request: The incoming HTTP request, forwarded as-is.

        Returns:
            Whatever ``_create_speech_to_text`` produces for this request
            (see the return annotation for the possible types).
        """
        # The verbose response class is used only when the client explicitly
        # asks for the "verbose_json" format.
        response_cls: type[TranslationResponse] | type[TranslationResponseVerbose]
        if request.response_format == "verbose_json":
            response_cls = TranslationResponseVerbose
        else:
            response_cls = TranslationResponse

        return await self._create_speech_to_text(
            audio_data=audio_data,
            request=request,
            raw_request=raw_request,
            response_class=response_cls,
            stream_generator_method=self.translation_stream_generator,
        )

    async def translation_stream_generator(
        self,
        request: TranslationRequest,
        result_generator: list[AsyncGenerator[RequestOutput, None]],
        request_id: str,
        request_metadata: RequestResponseMetadata,
        audio_duration_s: float,
    ) -> AsyncGenerator[str, None]:
        """Yield streamed translation chunks.

        Delegates to ``_speech_to_text_stream_generator`` with the
        translation chunk type and response classes, and re-yields each
        chunk it produces without modification.
        """
        async for piece in self._speech_to_text_stream_generator(
            request=request,
            list_result_generator=result_generator,
            request_id=request_id,
            request_metadata=request_metadata,
            audio_duration_s=audio_duration_s,
            chunk_object_type="translation.chunk",
            response_stream_choice_class=TranslationResponseStreamChoice,
            stream_response_class=TranslationStreamResponse,
        ):
            yield piece