main.py 4.98 KB
Newer Older
1
import os
2
import logging
Timothy J. Baek's avatar
Timothy J. Baek committed
3
4
5
6
7
8
9
10
11
12
from fastapi import (
    FastAPI,
    Request,
    Depends,
    HTTPException,
    status,
    UploadFile,
    File,
    Form,
)
Timothy J. Baek's avatar
Timothy J. Baek committed
13
14
15

from fastapi.responses import StreamingResponse, JSONResponse, FileResponse

Timothy J. Baek's avatar
Timothy J. Baek committed
16
17
18
from fastapi.middleware.cors import CORSMiddleware
from faster_whisper import WhisperModel

Timothy J. Baek's avatar
Timothy J. Baek committed
19
20
21
22
23
24
import requests
import hashlib
from pathlib import Path
import json


Timothy J. Baek's avatar
Timothy J. Baek committed
25
26
27
28
29
30
31
32
33
from constants import ERROR_MESSAGES
from utils.utils import (
    decode_token,
    get_current_user,
    get_verified_user,
    get_admin_user,
)
from utils.misc import calculate_sha256

Timothy J. Baek's avatar
Timothy J. Baek committed
34
35
36
37
38
39
from config import (
    SRC_LOG_LEVELS,
    CACHE_DIR,
    UPLOAD_DIR,
    WHISPER_MODEL,
    WHISPER_MODEL_DIR,
40
    WHISPER_MODEL_AUTO_UPDATE,
Jannik Streidl's avatar
Jannik Streidl committed
41
    DEVICE_TYPE,
Timothy J. Baek's avatar
Timothy J. Baek committed
42
43
    OPENAI_API_BASE_URL,
    OPENAI_API_KEY,
Timothy J. Baek's avatar
Timothy J. Baek committed
44
)
45
46
47

# Module-level logger; its level is taken from the "AUDIO" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["AUDIO"])
Timothy J. Baek's avatar
Timothy J. Baek committed
48
49
50
51
52
53
54
55
56
57

# FastAPI application that the route handlers in this module are registered on.
app = FastAPI()
# CORS is fully open: any origin, method and header, with credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is very
# permissive — confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

Timothy J. Baek's avatar
Timothy J. Baek committed
58
59
60
61

# Keep the OpenAI endpoint and key on app.state; the /speech handler reads
# them from there when building the upstream request.
app.state.OPENAI_API_BASE_URL = OPENAI_API_BASE_URL
app.state.OPENAI_API_KEY = OPENAI_API_KEY

Jannik Streidl's avatar
Jannik Streidl committed
62
63
64
65
# Whisper only runs on the GPU when DEVICE_TYPE is exactly "cuda";
# every other value (including an unset one) falls back to the CPU.
if DEVICE_TYPE == "cuda":
    whisper_device_type = DEVICE_TYPE
else:
    whisper_device_type = "cpu"
log.info(f"whisper_device_type: {whisper_device_type}")

Timothy J. Baek's avatar
Timothy J. Baek committed
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Directory under CACHE_DIR holding synthesized speech files; created
# (with any missing parents) at import time.
SPEECH_CACHE_DIR = Path(CACHE_DIR, "./audio/speech/")
SPEECH_CACHE_DIR.mkdir(exist_ok=True, parents=True)


@app.post("/speech")
async def speech(request: Request, user=Depends(get_verified_user)):
    """Proxy a text-to-speech request to the configured OpenAI endpoint, with caching.

    The raw request body is forwarded unchanged to
    ``{OPENAI_API_BASE_URL}/audio/speech``. The SHA-256 hex digest of the body
    names the cached ``.mp3`` file (and a ``.json`` copy of the request) inside
    ``SPEECH_CACHE_DIR``, so a byte-identical request is answered from disk.

    Args:
        request: Incoming request; its body is used verbatim as the upstream payload.
        user: Resolved by the ``get_verified_user`` dependency; not used in the body.

    Returns:
        A ``FileResponse`` for the cached or freshly downloaded audio file.

    Raises:
        HTTPException: With the upstream status code when the upstream service
            answered with an error status, 500 for connection or local
            failures, or 401 if a ``ValueError`` escapes the outer block.
    """
    try:
        body = await request.body()
        name = hashlib.sha256(body).hexdigest()

        file_path = SPEECH_CACHE_DIR.joinpath(f"{name}.mp3")
        file_body_path = SPEECH_CACHE_DIR.joinpath(f"{name}.json")

        # Check if the file already exists in the cache
        if file_path.is_file():
            return FileResponse(file_path)

        headers = {
            "Authorization": f"Bearer {app.state.OPENAI_API_KEY}",
            "Content-Type": "application/json",
        }

        # Download into a per-process scratch file first so that an interrupted
        # stream can never leave a truncated .mp3 under the cache key.
        partial_path = SPEECH_CACHE_DIR.joinpath(f"{name}.{os.getpid()}.part")

        r = None
        try:
            r = requests.post(
                url=f"{app.state.OPENAI_API_BASE_URL}/audio/speech",
                data=body,
                headers=headers,
                stream=True,
            )

            r.raise_for_status()

            # Save the streaming content, then publish it atomically
            with open(partial_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
            os.replace(partial_path, file_path)

            with open(file_body_path, "w") as f:
                json.dump(json.loads(body.decode("utf-8")), f)

            # Return the saved file
            return FileResponse(file_path)

        except Exception as e:
            log.exception(e)
            error_detail = "Open WebUI: Server Connection Error"
            status_code = 500
            # Compare against None explicitly: a requests.Response is falsy
            # for 4xx/5xx statuses, which would hide the upstream status code.
            if r is not None:
                # Only propagate upstream *error* statuses; a local failure
                # after a successful upstream call must not surface as 2xx.
                if r.status_code >= 400:
                    status_code = r.status_code
                try:
                    res = r.json()
                    if "error" in res:
                        error_detail = f"External: {res['error']}"
                except Exception:
                    error_detail = f"External: {e}"

            raise HTTPException(status_code=status_code, detail=error_detail)

        finally:
            # Best-effort cleanup: the scratch file only exists if the
            # download failed before it was moved into place.
            try:
                partial_path.unlink()
            except FileNotFoundError:
                pass
            if r is not None:
                r.close()

    except ValueError:
        raise HTTPException(status_code=401, detail=ERROR_MESSAGES.OPENAI_NOT_FOUND)

Timothy J. Baek's avatar
Timothy J. Baek committed
128

Timothy J. Baek's avatar
Timothy J. Baek committed
129
@app.post("/transcriptions")
def transcribe(
    file: UploadFile = File(...),
    user=Depends(get_current_user),
):
    """Store an uploaded audio file and return its Whisper transcription.

    Only ``audio/mpeg`` and ``audio/wav`` uploads are accepted. The file is
    written to ``UPLOAD_DIR`` and transcribed with a ``WhisperModel`` built
    from the module's Whisper settings. If the model cannot be initialised
    with the configured ``local_files_only`` value, initialisation is retried
    with downloads enabled.

    Args:
        file: The uploaded audio file.
        user: Resolved by the ``get_current_user`` dependency; not used in the body.

    Returns:
        ``{"text": <transcript>}`` with surrounding whitespace stripped.

    Raises:
        HTTPException: 400 for an unsupported content type, an unusable file
            name, or any failure while saving or transcribing.
    """
    log.info(f"file.content_type: {file.content_type}")

    if file.content_type not in ["audio/mpeg", "audio/wav"]:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.FILE_NOT_SUPPORTED,
        )

    try:
        # The file name is client-controlled: keep only its final path
        # component so the upload cannot be written outside UPLOAD_DIR.
        filename = os.path.basename(file.filename or "")
        if filename in ("", ".", ".."):
            raise ValueError("Invalid upload file name")

        file_path = f"{UPLOAD_DIR}/{filename}"
        contents = file.file.read()
        with open(file_path, "wb") as f:
            f.write(contents)

        whisper_kwargs = {
            "model_size_or_path": WHISPER_MODEL,
            "device": whisper_device_type,
            "compute_type": "int8",
            "download_root": WHISPER_MODEL_DIR,
            "local_files_only": not WHISPER_MODEL_AUTO_UPDATE,
        }

        log.debug(f"whisper_kwargs: {whisper_kwargs}")

        try:
            model = WhisperModel(**whisper_kwargs)
        except Exception:
            log.warning(
                "WhisperModel initialization failed, attempting download with local_files_only=False"
            )
            whisper_kwargs["local_files_only"] = False
            model = WhisperModel(**whisper_kwargs)

        segments, info = model.transcribe(file_path, beam_size=5)
        log.info(
            "Detected language '%s' with probability %f",
            info.language,
            info.language_probability,
        )

        transcript = "".join(segment.text for segment in segments)

        return {"text": transcript.strip()}

    except Exception as e:
        log.exception(e)

        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )