main.py 32.6 KB
Newer Older
Timothy J. Baek's avatar
Timothy J. Baek committed
1
2
3
4
5
6
7
8
9
from fastapi import (
    FastAPI,
    Depends,
    HTTPException,
    status,
    UploadFile,
    File,
    Form,
)
Timothy J. Baek's avatar
Timothy J. Baek committed
10
from fastapi.middleware.cors import CORSMiddleware
11
import os, shutil, logging, re
12
13

from pathlib import Path
14
from typing import List, Union, Sequence
Timothy J. Baek's avatar
Timothy J. Baek committed
15

16
from chromadb.utils.batch_utils import create_batches
Timothy J. Baek's avatar
Timothy J. Baek committed
17

Timothy J. Baek's avatar
Timothy J. Baek committed
18
19
20
21
22
from langchain_community.document_loaders import (
    WebBaseLoader,
    TextLoader,
    PyPDFLoader,
    CSVLoader,
23
    BSHTMLLoader,
Timothy J. Baek's avatar
Timothy J. Baek committed
24
    Docx2txtLoader,
Dave Bauman's avatar
Dave Bauman committed
25
    UnstructuredEPubLoader,
Timothy J. Baek's avatar
Timothy J. Baek committed
26
27
    UnstructuredWordDocumentLoader,
    UnstructuredMarkdownLoader,
28
    UnstructuredXMLLoader,
Marclass's avatar
Marclass committed
29
    UnstructuredRSTLoader,
Marclass's avatar
Marclass committed
30
    UnstructuredExcelLoader,
Timothy J. Baek's avatar
Timothy J. Baek committed
31
    UnstructuredPowerPointLoader,
Timothy J. Baek's avatar
Timothy J. Baek committed
32
    YoutubeLoader,
Timothy J. Baek's avatar
Timothy J. Baek committed
33
)
34
35
from langchain.text_splitter import RecursiveCharacterTextSplitter

36
37
38
39
40
import validators
import urllib.parse
import socket


41
42
from pydantic import BaseModel
from typing import Optional
43
import mimetypes
44
import uuid
45
46
import json

47
import sentence_transformers
48

Timothy J. Baek's avatar
fix  
Timothy J. Baek committed
49
from apps.webui.models.documents import (
50
51
52
53
    Documents,
    DocumentForm,
    DocumentResponse,
)
Jannik Streidl's avatar
Jannik Streidl committed
54

55
from apps.rag.utils import (
56
    get_model_path,
Timothy J. Baek's avatar
Timothy J. Baek committed
57
58
59
60
61
    get_embedding_function,
    query_doc,
    query_doc_with_hybrid_search,
    query_collection,
    query_collection_with_hybrid_search,
62
    search_web,
63
)
Timothy J. Baek's avatar
Timothy J. Baek committed
64

65
66
67
68
69
70
from utils.misc import (
    calculate_sha256,
    calculate_sha256_string,
    sanitize_filename,
    extract_folders_after_data_docs,
)
71
from utils.utils import get_current_user, get_admin_user
72

73
from config import (
Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
74
    ENV,
75
    SRC_LOG_LEVELS,
76
77
    UPLOAD_DIR,
    DOCS_DIR,
78
79
    RAG_TOP_K,
    RAG_RELEVANCE_THRESHOLD,
80
    RAG_EMBEDDING_ENGINE,
81
    RAG_EMBEDDING_MODEL,
82
    RAG_EMBEDDING_MODEL_AUTO_UPDATE,
83
    RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
Timothy J. Baek's avatar
Timothy J. Baek committed
84
    ENABLE_RAG_HYBRID_SEARCH,
85
    ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
Steven Kreitzer's avatar
Steven Kreitzer committed
86
    RAG_RERANKING_MODEL,
87
    PDF_EXTRACT_IMAGES,
88
    RAG_RERANKING_MODEL_AUTO_UPDATE,
Steven Kreitzer's avatar
Steven Kreitzer committed
89
    RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
Timothy J. Baek's avatar
Timothy J. Baek committed
90
91
    RAG_OPENAI_API_BASE_URL,
    RAG_OPENAI_API_KEY,
92
    DEVICE_TYPE,
93
94
95
    CHROMA_CLIENT,
    CHUNK_SIZE,
    CHUNK_OVERLAP,
Timothy J. Baek's avatar
Timothy J. Baek committed
96
    RAG_TEMPLATE,
97
    ENABLE_RAG_LOCAL_WEB_FETCH,
98
    YOUTUBE_LOADER_LANGUAGE,
Timothy J. Baek's avatar
Timothy J. Baek committed
99
100
101
102
103
104
105
106
    ENABLE_RAG_WEB_SEARCH,
    SEARXNG_QUERY_URL,
    GOOGLE_PSE_API_KEY,
    GOOGLE_PSE_ENGINE_ID,
    SERPSTACK_API_KEY,
    SERPSTACK_HTTPS,
    SERPER_API_KEY,
    RAG_WEB_SEARCH_RESULT_COUNT,
107
    RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
108
    AppConfig,
109
)
110

111
112
from constants import ERROR_MESSAGES

113
114
115
# Module logger; its level is taken from the "RAG" entry of SRC_LOG_LEVELS.
log = logging.getLogger(__name__)
log.setLevel(SRC_LOG_LEVELS["RAG"])

app = FastAPI()

# Runtime settings are kept on app.state.config and seeded below from the
# values imported from the config module.
app.state.config = AppConfig()

# --- Retrieval -------------------------------------------------------------
app.state.config.TOP_K = RAG_TOP_K
app.state.config.RELEVANCE_THRESHOLD = RAG_RELEVANCE_THRESHOLD

app.state.config.ENABLE_RAG_HYBRID_SEARCH = ENABLE_RAG_HYBRID_SEARCH
app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
    ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
)

# --- Text splitting --------------------------------------------------------
app.state.config.CHUNK_SIZE = CHUNK_SIZE
app.state.config.CHUNK_OVERLAP = CHUNK_OVERLAP

# --- Models and prompt template --------------------------------------------
app.state.config.RAG_EMBEDDING_ENGINE = RAG_EMBEDDING_ENGINE
app.state.config.RAG_EMBEDDING_MODEL = RAG_EMBEDDING_MODEL
app.state.config.RAG_RERANKING_MODEL = RAG_RERANKING_MODEL
app.state.config.RAG_TEMPLATE = RAG_TEMPLATE

# --- Remote embedding API credentials --------------------------------------
app.state.config.OPENAI_API_BASE_URL = RAG_OPENAI_API_BASE_URL
app.state.config.OPENAI_API_KEY = RAG_OPENAI_API_KEY

# --- Loaders ---------------------------------------------------------------
app.state.config.PDF_EXTRACT_IMAGES = PDF_EXTRACT_IMAGES

app.state.config.YOUTUBE_LOADER_LANGUAGE = YOUTUBE_LOADER_LANGUAGE
# NOTE: unlike the other settings, the translation value is stored directly on
# app.state, not on app.state.config.
app.state.YOUTUBE_LOADER_TRANSLATION = None

# --- Web search ------------------------------------------------------------
app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH
app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL
app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY
app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID
app.state.config.SERPSTACK_API_KEY = SERPSTACK_API_KEY
app.state.config.SERPSTACK_HTTPS = SERPSTACK_HTTPS
app.state.config.SERPER_API_KEY = SERPER_API_KEY
app.state.config.RAG_WEB_SEARCH_RESULT_COUNT = RAG_WEB_SEARCH_RESULT_COUNT
app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS = RAG_WEB_SEARCH_CONCURRENT_REQUESTS


158
159
160
161
def update_embedding_model(
    embedding_model: str,
    update_model: bool = False,
):
    """Load (or clear) the local SentenceTransformer used for embeddings.

    A local model is loaded only when ``embedding_model`` is non-empty and the
    configured embedding engine is the empty string; in every other case
    ``app.state.sentence_transformer_ef`` is set to ``None``.

    Args:
        embedding_model: Model identifier handed to ``get_model_path``.
        update_model: Forwarded as the second argument of ``get_model_path``.
    """
    wants_local_model = (
        bool(embedding_model) and app.state.config.RAG_EMBEDDING_ENGINE == ""
    )
    if not wants_local_model:
        app.state.sentence_transformer_ef = None
        return

    model_path = get_model_path(embedding_model, update_model)
    app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer(
        model_path,
        device=DEVICE_TYPE,
        trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,
    )


def update_reranking_model(
    reranking_model: str,
    update_model: bool = False,
):
    """Load (or clear) the CrossEncoder stored on ``app.state.sentence_transformer_rf``.

    Args:
        reranking_model: Model identifier handed to ``get_model_path``; a falsy
            value clears the reranker (sets it to ``None``).
        update_model: Forwarded as the second argument of ``get_model_path``.
    """
    if not reranking_model:
        app.state.sentence_transformer_rf = None
        return

    model_path = get_model_path(reranking_model, update_model)
    app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
        model_path,
        device=DEVICE_TYPE,
        trust_remote_code=RAG_RERANKING_MODEL_TRUST_REMOTE_CODE,
    )


# Load the configured models once at import time.
update_embedding_model(
    app.state.config.RAG_EMBEDDING_MODEL,
    update_model=RAG_EMBEDDING_MODEL_AUTO_UPDATE,
)
update_reranking_model(
    app.state.config.RAG_RERANKING_MODEL,
    update_model=RAG_RERANKING_MODEL_AUTO_UPDATE,
)

# Embedding callable shared by the query handlers.
app.state.EMBEDDING_FUNCTION = get_embedding_function(
    app.state.config.RAG_EMBEDDING_ENGINE,
    app.state.config.RAG_EMBEDDING_MODEL,
    app.state.sentence_transformer_ef,
    app.state.config.OPENAI_API_KEY,
    app.state.config.OPENAI_API_BASE_URL,
)

# CORS: every origin, method and header is allowed.
origins = ["*"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


Timothy J. Baek's avatar
Timothy J. Baek committed
217
# Base request model carrying the target collection name; UrlForm and
# SearchForm extend it.
class CollectionNameForm(BaseModel):
    # Defaults to "test" when the client omits the field.
    collection_name: Optional[str] = "test"


Timothy J. Baek's avatar
Timothy J. Baek committed
221
# Request model for the URL-based endpoints (/youtube and /web).
class UrlForm(CollectionNameForm):
    # URL whose content should be loaded.
    url: str

Timothy J. Baek's avatar
Timothy J. Baek committed
224

225
226
227
228
# Request model for /web/search.
class SearchForm(CollectionNameForm):
    # Search query passed to search_web.
    query: str


Timothy J. Baek's avatar
Timothy J. Baek committed
229
230
@app.get("/")
async def get_status():
    """Return the current chunking, template and model settings."""
    config = app.state.config
    return {
        "status": True,
        "chunk_size": config.CHUNK_SIZE,
        "chunk_overlap": config.CHUNK_OVERLAP,
        "template": config.RAG_TEMPLATE,
        "embedding_engine": config.RAG_EMBEDDING_ENGINE,
        "embedding_model": config.RAG_EMBEDDING_MODEL,
        "reranking_model": config.RAG_RERANKING_MODEL,
    }


Timothy J. Baek's avatar
Timothy J. Baek committed
242
243
@app.get("/embedding")
async def get_embedding_config(user=Depends(get_admin_user)):
    """Return the embedding engine/model and the stored OpenAI-style API settings.

    The request is resolved through the ``get_admin_user`` dependency.
    """
    config = app.state.config
    openai_settings = {
        "url": config.OPENAI_API_BASE_URL,
        "key": config.OPENAI_API_KEY,
    }
    return {
        "status": True,
        "embedding_engine": config.RAG_EMBEDDING_ENGINE,
        "embedding_model": config.RAG_EMBEDDING_MODEL,
        "openai_config": openai_settings,
    }


Steven Kreitzer's avatar
Steven Kreitzer committed
255
256
@app.get("/reranking")
async def get_reraanking_config(user=Depends(get_admin_user)):
    """Return the currently configured reranking model.

    The request is resolved through the ``get_admin_user`` dependency.
    """
    current_model = app.state.config.RAG_RERANKING_MODEL
    return {
        "status": True,
        "reranking_model": current_model,
    }
Steven Kreitzer's avatar
Steven Kreitzer committed
261
262


263
264
265
266
267
# API endpoint settings nested inside EmbeddingModelUpdateForm.
class OpenAIConfigForm(BaseModel):
    # Stored as OPENAI_API_BASE_URL by update_embedding_config.
    url: str
    # Stored as OPENAI_API_KEY by update_embedding_config.
    key: str


268
# Request body of POST /embedding/update.
class EmbeddingModelUpdateForm(BaseModel):
    # Only applied when the engine is "ollama" or "openai".
    openai_config: Optional[OpenAIConfigForm] = None
    # New value for RAG_EMBEDDING_ENGINE.
    embedding_engine: str
    # New value for RAG_EMBEDDING_MODEL.
    embedding_model: str


Timothy J. Baek's avatar
Timothy J. Baek committed
274
275
@app.post("/embedding/update")
async def update_embedding_config(
    form_data: EmbeddingModelUpdateForm, user=Depends(get_admin_user)
):
    """Switch the embedding engine/model and rebuild the embedding function.

    Stores the new engine and model on ``app.state.config``; when the engine is
    ``"ollama"`` or ``"openai"`` and ``openai_config`` was supplied, the API
    base URL and key are stored as well. The local model is then reloaded via
    ``update_embedding_model`` and ``app.state.EMBEDDING_FUNCTION`` rebuilt.

    Returns:
        A dict with the resulting engine, model and API settings.

    Raises:
        HTTPException: 500 when any step of the update fails.
    """
    log.info(
        f"Updating embedding model: {app.state.config.RAG_EMBEDDING_MODEL} to {form_data.embedding_model}"
    )
    try:
        app.state.config.RAG_EMBEDDING_ENGINE = form_data.embedding_engine
        app.state.config.RAG_EMBEDDING_MODEL = form_data.embedding_model

        # Identity comparison is the idiomatic (and __eq__-proof) None test.
        if (
            app.state.config.RAG_EMBEDDING_ENGINE in ["ollama", "openai"]
            and form_data.openai_config is not None
        ):
            app.state.config.OPENAI_API_BASE_URL = form_data.openai_config.url
            app.state.config.OPENAI_API_KEY = form_data.openai_config.key

        update_embedding_model(app.state.config.RAG_EMBEDDING_MODEL)

        app.state.EMBEDDING_FUNCTION = get_embedding_function(
            app.state.config.RAG_EMBEDDING_ENGINE,
            app.state.config.RAG_EMBEDDING_MODEL,
            app.state.sentence_transformer_ef,
            app.state.config.OPENAI_API_KEY,
            app.state.config.OPENAI_API_BASE_URL,
        )

        return {
            "status": True,
            "embedding_engine": app.state.config.RAG_EMBEDDING_ENGINE,
            "embedding_model": app.state.config.RAG_EMBEDDING_MODEL,
            "openai_config": {
                "url": app.state.config.OPENAI_API_BASE_URL,
                "key": app.state.config.OPENAI_API_KEY,
            },
        }
    except Exception as e:
        log.exception(f"Problem updating embedding model: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )
Timothy J. Baek's avatar
Timothy J. Baek committed
315
316


Steven Kreitzer's avatar
Steven Kreitzer committed
317
318
# Request body of POST /reranking/update.
class RerankingModelUpdateForm(BaseModel):
    # New value for RAG_RERANKING_MODEL.
    reranking_model: str
319

Steven Kreitzer's avatar
Steven Kreitzer committed
320
321
322
323
324
325

@app.post("/reranking/update")
async def update_reranking_config(
    form_data: RerankingModelUpdateForm, user=Depends(get_admin_user)
):
    """Switch the reranking model and reload it.

    Returns:
        A dict with the resulting reranking model name.

    Raises:
        HTTPException: 500 when storing or loading the model fails.
    """
    log.info(
        f"Updating reranking model: {app.state.config.RAG_RERANKING_MODEL} to {form_data.reranking_model}"
    )
    try:
        app.state.config.RAG_RERANKING_MODEL = form_data.reranking_model

        # `True` is the update_model argument. It previously sat outside the
        # call's parentheses, forming a discarded tuple, so it was never passed.
        update_reranking_model(app.state.config.RAG_RERANKING_MODEL, True)

        return {
            "status": True,
            "reranking_model": app.state.config.RAG_RERANKING_MODEL,
        }
    except Exception as e:
        log.exception(f"Problem updating reranking model: {e}")
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )


Timothy J. Baek's avatar
Timothy J. Baek committed
345
346
@app.get("/config")
async def get_rag_config(user=Depends(get_admin_user)):
    """Return the loader, chunking and web-search settings as a nested dict."""
    config = app.state.config

    chunk_settings = {
        "chunk_size": config.CHUNK_SIZE,
        "chunk_overlap": config.CHUNK_OVERLAP,
    }
    youtube_settings = {
        "language": config.YOUTUBE_LOADER_LANGUAGE,
        # The translation value lives on app.state, not on app.state.config.
        "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
    }
    web_search_settings = {
        "enable": config.ENABLE_RAG_WEB_SEARCH,
        "searxng_query_url": config.SEARXNG_QUERY_URL,
        "google_pse_api_key": config.GOOGLE_PSE_API_KEY,
        "google_pse_engine_id": config.GOOGLE_PSE_ENGINE_ID,
        "serpstack_api_key": config.SERPSTACK_API_KEY,
        "serpstack_https": config.SERPSTACK_HTTPS,
        "serper_api_key": config.SERPER_API_KEY,
        "result_count": config.RAG_WEB_SEARCH_RESULT_COUNT,
        "concurrent_requests": config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
    }

    return {
        "status": True,
        "pdf_extract_images": config.PDF_EXTRACT_IMAGES,
        "chunk": chunk_settings,
        "web_loader_ssl_verification": config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
        "youtube": youtube_settings,
        "web": {"search": web_search_settings},
    }


# Chunking parameters nested inside ConfigUpdateForm.
class ChunkParamUpdateForm(BaseModel):
    # New value for CHUNK_SIZE.
    chunk_size: int
    # New value for CHUNK_OVERLAP.
    chunk_overlap: int


380
381
382
383
384
# YouTube loader settings nested inside ConfigUpdateForm.
class YoutubeLoaderConfig(BaseModel):
    # New value for YOUTUBE_LOADER_LANGUAGE.
    language: List[str]
    # New value for app.state.YOUTUBE_LOADER_TRANSLATION.
    translation: Optional[str] = None


Timothy J. Baek's avatar
Timothy J. Baek committed
385
386
387
388
389
390
391
392
393
394
395
396
# Web-search settings model; its field names match the keys of the
# "web" -> "search" section returned by GET /config. It is not referenced by
# any other definition in the visible part of this file.
class WebSearchConfig(BaseModel):
    enable: bool
    searxng_query_url: Optional[str] = None
    google_pse_api_key: Optional[str] = None
    google_pse_engine_id: Optional[str] = None
    serpstack_api_key: Optional[str] = None
    serpstack_https: Optional[bool] = None
    serper_api_key: Optional[str] = None
    result_count: Optional[int] = None
    concurrent_requests: Optional[int] = None


Timothy J. Baek's avatar
Timothy J. Baek committed
397
# Request body of POST /config/update. Every section is optional; a section
# left as None keeps the current setting.
class ConfigUpdateForm(BaseModel):
    pdf_extract_images: Optional[bool] = None
    chunk: Optional[ChunkParamUpdateForm] = None
    web_loader_ssl_verification: Optional[bool] = None
    youtube: Optional[YoutubeLoaderConfig] = None
Timothy J. Baek's avatar
Timothy J. Baek committed
402
403
404
405


@app.post("/config/update")
async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_user)):
    """Apply the supplied sections of the RAG configuration.

    Each setting takes the submitted value when its section is present and is
    otherwise re-assigned its current value.

    Returns:
        A dict with the resulting PDF, chunking, SSL and YouTube settings.
    """
    config = app.state.config
    chunk = form_data.chunk
    youtube = form_data.youtube

    config.PDF_EXTRACT_IMAGES = (
        form_data.pdf_extract_images
        if form_data.pdf_extract_images is not None
        else config.PDF_EXTRACT_IMAGES
    )

    config.CHUNK_SIZE = chunk.chunk_size if chunk is not None else config.CHUNK_SIZE
    config.CHUNK_OVERLAP = (
        chunk.chunk_overlap if chunk is not None else config.CHUNK_OVERLAP
    )

    # `is not None` (rather than `!= None`) so an explicit False is applied,
    # matching how the other fields are tested.
    config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION = (
        form_data.web_loader_ssl_verification
        if form_data.web_loader_ssl_verification is not None
        else config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
    )

    config.YOUTUBE_LOADER_LANGUAGE = (
        youtube.language if youtube is not None else config.YOUTUBE_LOADER_LANGUAGE
    )

    # The translation value is kept on app.state, not on app.state.config.
    app.state.YOUTUBE_LOADER_TRANSLATION = (
        youtube.translation
        if youtube is not None
        else app.state.YOUTUBE_LOADER_TRANSLATION
    )

    return {
        "status": True,
        "pdf_extract_images": config.PDF_EXTRACT_IMAGES,
        "chunk": {
            "chunk_size": config.CHUNK_SIZE,
            "chunk_overlap": config.CHUNK_OVERLAP,
        },
        "web_loader_ssl_verification": config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
        "youtube": {
            "language": config.YOUTUBE_LOADER_LANGUAGE,
            "translation": app.state.YOUTUBE_LOADER_TRANSLATION,
        },
    }
455
456


Timothy J. Baek's avatar
Timothy J. Baek committed
457
458
459
460
@app.get("/template")
async def get_rag_template(user=Depends(get_current_user)):
    """Return the configured RAG prompt template."""
    current_template = app.state.config.RAG_TEMPLATE
    return {
        "status": True,
        "template": current_template,
    }


465
466
467
468
@app.get("/query/settings")
async def get_query_settings(user=Depends(get_admin_user)):
    """Return the template, top-k, relevance threshold and hybrid-search flag."""
    config = app.state.config
    return {
        "status": True,
        "template": config.RAG_TEMPLATE,
        "k": config.TOP_K,
        "r": config.RELEVANCE_THRESHOLD,
        "hybrid": config.ENABLE_RAG_HYBRID_SEARCH,
    }
Timothy J. Baek's avatar
Timothy J. Baek committed
474
475


476
477
# Request body of POST /query/settings/update.
class QuerySettingsForm(BaseModel):
    # Stored as TOP_K.
    k: Optional[int] = None
    # Stored as RELEVANCE_THRESHOLD.
    r: Optional[float] = None
    # Stored as RAG_TEMPLATE.
    template: Optional[str] = None
    # Stored as ENABLE_RAG_HYBRID_SEARCH.
    hybrid: Optional[bool] = None
481
482
483
484
485
486


@app.post("/query/settings/update")
async def update_query_settings(
    form_data: QuerySettingsForm, user=Depends(get_admin_user)
):
    """Replace the query settings with the submitted values.

    A falsy or missing field is reset to a fixed fallback rather than left
    untouched: the imported ``RAG_TEMPLATE`` for the template, ``4`` for
    ``k``, ``0.0`` for ``r`` and ``False`` for ``hybrid``.

    Returns:
        A dict with the resulting template, k, r and hybrid values.
    """
    config = app.state.config

    config.RAG_TEMPLATE = form_data.template or RAG_TEMPLATE
    config.TOP_K = form_data.k or 4
    config.RELEVANCE_THRESHOLD = form_data.r or 0.0
    config.ENABLE_RAG_HYBRID_SEARCH = form_data.hybrid or False

    return {
        "status": True,
        "template": config.RAG_TEMPLATE,
        "k": config.TOP_K,
        "r": config.RELEVANCE_THRESHOLD,
        "hybrid": config.ENABLE_RAG_HYBRID_SEARCH,
    }
502
503


504
# Request body of POST /query/doc.
class QueryDocForm(BaseModel):
    collection_name: str
    query: str
    # Falls back to the configured TOP_K when missing or falsy.
    k: Optional[int] = None
    # Falls back to the configured RELEVANCE_THRESHOLD when missing or falsy.
    r: Optional[float] = None
    # NOTE(review): query_doc_handler does not read this field; it uses the
    # configured ENABLE_RAG_HYBRID_SEARCH flag instead.
    hybrid: Optional[bool] = None
Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
510
511


512
@app.post("/query/doc")
def query_doc_handler(
    form_data: QueryDocForm,
    user=Depends(get_current_user),
):
    """Query a single collection, with hybrid search when it is enabled.

    ``k`` and ``r`` from the request are used when truthy; otherwise the
    configured ``TOP_K`` / ``RELEVANCE_THRESHOLD`` apply.

    Raises:
        HTTPException: 400 when the query fails for any reason.
    """
    try:
        top_k = form_data.k or app.state.config.TOP_K

        if not app.state.config.ENABLE_RAG_HYBRID_SEARCH:
            return query_doc(
                collection_name=form_data.collection_name,
                query=form_data.query,
                embedding_function=app.state.EMBEDDING_FUNCTION,
                k=top_k,
            )

        threshold = form_data.r or app.state.config.RELEVANCE_THRESHOLD
        return query_doc_with_hybrid_search(
            collection_name=form_data.collection_name,
            query=form_data.query,
            embedding_function=app.state.EMBEDDING_FUNCTION,
            k=top_k,
            reranking_function=app.state.sentence_transformer_rf,
            r=threshold,
        )
    except Exception as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )
542
543


Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
544
545
546
# Request body of POST /query/collection.
class QueryCollectionsForm(BaseModel):
    collection_names: List[str]
    query: str
    # Falls back to the configured TOP_K when missing or falsy.
    k: Optional[int] = None
    # Falls back to the configured RELEVANCE_THRESHOLD when missing or falsy.
    r: Optional[float] = None
    # NOTE(review): query_collection_handler does not read this field; it uses
    # the configured ENABLE_RAG_HYBRID_SEARCH flag instead.
    hybrid: Optional[bool] = None
Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
550
551


552
@app.post("/query/collection")
def query_collection_handler(
    form_data: QueryCollectionsForm,
    user=Depends(get_current_user),
):
    """Query several collections, with hybrid search when it is enabled.

    ``k`` and ``r`` from the request are used when truthy; otherwise the
    configured ``TOP_K`` / ``RELEVANCE_THRESHOLD`` apply.

    Raises:
        HTTPException: 400 when the query fails for any reason.
    """
    try:
        top_k = form_data.k or app.state.config.TOP_K

        if not app.state.config.ENABLE_RAG_HYBRID_SEARCH:
            return query_collection(
                collection_names=form_data.collection_names,
                query=form_data.query,
                embedding_function=app.state.EMBEDDING_FUNCTION,
                k=top_k,
            )

        threshold = form_data.r or app.state.config.RELEVANCE_THRESHOLD
        return query_collection_with_hybrid_search(
            collection_names=form_data.collection_names,
            query=form_data.query,
            embedding_function=app.state.EMBEDDING_FUNCTION,
            k=top_k,
            reranking_function=app.state.sentence_transformer_rf,
            r=threshold,
        )
    except Exception as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )
Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
583
584


Timothy J. Baek's avatar
Timothy J. Baek committed
585
586
587
@app.post("/youtube")
def store_youtube_video(form_data: UrlForm, user=Depends(get_current_user)):
    """Load a YouTube URL and store the result in the vector DB.

    When no collection name is given (empty string or ``None``), the name is
    the first 63 characters of the SHA-256 of the URL. An existing collection
    with that name is overwritten.

    Returns:
        A dict with the collection name and the source URL as ``filename``.

    Raises:
        HTTPException: 400 when loading or storing fails.
    """
    try:
        loader = YoutubeLoader.from_youtube_url(
            form_data.url,
            add_video_info=True,
            language=app.state.config.YOUTUBE_LOADER_LANGUAGE,
            translation=app.state.YOUTUBE_LOADER_TRANSLATION,
        )
        data = loader.load()

        # collection_name is Optional[str]: treat None like "" so it never
        # reaches the vector store as a collection name.
        collection_name = form_data.collection_name
        if not collection_name:
            collection_name = calculate_sha256_string(form_data.url)[:63]

        store_data_in_vector_db(data, collection_name, overwrite=True)
        return {
            "status": True,
            "collection_name": collection_name,
            "filename": form_data.url,
        }
    except Exception as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )


614
@app.post("/web")
def store_web(form_data: UrlForm, user=Depends(get_current_user)):
    """Fetch a web page and store its content in the vector DB.

    Example URL: "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"

    When no collection name is given (empty string or ``None``), the name is
    the first 63 characters of the SHA-256 of the URL. An existing collection
    with that name is overwritten.

    Returns:
        A dict with the collection name and the source URL as ``filename``.

    Raises:
        HTTPException: 400 when validation, loading or storing fails.
    """
    try:
        loader = get_web_loader(
            form_data.url,
            verify_ssl=app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
        )
        data = loader.load()

        # collection_name is Optional[str]: treat None like "" so it never
        # reaches the vector store as a collection name.
        collection_name = form_data.collection_name
        if not collection_name:
            collection_name = calculate_sha256_string(form_data.url)[:63]

        store_data_in_vector_db(data, collection_name, overwrite=True)
        return {
            "status": True,
            "collection_name": collection_name,
            "filename": form_data.url,
        }
    except Exception as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )

641

642
def get_web_loader(url: Union[str, Sequence[str]], verify_ssl: bool = True):
    """Build a ``WebBaseLoader`` for one URL or a sequence of URLs.

    Args:
        url: A URL string or a sequence of URL strings.
        verify_ssl: Passed through to ``WebBaseLoader``.

    Raises:
        ValueError: When ``validate_url`` rejects the input.
    """
    # Refuse to build a loader for anything that fails validation.
    url_is_acceptable = validate_url(url)
    if not url_is_acceptable:
        raise ValueError(ERROR_MESSAGES.INVALID_URL)

    loader_options = {
        "verify_ssl": verify_ssl,
        "requests_per_second": RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
        "continue_on_failure": True,
    }
    return WebBaseLoader(url, **loader_options)
652
653


654
655
656
657
def validate_url(url: Union[str, Sequence[str]]):
    """Check a URL, or every URL in a sequence.

    Returns:
        ``True`` for an accepted URL string, the conjunction of the per-item
        results for a sequence, and ``False`` for any other input type.

    Raises:
        ValueError: When a URL string is malformed, or when local web fetch is
            disabled and the host resolves to a private address.
    """
    if not isinstance(url, str):
        if isinstance(url, Sequence):
            return all(validate_url(item) for item in url)
        return False

    syntax_result = validators.url(url)
    if isinstance(syntax_result, validators.ValidationError):
        raise ValueError(ERROR_MESSAGES.INVALID_URL)

    if ENABLE_RAG_LOCAL_WEB_FETCH:
        return True

    # Local web fetch is disabled: reject any URL whose host resolves to a
    # private IP address.
    # This is technically still vulnerable to DNS rebinding attacks, as we
    # don't control WebBaseLoader.
    hostname = urllib.parse.urlparse(url).hostname
    ipv4_addresses, ipv6_addresses = resolve_hostname(hostname)

    if any(validators.ipv4(address, private=True) for address in ipv4_addresses):
        raise ValueError(ERROR_MESSAGES.INVALID_URL)
    if any(validators.ipv6(address, private=True) for address in ipv6_addresses):
        raise ValueError(ERROR_MESSAGES.INVALID_URL)

    return True


678
679
680
681
682
683
684
685
686
687
688
def resolve_hostname(hostname):
    """Resolve ``hostname`` and split the results by address family.

    Returns:
        A ``(ipv4_addresses, ipv6_addresses)`` tuple of lists, in the order
        ``socket.getaddrinfo`` reported them (duplicates are kept).
    """
    ipv4_addresses = []
    ipv6_addresses = []

    # Each getaddrinfo entry is (family, type, proto, canonname, sockaddr);
    # the address string is the first element of sockaddr.
    for family, _socktype, _proto, _canonname, sockaddr in socket.getaddrinfo(
        hostname, None
    ):
        if family == socket.AF_INET:
            ipv4_addresses.append(sockaddr[0])
        elif family == socket.AF_INET6:
            ipv6_addresses.append(sockaddr[0])

    return ipv4_addresses, ipv6_addresses


Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
689
690
@app.post("/web/search")
def store_web_search(form_data: SearchForm, user=Depends(get_current_user)):
    """Run a web search, load the result pages and store them in the vector DB.

    When no collection name is given (empty string or ``None``), the name is
    the first 63 characters of the SHA-256 of the query. An existing
    collection with that name is overwritten.

    Returns:
        A dict with the collection name and the result URLs as ``filenames``.

    Raises:
        HTTPException: 400 with ``WEB_SEARCH_ERROR`` when the search itself
            fails, or 400 with the default message for any later failure.
    """
    try:
        try:
            web_results = search_web(form_data.query)
        except Exception as e:
            log.exception(e)
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.WEB_SEARCH_ERROR,
            )

        urls = [result.link for result in web_results]
        loader = get_web_loader(urls)
        data = loader.load()

        # collection_name is Optional[str]: treat None like "" so it never
        # reaches the vector store as a collection name.
        collection_name = form_data.collection_name
        if not collection_name:
            collection_name = calculate_sha256_string(form_data.query)[:63]

        store_data_in_vector_db(data, collection_name, overwrite=True)
        return {
            "status": True,
            "collection_name": collection_name,
            "filenames": urls,
        }
    except HTTPException:
        # Already logged and carrying the specific WEB_SEARCH_ERROR detail;
        # let it through instead of re-wrapping it in the generic handler.
        raise
    except Exception as e:
        log.exception(e)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail=ERROR_MESSAGES.DEFAULT(e),
        )


722
def store_data_in_vector_db(data, collection_name, overwrite: bool = False) -> bool:
    """Split loaded documents into chunks and store them in the vector DB.

    Args:
        data: Documents to split (passed to ``split_documents``).
        collection_name: Target collection.
        overwrite: Forwarded to ``store_docs_in_vector_db``.

    Returns:
        The result of ``store_docs_in_vector_db``. This used to be wrapped in
        an always-truthy ``(result, None)`` tuple, contradicting the ``bool``
        annotation.

    Raises:
        ValueError: When splitting yields no chunks.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=app.state.config.CHUNK_SIZE,
        chunk_overlap=app.state.config.CHUNK_OVERLAP,
        add_start_index=True,
    )

    docs = text_splitter.split_documents(data)
    if not docs:
        raise ValueError(ERROR_MESSAGES.EMPTY_CONTENT)

    log.info(f"store_data_in_vector_db {docs}")
    return store_docs_in_vector_db(docs, collection_name, overwrite)
737
738
739


def store_text_in_vector_db(
    text, metadata, collection_name, overwrite: bool = False
) -> bool:
    """Chunk a single text and store the chunks in a collection.

    ``metadata`` is attached to the documents created from ``text``; chunk
    size and overlap are read from ``app.state.config``.

    Returns:
        The value returned by ``store_docs_in_vector_db``.
    """
    splitter_options = {
        "chunk_size": app.state.config.CHUNK_SIZE,
        "chunk_overlap": app.state.config.CHUNK_OVERLAP,
        "add_start_index": True,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)

    chunks = splitter.create_documents([text], metadatas=[metadata])
    return store_docs_in_vector_db(chunks, collection_name, overwrite)


Timothy J. Baek's avatar
Timothy J. Baek committed
751
def store_docs_in_vector_db(docs, collection_name, overwrite: bool = False) -> bool:
    """Embed document chunks and add them to a new Chroma collection.

    With ``overwrite`` set, a collection already named ``collection_name``
    is deleted before the new one is created.

    Returns:
        True when every batch was added, or when the failure was an
        exception whose class is named ``UniqueConstraintError``
        (presumably raised when the collection already exists - confirm).
        False for any other exception; the exception is logged, not raised.
    """
    log.info(f"store_docs_in_vector_db {docs} {collection_name}")

    texts = [chunk.page_content for chunk in docs]
    metadatas = [chunk.metadata for chunk in docs]

    try:
        if overwrite:
            for existing in CHROMA_CLIENT.list_collections():
                if existing.name != collection_name:
                    continue
                log.info(f"deleting existing collection {collection_name}")
                CHROMA_CLIENT.delete_collection(name=collection_name)

        collection = CHROMA_CLIENT.create_collection(name=collection_name)

        embedding_func = get_embedding_function(
            app.state.config.RAG_EMBEDDING_ENGINE,
            app.state.config.RAG_EMBEDDING_MODEL,
            app.state.sentence_transformer_ef,
            app.state.config.OPENAI_API_KEY,
            app.state.config.OPENAI_API_BASE_URL,
        )

        # Newlines are flattened only in the text that gets embedded; the
        # stored documents keep their original content.
        embedding_texts = [text.replace("\n", " ") for text in texts]
        embeddings = embedding_func(embedding_texts)

        ids = [str(uuid.uuid4()) for _ in texts]
        batches = create_batches(
            api=CHROMA_CLIENT,
            ids=ids,
            metadatas=metadatas,
            embeddings=embeddings,
            documents=texts,
        )
        for batch in batches:
            collection.add(*batch)

        return True
    except Exception as e:
        log.exception(e)
        # Matched by class name, so the exception type is not imported here.
        return e.__class__.__name__ == "UniqueConstraintError"


795
796
def get_loader(filename: str, file_content_type: str, file_path: str):
    """Choose a document loader for a file.

    The decision uses the lower-cased text after the last ``.`` in
    ``filename`` and, for some formats, ``file_content_type``. Branches are
    tried in a fixed order and the first match wins.

    Returns:
        A ``(loader, known_type)`` tuple. ``known_type`` is False only when
        nothing matched and the plain-text loader is used as a fallback.
    """
    extension = filename.split(".")[-1].lower()

    source_extensions = (
        "go",
        "py",
        "java",
        "sh",
        "bat",
        "ps1",
        "cmd",
        "js",
        "ts",
        "css",
        "cpp",
        "hpp",
        "h",
        "c",
        "cs",
        "sql",
        "log",
        "ini",
        "pl",
        "pm",
        "r",
        "dart",
        "dockerfile",
        "env",
        "php",
        "hs",
        "hsc",
        "lua",
        "nginxconf",
        "conf",
        "m",
        "mm",
        "plsql",
        "perl",
        "rb",
        "rs",
        "db2",
        "scala",
        "bash",
        "swift",
        "vue",
        "svelte",
    )

    # Formats recognised from the extension alone.
    if extension == "pdf":
        pdf_loader = PyPDFLoader(
            file_path, extract_images=app.state.config.PDF_EXTRACT_IMAGES
        )
        return pdf_loader, True
    if extension == "csv":
        return CSVLoader(file_path), True
    if extension == "rst":
        return UnstructuredRSTLoader(file_path, mode="elements"), True
    if extension == "xml":
        return UnstructuredXMLLoader(file_path), True
    if extension in ("htm", "html"):
        return BSHTMLLoader(file_path, open_encoding="unicode_escape"), True
    if extension == "md":
        return UnstructuredMarkdownLoader(file_path), True

    # Formats recognised from the content type, with an extension fallback
    # for the office documents.
    if file_content_type == "application/epub+zip":
        return UnstructuredEPubLoader(file_path), True

    word_type = (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )
    if file_content_type == word_type or extension in ("doc", "docx"):
        return Docx2txtLoader(file_path), True

    excel_types = (
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
    if file_content_type in excel_types or extension in ("xls", "xlsx"):
        return UnstructuredExcelLoader(file_path), True

    powerpoint_types = (
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
    )
    if file_content_type in powerpoint_types or extension in ("ppt", "pptx"):
        return UnstructuredPowerPointLoader(file_path), True

    # Everything else is read as plain text; it only counts as a known type
    # when the extension is a listed source extension or the content type
    # contains "text/".
    looks_like_text = extension in source_extensions or bool(
        file_content_type and "text/" in file_content_type
    )
    return TextLoader(file_path, autodetect_encoding=True), looks_like_text


887
@app.post("/doc")
def store_doc(
    collection_name: Optional[str] = Form(None),
    file: UploadFile = File(...),
    user=Depends(get_current_user),
):
    """Save an uploaded file under ``UPLOAD_DIR`` and index its content.

    The upload is written to ``UPLOAD_DIR`` under its base name, loaded
    with the loader chosen by ``get_loader`` and stored with
    ``store_data_in_vector_db``. When ``collection_name`` is not supplied,
    the first 63 characters of the file's SHA-256 are used.

    Returns:
        A dict with ``status``, ``collection_name``, ``filename`` and
        ``known_type``.

    Raises:
        HTTPException: 500 with ``ERROR_MESSAGES.DEFAULT()`` when the
            vector store reports that nothing was stored; 400 with
            ``ERROR_MESSAGES.PANDOC_NOT_INSTALLED`` when the error text
            mentions a missing pandoc; 400 with
            ``ERROR_MESSAGES.DEFAULT(e)`` for any other failure.
    """
    # "https://www.gutenberg.org/files/1727/1727-h/1727-h.htm"

    log.info(f"file.content_type: {file.content_type}")
    try:
        unsanitized_filename = file.filename
        # Keep only the final path component of the client-supplied name.
        filename = os.path.basename(unsanitized_filename)

        file_path = f"{UPLOAD_DIR}/{filename}"

        contents = file.file.read()
        with open(file_path, "wb") as f:
            f.write(contents)

        if collection_name is None:
            with open(file_path, "rb") as f:
                collection_name = calculate_sha256(f)[:63]

        loader, known_type = get_loader(filename, file.content_type, file_path)
        data = loader.load()

        # Exceptions raised while storing are reported by the generic
        # handler below as 400, exactly as before: the 500 that used to be
        # raised here was itself caught by that handler and never reached
        # the client.
        result = store_data_in_vector_db(data, collection_name)

        # store_data_in_vector_db hands back a (stored, None) tuple, which
        # is always truthy; test the flag itself so a failed store is not
        # reported as a success.
        stored = result[0] if isinstance(result, tuple) else result
        if not stored:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=ERROR_MESSAGES.DEFAULT(),
            )

        return {
            "status": True,
            "collection_name": collection_name,
            "filename": filename,
            "known_type": known_type,
        }
    except HTTPException:
        # Already carries the intended status code and detail.
        raise
    except Exception as e:
        log.exception(e)
        if "No pandoc was found" in str(e):
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.PANDOC_NOT_INSTALLED,
            )
        else:
            raise HTTPException(
                status_code=status.HTTP_400_BAD_REQUEST,
                detail=ERROR_MESSAGES.DEFAULT(e),
            )
942
943


944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
class TextRAGForm(BaseModel):
    # Request body accepted by the POST /text route (store_text).
    name: str  # stored in the chunk metadata under "name"
    content: str  # text that is split into chunks and stored
    collection_name: Optional[str] = None  # None: store_text derives a name from a hash of content


@app.post("/text")
def store_text(
    form_data: TextRAGForm,
    user=Depends(get_current_user),
):
    """Index a raw text snippet in the vector store.

    The text is stored with metadata ``name`` (from the form) and
    ``created_by`` (the current user's id). When no collection name is
    given, one is derived from the SHA-256 of the content.

    Returns:
        A dict with ``status`` and ``collection_name``.

    Raises:
        HTTPException: 500 with ``ERROR_MESSAGES.DEFAULT()`` when
            ``store_text_in_vector_db`` returns a falsy result.
    """
    collection_name = form_data.collection_name
    if collection_name is None:
        # A SHA-256 hex digest is 64 characters, one more than Chroma
        # accepts for a collection name; truncate to 63 like the other
        # routes in this file do.
        collection_name = calculate_sha256_string(form_data.content)[:63]

    result = store_text_in_vector_db(
        form_data.content,
        metadata={"name": form_data.name, "created_by": user.id},
        collection_name=collection_name,
    )

    if result:
        return {"status": True, "collection_name": collection_name}
    else:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=ERROR_MESSAGES.DEFAULT(),
        )


975
976
@app.get("/scan")
def scan_docs_dir(user=Depends(get_admin_user)):
    """Index every non-hidden file found under ``DOCS_DIR``.

    Each file is stored in a collection named after the first 63
    characters of its SHA-256. When the store succeeds and no document
    with the sanitized file name exists yet, a document record is
    inserted for the current user, with tags taken from
    ``extract_folders_after_data_docs``.

    Failures are logged per file and do not stop the scan.

    Returns:
        Always True.
    """
    for path in Path(DOCS_DIR).rglob("./**/*"):
        try:
            if not path.is_file() or path.name.startswith("."):
                continue

            tags = extract_folders_after_data_docs(path)
            filename = path.name
            file_content_type = mimetypes.guess_type(path)

            with open(path, "rb") as f:
                collection_name = calculate_sha256(f)[:63]

            loader, _ = get_loader(filename, file_content_type[0], str(path))
            data = loader.load()

            try:
                result = store_data_in_vector_db(data, collection_name)
                # store_data_in_vector_db hands back a (stored, None)
                # tuple, which is always truthy; test the flag itself so
                # no record is created for a file that was not stored.
                stored = result[0] if isinstance(result, tuple) else result

                if stored:
                    sanitized_filename = sanitize_filename(filename)
                    doc = Documents.get_doc_by_name(sanitized_filename)

                    if doc is None:
                        content = (
                            json.dumps({"tags": [{"name": name} for name in tags]})
                            if len(tags)
                            else "{}"
                        )
                        Documents.insert_new_doc(
                            user.id,
                            DocumentForm(
                                **{
                                    "name": sanitized_filename,
                                    "title": filename,
                                    "collection_name": collection_name,
                                    "filename": filename,
                                    "content": content,
                                }
                            ),
                        )
            except Exception as e:
                # Best effort: one failing file must not abort the scan.
                log.exception(e)

        except Exception as e:
            log.exception(e)

    return True


Timothy J. Baek's avatar
Timothy J. Baek committed
1036
@app.get("/reset/db")
def reset_vector_db(user=Depends(get_admin_user)):
    # Resets the Chroma client; access is gated by the get_admin_user
    # dependency. Nothing is returned explicitly.
    CHROMA_CLIENT.reset()
Timothy J. Baek's avatar
Timothy J. Baek committed
1039
1040
1041


@app.get("/reset")
def reset(user=Depends(get_admin_user)) -> bool:
    """Empty ``UPLOAD_DIR`` and reset the Chroma client.

    Files and symlinks are unlinked and directories are removed
    recursively. A failure on one entry, or in the client reset, is logged
    and otherwise ignored.

    Returns:
        Always True.
    """
    folder = f"{UPLOAD_DIR}"
    for entry in os.listdir(folder):
        target = os.path.join(folder, entry)
        try:
            remover = None
            if os.path.isfile(target) or os.path.islink(target):
                remover = os.unlink
            elif os.path.isdir(target):
                remover = shutil.rmtree

            if remover is not None:
                remover(target)
        except Exception as err:
            log.error("Failed to delete %s. Reason: %s" % (target, err))

    try:
        CHROMA_CLIENT.reset()
    except Exception as err:
        log.exception(err)

    return True
Timothy J. Baek's avatar
refac  
Timothy J. Baek committed
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070


if ENV == "dev":
    # These routes are registered only when ENV equals "dev"; neither
    # declares an authentication dependency.

    @app.get("/ef")
    async def get_embeddings():
        """Return app.state.EMBEDDING_FUNCTION applied to a fixed sample string."""
        return {"result": app.state.EMBEDDING_FUNCTION("hello world")}

    @app.get("/ef/{text}")
    async def get_embeddings_text(text: str):
        """Return app.state.EMBEDDING_FUNCTION applied to the ``text`` path parameter."""
        return {"result": app.state.EMBEDDING_FUNCTION(text)}