Commit a48ac6a2 authored by Jun Siang Cheah
Browse files

refac: lazily load sentence_transformers to reduce start up memory usage

parent 17c68436
......@@ -48,8 +48,6 @@ import mimetypes
import uuid
import json
-import sentence_transformers
from apps.webui.models.documents import (
Documents,
DocumentForm,
......@@ -190,6 +188,8 @@ def update_embedding_model(
update_model: bool = False,
):
if embedding_model and app.state.config.RAG_EMBEDDING_ENGINE == "":
+import sentence_transformers
app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer(
get_model_path(embedding_model, update_model),
device=DEVICE_TYPE,
......@@ -204,6 +204,8 @@ def update_reranking_model(
update_model: bool = False,
):
if reranking_model:
+import sentence_transformers
app.state.sentence_transformer_rf = sentence_transformers.CrossEncoder(
get_model_path(reranking_model, update_model),
device=DEVICE_TYPE,
......
......@@ -442,8 +442,6 @@ from langchain_core.documents import BaseDocumentCompressor, Document
from langchain_core.callbacks import Callbacks
from langchain_core.pydantic_v1 import Extra
-from sentence_transformers import util
class RerankCompressor(BaseDocumentCompressor):
embedding_function: Any
......@@ -468,6 +466,8 @@ class RerankCompressor(BaseDocumentCompressor):
[(query, doc.page_content) for doc in documents]
)
else:
+from sentence_transformers import util
query_embedding = self.embedding_function(query)
document_embedding = self.embedding_function(
[doc.page_content for doc in documents]
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment